Mysql 8 regex changes and word boundary weirdness branch by robinsowell · Pull Request #5293 · ExpressionEngine/ExpressionEngine · GitHub
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,37 @@ public function testSqlMultipleValues()
$sql = $this->fields->sql('field_id_5', 'val1|val2');
$this->assertStringContainsString('LIKE', $sql);
}

public function testSqlFullWordSearchUsesDatabaseWordBoundaryHelper()
{
$db = new class extends ProSearchFakeDb {
public $wordBoundaryTerms = [];

public function word_boundary_regex($term)
{
$this->wordBoundaryTerms[] = $term;

return '(\\b|^)' . preg_quote((string) $term) . '(\\b|$)';
}
};
ee()->setMock('db', $db);

$sql = $this->fields->sql('field_id_5', 'term\W');

$this->assertSame(['term'], $db->wordBoundaryTerms);
$this->assertSame("(field_id_5 REGEXP '(\\\\b|^)term(\\\\b|$)')", $sql);
}

public function testSqlFullWordSearchUsesLegacyBoundaryForMariaDb()
{
$db = new ProSearchFakeDb();
$db->versionString = '10.6.18-MariaDB';
ee()->setMock('db', $db);

$sql = $this->fields->sql('field_id_5', 'term\W');

$this->assertSame("(field_id_5 REGEXP '([[:<:]]|^)term([[:>:]]|$)')", $sql);
}

public function testInvalidMethodCall()
{
Expand All @@ -473,4 +504,3 @@ public function testIsMethodWithoutArgs()
$this->fields->is_native();
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public function prepare($str) { return $str; }
class ProSearchFakeDb extends FakeDb
{
public $dbprefix = 'exp_';
public $versionString = false;

public function get($table = null)
{
Expand All @@ -161,6 +162,30 @@ public function escape_like_str($str)
{
return addcslashes($str, '%_');
}

public function escape_str($str, $like = false)
{
return addslashes($str);
}

public function version()
{
return $this->versionString;
}

public function word_boundary_regex($term)
{
$term = preg_quote((string) $term);

if (is_string($this->versionString)
&& stripos($this->versionString, 'mariadb') === false
&& preg_match('/\d+(?:\.\d+){1,2}/', $this->versionString, $match)
&& version_compare($match[0], '8.0.4', '>=')) {
return '(\\b|^)' . $term . '(\\b|$)';
}

return '([[:<:]]|^)' . $term . '([[:>:]]|$)';
}
}

class ProSearchDbResult extends eeDbResultMock
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?php

use PHPUnit\Framework\TestCase;

require_once __DIR__ . '/../../../eeObjectMock.php';
require_once SYSPATH . 'ee/legacy/database/DB_driver.php';

if (! class_exists('CI_Model')) {
require_once BASEPATH . 'core/Model.php';
}

require_once BASEPATH . 'models/channel_model.php';

class ChannelModelFieldSearchTest extends TestCase
{
protected function tearDown(): void
{
ee()->resetMocks();
}

public function testFullWordFieldSearchUsesIcuBoundaryForMysqlEight(): void
{
ee()->setMock('db', new ChannelModelFieldSearchDbStub('8.0.44'));

$sql = (new Channel_model())->field_search_sql('\Wterm', 'field_id_1');

$this->assertStringContainsString('REGEXP "(\\\\b|^)term(\\\\b|$)"', $sql);
$this->assertStringNotContainsString('[[:<:]]', $sql);
$this->assertStringNotContainsString('[[:>:]]', $sql);
}

public function testFullWordFieldSearchUsesLegacyBoundaryForMariaDb(): void
{
ee()->setMock('db', new ChannelModelFieldSearchDbStub('5.5.5-10.6.18-MariaDB'));

$sql = (new Channel_model())->field_search_sql('\Wterm', 'field_id_1');

$this->assertStringContainsString('REGEXP "([[:<:]]|^)term([[:>:]]|$)"', $sql);
}

public function testNegatedFullWordFieldSearchKeepsNullFallback(): void
{
ee()->setMock('db', new ChannelModelFieldSearchDbStub('8.0.44'));

$sql = (new Channel_model())->field_search_sql('not \Wterm', 'field_id_1');

$this->assertStringContainsString('NOT REGEXP "(\\\\b|^)term(\\\\b|$)"', $sql);
$this->assertStringContainsString('OR (field_id_1 IS NULL)', $sql);
}
}

class ChannelModelFieldSearchDbStub extends CI_DB_driver
{
private $versionString;

public function __construct($versionString)
{
$this->versionString = $versionString;

parent::__construct([]);
}

public function version()
{
return $this->versionString;
}

public function escape_str($str, $like = false)
{
return addslashes($str);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,50 @@ public function testVersionHandlesUnsupportedAndSuccessfulPaths(): void
$this->assertSame('8.0.0', $driver->version());
}

/**
* @dataProvider wordBoundaryRegexProvider
*/
public function testWordBoundaryRegexUsesDatabaseCompatibleBoundaries($version, $expected): void
{
$driver = new DBDriverMethodHarness([]);
$driver->queryResponse = new DBDriverQueryResultStub(1, [], (object) ['ver' => $version], []);

$this->assertSame($expected, $driver->word_boundary_regex('term'));
}

public function wordBoundaryRegexProvider()
{
return [
'mysql 8.0.4' => ['8.0.4', '(\\b|^)term(\\b|$)'],
'mysql 8.0.44' => ['8.0.44', '(\\b|^)term(\\b|$)'],
'mysql 8.0.44 suffix' => ['8.0.44-commercial', '(\\b|^)term(\\b|$)'],
'mysql 8.0.3' => ['8.0.3', '([[:<:]]|^)term([[:>:]]|$)'],
'mysql 5.7' => ['5.7.44', '([[:<:]]|^)term([[:>:]]|$)'],
'mariadb' => ['10.6.18-MariaDB', '([[:<:]]|^)term([[:>:]]|$)'],
'prefixed mariadb' => ['5.5.5-10.6.18-MariaDB', '([[:<:]]|^)term([[:>:]]|$)'],
'unknown' => ['', '([[:<:]]|^)term([[:>:]]|$)'],
];
}

public function testWordBoundaryRegexEscapesTermAndFallsBackWhenVersionUnsupported(): void
{
$driver = new DBDriverMethodHarness([]);
$driver->versionSql = false;
$driver->db_debug = false;

$this->assertSame('([[:<:]]|^)term\\.one([[:>:]]|$)', $driver->word_boundary_regex('term.one'));
}

public function testWordBoundaryRegexCachesVersionDetection(): void
{
$driver = new DBDriverMethodHarness([]);
$driver->queryResponse = new DBDriverQueryResultStub(1, [], (object) ['ver' => '8.0.44'], []);

$this->assertSame('(\\b|^)one(\\b|$)', $driver->word_boundary_regex('one'));
$this->assertSame('(\\b|^)two(\\b|$)', $driver->word_boundary_regex('two'));
$this->assertCount(1, $driver->querySqls);
}

public function testQueryHandlesEmptySqlFailureWriteAndReadPaths(): void
{
$driver = new DBDriverQueryHarness([]);
Expand Down
10 changes: 10 additions & 0 deletions system/ee/ExpressionEngine/Tests/eeObjectMock.php
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,11 @@ public function last_query()
return $this->last_query ?? '';
}

public function word_boundary_regex($term)
{
return '([[:<:]]|^)' . preg_quote((string) $term) . '([[:>:]]|$)';
}

public function count_all_results($table = '')
{
// Simple implementation - return count of filtered rows
Expand Down Expand Up @@ -868,6 +873,11 @@ public function escape_str($str)
return addslashes($str);
}

public function word_boundary_regex($term)
{
return '([[:<:]]|^)' . preg_quote((string) $term) . '([[:>:]]|$)';
}

public function get($table = null)
{
if ($table) {
Expand Down
51 changes: 51 additions & 0 deletions system/ee/legacy/database/DB_driver.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class CI_DB_driver
public $limit_used;

protected $_escape_char = '"';
protected $_regexp_uses_icu_word_boundaries = null;

/**
* Constructor. Accepts one parameter containing the database
Expand Down Expand Up @@ -152,6 +153,56 @@ public function version()
return $this->query($sql)->row('ver');
}

/**
* Build a whole-word REGEXP pattern for the active database.
*
* MySQL 8.0.4+ uses ICU regular expressions, which do not support the
* Spencer word-boundary markers used by older MySQL and MariaDB.
*
* @param string $term
* @return string
*/
public function word_boundary_regex($term)
{
$term = preg_quote((string) $term);

if ($this->_uses_icu_regexp_word_boundaries()) {
return '(\\b|^)' . $term . '(\\b|$)';
}

return '([[:<:]]|^)' . $term . '([[:>:]]|$)';
}

/**
* Determine whether the active database uses ICU REGEXP word boundaries.
*
* @return bool
*/
protected function _uses_icu_regexp_word_boundaries()
{
if ($this->_regexp_uses_icu_word_boundaries !== null) {
return $this->_regexp_uses_icu_word_boundaries;
}

$this->_regexp_uses_icu_word_boundaries = false;

try {
$version = $this->version();
} catch (Throwable $e) {
return $this->_regexp_uses_icu_word_boundaries;
}

if (! is_string($version) || $version === '' || stripos($version, 'mariadb') !== false) {
return $this->_regexp_uses_icu_word_boundaries;
}

if (preg_match('/\d+(?:\.\d+){1,2}/', $version, $match)) {
$this->_regexp_uses_icu_word_boundaries = version_compare($match[0], '8.0.4', '>=');
}

return $this->_regexp_uses_icu_word_boundaries;
}

/**
* Execute the query
*
Expand Down
3 changes: 1 addition & 2 deletions system/ee/legacy/models/channel_model.php
Loading