Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions psalm-baseline.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1092,11 +1092,6 @@
<code><![CDATA[isset(self::$translator)]]></code>
</RedundantPropertyInitializationCheck>
</file>
<file src="src/UtfString.php">
<PossiblyUnusedProperty>
<code><![CDATA[$byteLen]]></code>
</PossiblyUnusedProperty>
</file>
<file src="src/Utils/BufferedQuery.php">
<PossiblyNullOperand>
<code><![CDATA[$this->status]]></code>
Expand Down Expand Up @@ -1567,6 +1562,12 @@
* }]]></code>
</InvalidReturnType>
</file>
<file src="tests/UtfStringSerializer.php">
<PossiblyUnusedMethod>
<code><![CDATA[serialize]]></code>
<code><![CDATA[unserialize]]></code>
</PossiblyUnusedMethod>
</file>
<file src="tests/Utils/BufferedQueryTest.php">
<PossiblyUnusedMethod>
<code><![CDATA[extractProvider]]></code>
Expand Down
6 changes: 5 additions & 1 deletion src/Tools/TestGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
use PhpMyAdmin\SqlParser\Exceptions\ParserException;
use PhpMyAdmin\SqlParser\Lexer;
use PhpMyAdmin\SqlParser\Parser;
use PhpMyAdmin\SqlParser\Tests\UtfStringSerializer;
use PhpMyAdmin\SqlParser\Token;
use PhpMyAdmin\SqlParser\UtfString;

use function dirname;
use function file_exists;
Expand Down Expand Up @@ -168,7 +170,9 @@ public static function build(

// unset mode, reset to default every time, to be sure
Context::setMode();
$serializer = new CustomJsonSerializer();
$serializer = new CustomJsonSerializer(null, [
UtfString::class => new UtfStringSerializer(),
]);
// Writing test's data.
$encoded = $serializer->serialize($test);

Expand Down
83 changes: 13 additions & 70 deletions src/UtfString.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@
use Exception;
use Stringable;

use function count;
use function implode;
use function mb_check_encoding;
use function mb_strlen;
use function mb_substr;
use function ord;
use function strlen;
use function substr;
use function mb_str_split;

/**
* Implementation for UTF-8 strings.
Expand All @@ -32,44 +30,19 @@
class UtfString implements ArrayAccess, Stringable
{
/**
* The raw, multi-byte string.
*/
public string $str = '';

/**
* The index of current byte.
*
* For ASCII strings, the byte index is equal to the character index.
*/
public int $byteIdx = 0;

/**
* The index of current character.
* The multi-byte characters.
*
* For non-ASCII strings, some characters occupy more than one byte and
* the character index will have a lower value than the byte index.
*/
public int $charIdx = 0;

/**
* The length of the string (in bytes).
* @var list<string>
*/
public int $byteLen = 0;

/**
* The length of the string (in characters).
*/
public int $charLen = 0;
public array $characters;

/** @param string $str the string to wrap; input that is not valid UTF-8 is treated as containing no characters */
public function __construct(string $str)
{
$this->str = $str;
$this->byteLen = mb_strlen($str, '8bit');
if (! mb_check_encoding($str, 'UTF-8')) {
$this->charLen = 0;
if (mb_check_encoding($str, 'UTF-8')) {
$this->characters = mb_str_split($str, 1, 'UTF-8');
} else {
$this->charLen = mb_strlen($str, 'UTF-8');
$this->characters = [];
}
}

Expand All @@ -80,7 +53,7 @@ public function __construct(string $str)
*/
public function offsetExists(mixed $offset): bool
{
return ($offset >= 0) && ($offset < $this->charLen);
return $offset >= 0 && $offset < count($this->characters);
}

/**
Expand All @@ -90,37 +63,7 @@ public function offsetExists(mixed $offset): bool
*/
public function offsetGet(mixed $offset): string|null
{
// This function moves the internal byte and character pointer to the requested offset.
// This function is part of hot code so the aim is to do the following
// operations as efficiently as possible.
// UTF-8 character encoding is a variable length encoding that encodes Unicode
// characters in 1-4 bytes. Thus we fetch 4 bytes from the current offset and then use mb_substr
// to get the first UTF-8 character in it. We then use strlen to get the character's size in bytes.
if (($offset < 0) || ($offset >= $this->charLen)) {
return null;
}

$delta = $offset - $this->charIdx;

if ($delta > 0) {
// Fast forwarding.
$this->byteIdx += strlen(mb_substr(substr($this->str, $this->byteIdx, 4 * $delta), 0, $delta));
$this->charIdx += $delta;
} elseif ($delta < 0) {
// Rewinding.
while ($delta++ < 0) {
// We rewind byte by byte and only count characters that are not continuation bytes,
// i.e. ASCII characters and first octets of multibyte characters
do {
$byte = ord($this->str[--$this->byteIdx]);
} while (($byte >= 128) && ($byte < 192));

--$this->charIdx;
}
}

// Fetch the first Unicode character within the next 4 bytes in the string.
return mb_substr(substr($this->str, $this->byteIdx, 4), 0, 1);
return $this->characters[$offset] ?? null;
}

/**
Expand Down Expand Up @@ -153,14 +96,14 @@ public function offsetUnset(mixed $offset): void
*/
public function length(): int
{
return $this->charLen;
return count($this->characters);
}

/**
* Returns the contained string.
*/
public function __toString(): string
{
return $this->str;
return implode('', $this->characters);
}
}
5 changes: 4 additions & 1 deletion tests/TestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use PhpMyAdmin\SqlParser\Token;
use PhpMyAdmin\SqlParser\TokensList;
use PhpMyAdmin\SqlParser\Tools\CustomJsonSerializer;
use PhpMyAdmin\SqlParser\UtfString;
use PHPUnit\Framework\TestCase as BaseTestCase;

use function file_get_contents;
Expand Down Expand Up @@ -95,7 +96,9 @@ public function getData(string $name): array
$serializedData = file_get_contents('tests/data/' . $name . '.out');
$this->assertIsString($serializedData);

$serializer = new CustomJsonSerializer();
$serializer = new CustomJsonSerializer(null, [
UtfString::class => new UtfStringSerializer(),
]);
$data = $serializer->unserialize($serializedData);

$this->assertIsArray($data);
Expand Down
28 changes: 28 additions & 0 deletions tests/UtfStringSerializer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

declare(strict_types=1);

namespace PhpMyAdmin\SqlParser\Tests;

use PhpMyAdmin\SqlParser\UtfString;

/**
 * Converts UtfString objects to and from a plain array that holds only their text.
 */
class UtfStringSerializer
{
    /**
     * Reduces a UtfString to an array carrying its string value under the "str" key.
     *
     * @return array<string,string>
     * @psalm-return array{str: string}
     */
    public function serialize(UtfString $str): array
    {
        $text = (string) $str;

        return ['str' => $text];
    }

    /**
     * Builds a UtfString from the "str" entry of the given array.
     *
     * @param array<string,string> $data
     * @psalm-param array{str: string} $data
     */
    public function unserialize(array $data): UtfString
    {
        $text = $data['str'];

        return new UtfString($text);
    }
}
6 changes: 1 addition & 5 deletions tests/data/lexer/lexUtf8.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
"errors": [],
"str": {
"@type": "PhpMyAdmin\\SqlParser\\UtfString",
"str": "select * from école",
"byteIdx": 19,
"charIdx": 18,
"byteLen": 20,
"charLen": 19
"str": "select * from école"
},
"len": 19,
"last": 19,
Expand Down
6 changes: 1 addition & 5 deletions tests/data/parser/parseCreateProcedure3.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
"errors": [],
"str": {
"@type": "PhpMyAdmin\\SqlParser\\UtfString",
"str": "DELIMITER $$\nCREATE DEFINER=`user`@`localhost` PROCEDURE `multiDBqueryRun_V1`(IN `query` TEXT, IN `table_name_var` VARCHAR(255), IN `columns_used_var` TEXT, IN `where_text_var` TEXT, IN `separator_value_var` VARCHAR(255)) COMMENT 'Query: SingleDB → MultiDB (All DBs) + run it' NOT DETERMINISTIC MODIFIES SQL DATA SQL SECURITY INVOKER BEGIN\nSET @TABLE_NAME = table_name_var;\nSET @WHERE_TEXT = where_text_var;\nSET @COLUMNS_USED = columns_used_var;\nSET @MULTIDB_QUERY = CONCAT('SELECT \"$MULTIDB\" FROM `$MULTIDB`.', @TABLE_NAME, @WHERE_TEXT);\n\n-- EXECUTION --\nCREATE TEMPORARY TABLE `MULTIDB_TEMP_DB_TBL_COLS` AS\nSELECT * FROM (\n SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME\n FROM INFORMATION_SCHEMA.COLUMNS\n WHERE\n TABLE_SCHEMA NOT IN('mysql', 'test', 'tmp', 'information_schema', 'sys', 'performance_schema') AND\n TABLE_NAME = @TABLE_NAME AND\n FIND_IN_SET(COLUMN_NAME, @COLUMNS_USED)\n) tbl\nGROUP BY\n TABLE_SCHEMA,\n TABLE_NAME;\n\nSELECT GROUP_CONCAT(REPLACE(@MULTIDB_QUERY, '$MULTIDB', CONCAT('', TABLE_SCHEMA, '')) SEPARATOR \"\\nUNION ALL\\n\")\nINTO @stmt_sql\nFROM `MULTIDB_TEMP_DB_TBL_COLS`;\n\nPREPARE stmt FROM @stmt_sql;\nEXECUTE stmt;\nDEALLOCATE PREPARE stmt;\nEND",
"byteIdx": 1174,
"charIdx": 1172,
"byteLen": 1175,
"charLen": 1173
"str": "DELIMITER $$\nCREATE DEFINER=`user`@`localhost` PROCEDURE `multiDBqueryRun_V1`(IN `query` TEXT, IN `table_name_var` VARCHAR(255), IN `columns_used_var` TEXT, IN `where_text_var` TEXT, IN `separator_value_var` VARCHAR(255)) COMMENT 'Query: SingleDB → MultiDB (All DBs) + run it' NOT DETERMINISTIC MODIFIES SQL DATA SQL SECURITY INVOKER BEGIN\nSET @TABLE_NAME = table_name_var;\nSET @WHERE_TEXT = where_text_var;\nSET @COLUMNS_USED = columns_used_var;\nSET @MULTIDB_QUERY = CONCAT('SELECT \"$MULTIDB\" FROM `$MULTIDB`.', @TABLE_NAME, @WHERE_TEXT);\n\n-- EXECUTION --\nCREATE TEMPORARY TABLE `MULTIDB_TEMP_DB_TBL_COLS` AS\nSELECT * FROM (\n SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME\n FROM INFORMATION_SCHEMA.COLUMNS\n WHERE\n TABLE_SCHEMA NOT IN('mysql', 'test', 'tmp', 'information_schema', 'sys', 'performance_schema') AND\n TABLE_NAME = @TABLE_NAME AND\n FIND_IN_SET(COLUMN_NAME, @COLUMNS_USED)\n) tbl\nGROUP BY\n TABLE_SCHEMA,\n TABLE_NAME;\n\nSELECT GROUP_CONCAT(REPLACE(@MULTIDB_QUERY, '$MULTIDB', CONCAT('', TABLE_SCHEMA, '')) SEPARATOR \"\\nUNION ALL\\n\")\nINTO @stmt_sql\nFROM `MULTIDB_TEMP_DB_TBL_COLS`;\n\nPREPARE stmt FROM @stmt_sql;\nEXECUTE stmt;\nDEALLOCATE PREPARE stmt;\nEND"
},
"len": 1173,
"last": 1173,
Expand Down
6 changes: 1 addition & 5 deletions tests/data/parser/parseCreateProcedure4.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
"errors": [],
"str": {
"@type": "PhpMyAdmin\\SqlParser\\UtfString",
"str": "DELIMITER $$\nCREATE DEFINER=`user`@`localhost` PROCEDURE `multiDBqueryRun_V12`(IN `query` TEXT, IN `table_name_var` VARCHAR(255), IN `columns_used_var` TEXT, IN `where_text_var` TEXT, IN `separator_value_var` VARCHAR(255)) COMMENT 'Query: SingleDB → MultiDB (All DBs) + run it' NOT DETERMINISTIC MODIFIES SQL DATA SQL SECURITY INVOKER BEGIN\nSET @TABLE_NAME = table_name_var;\nSET @WHERE_TEXT = where_text_var;\nSET @COLUMNS_USED = columns_used_var;\nSET @MULTIDB_QUERY = CONCAT('SELECT \"$MULTIDB\" FROM `$MULTIDB`.', @TABLE_NAME, @WHERE_TEXT);\n\n-- EXECUTION --\nCREATE TEMPORARY TABLE `MULTIDB_TEMP_DB_TBL_COLS` AS\nSELECT * FROM (\n SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME\n FROM INFORMATION_SCHEMA.COLUMNS\n WHERE\n TABLE_SCHEMA NOT IN('mysql', 'test', 'tmp', 'information_schema', 'sys', 'performance_schema') AND\n TABLE_NAME = @TABLE_NAME AND\n FIND_IN_SET(COLUMN_NAME, @COLUMNS_USED)\n) tbl\nGROUP BY\n TABLE_SCHEMA,\n TABLE_NAME;\n\nSELECT GROUP_CONCAT(REPLACE(@MULTIDB_QUERY, '$MULTIDB', CONCAT('', TABLE_SCHEMA, '')) SEPARATOR \"\\nUNION ALL\\n\")\nINTO @stmt_sql\nFROM `MULTIDB_TEMP_DB_TBL_COLS`;\n\nPREPARE stmt FROM @stmt_sql;\nEXECUTE stmt;\nDEALLOCATE PREPARE stmt;\nEND$$\n\n--\n-- Functions\n--\nDELIMITER $$\nCREATE DEFINER=`root`@`localhost` FUNCTION `attrParentShiftIds` (`parent_id` TEXT, `option_id_shift` INT, `option_value_id_shift` INT) RETURNS TEXT CHARSET utf8mb4 COLLATE utf8mb4_unicode_520_ci DETERMINISTIC READS SQL DATA SQL SECURITY INVOKER BEGIN\n DECLARE i INT UNSIGNED DEFAULT 0;\n DECLARE pair_count INT UNSIGNED;\n DECLARE result TEXT DEFAULT '';\n\n DECLARE pair VARCHAR(255) DEFAULT '';\n DECLARE oid INT DEFAULT '';\n DECLARE vid INT DEFAULT '';\n\n SET pair_count = substrCount(parent_id, ',') + 1;\n\n WHILE i < pair_count DO\n SET result = CONCAT(result, IF(i <= 0, '', ','));\n\n SET pair = split(parent_id, ',', i + 1);\n\n SET oid = split(pair, '-', 1) + option_id_shift;\n SET vid = split(pair, '-', 2) + option_value_id_shift;\n\n SET 
pair = CONCAT(oid, '-', vid);\n SET result = CONCAT(result, pair);\n\n SET i = i + 1;\n END WHILE;\n\n RETURN result;\nEND$$\n\nDELIMITER $$\nCREATE DEFINER=`user`@`localhost` FUNCTION `split` (`string` TEXT, `delim` TEXT, `n` INT) RETURNS TEXT CHARSET utf8mb4 COLLATE utf8mb4_unicode_520_ci DETERMINISTIC SQL SECURITY INVOKER RETURN IF(\n (LENGTH(string) - LENGTH(REPLACE(string, delim, ''))) / LENGTH(delim) < n - 1,\n NULL,\n SUBSTRING_INDEX(SUBSTRING_INDEX(string, delim, n), delim, -1)\n)$$\n\nDELIMITER $$\nCREATE DEFINER=`root`@`localhost` FUNCTION `substrCount` (`s` VARCHAR(255), `ss` VARCHAR(255)) RETURNS TINYINT(3) UNSIGNED DETERMINISTIC READS SQL DATA SQL SECURITY INVOKER BEGIN\nDECLARE COUNT TINYINT(3) UNSIGNED;\nDECLARE OFFSET_I TINYINT(3) UNSIGNED;\nDECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET s = NULL;\n\nSET COUNT = 0;\nSET OFFSET_I = 1;\n\nREPEAT\nIF NOT ISNULL(s) AND OFFSET_I > 0 THEN\nSET OFFSET_I = LOCATE(ss, s, OFFSET_I);\nIF OFFSET_I > 0 THEN\nSET COUNT = COUNT + 1;\nSET OFFSET_I = OFFSET_I + 1;\nEND IF;\nEND IF;\nUNTIL ISNULL(s) OR OFFSET_I = 0 END REPEAT;\n\nRETURN COUNT;\nEND$$\n\nDELIMITER ;\n\n",
"byteIdx": 3084,
"charIdx": 3082,
"byteLen": 3085,
"charLen": 3083
"str": "DELIMITER $$\nCREATE DEFINER=`user`@`localhost` PROCEDURE `multiDBqueryRun_V12`(IN `query` TEXT, IN `table_name_var` VARCHAR(255), IN `columns_used_var` TEXT, IN `where_text_var` TEXT, IN `separator_value_var` VARCHAR(255)) COMMENT 'Query: SingleDB → MultiDB (All DBs) + run it' NOT DETERMINISTIC MODIFIES SQL DATA SQL SECURITY INVOKER BEGIN\nSET @TABLE_NAME = table_name_var;\nSET @WHERE_TEXT = where_text_var;\nSET @COLUMNS_USED = columns_used_var;\nSET @MULTIDB_QUERY = CONCAT('SELECT \"$MULTIDB\" FROM `$MULTIDB`.', @TABLE_NAME, @WHERE_TEXT);\n\n-- EXECUTION --\nCREATE TEMPORARY TABLE `MULTIDB_TEMP_DB_TBL_COLS` AS\nSELECT * FROM (\n SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME\n FROM INFORMATION_SCHEMA.COLUMNS\n WHERE\n TABLE_SCHEMA NOT IN('mysql', 'test', 'tmp', 'information_schema', 'sys', 'performance_schema') AND\n TABLE_NAME = @TABLE_NAME AND\n FIND_IN_SET(COLUMN_NAME, @COLUMNS_USED)\n) tbl\nGROUP BY\n TABLE_SCHEMA,\n TABLE_NAME;\n\nSELECT GROUP_CONCAT(REPLACE(@MULTIDB_QUERY, '$MULTIDB', CONCAT('', TABLE_SCHEMA, '')) SEPARATOR \"\\nUNION ALL\\n\")\nINTO @stmt_sql\nFROM `MULTIDB_TEMP_DB_TBL_COLS`;\n\nPREPARE stmt FROM @stmt_sql;\nEXECUTE stmt;\nDEALLOCATE PREPARE stmt;\nEND$$\n\n--\n-- Functions\n--\nDELIMITER $$\nCREATE DEFINER=`root`@`localhost` FUNCTION `attrParentShiftIds` (`parent_id` TEXT, `option_id_shift` INT, `option_value_id_shift` INT) RETURNS TEXT CHARSET utf8mb4 COLLATE utf8mb4_unicode_520_ci DETERMINISTIC READS SQL DATA SQL SECURITY INVOKER BEGIN\n DECLARE i INT UNSIGNED DEFAULT 0;\n DECLARE pair_count INT UNSIGNED;\n DECLARE result TEXT DEFAULT '';\n\n DECLARE pair VARCHAR(255) DEFAULT '';\n DECLARE oid INT DEFAULT '';\n DECLARE vid INT DEFAULT '';\n\n SET pair_count = substrCount(parent_id, ',') + 1;\n\n WHILE i < pair_count DO\n SET result = CONCAT(result, IF(i <= 0, '', ','));\n\n SET pair = split(parent_id, ',', i + 1);\n\n SET oid = split(pair, '-', 1) + option_id_shift;\n SET vid = split(pair, '-', 2) + option_value_id_shift;\n\n SET 
pair = CONCAT(oid, '-', vid);\n SET result = CONCAT(result, pair);\n\n SET i = i + 1;\n END WHILE;\n\n RETURN result;\nEND$$\n\nDELIMITER $$\nCREATE DEFINER=`user`@`localhost` FUNCTION `split` (`string` TEXT, `delim` TEXT, `n` INT) RETURNS TEXT CHARSET utf8mb4 COLLATE utf8mb4_unicode_520_ci DETERMINISTIC SQL SECURITY INVOKER RETURN IF(\n (LENGTH(string) - LENGTH(REPLACE(string, delim, ''))) / LENGTH(delim) < n - 1,\n NULL,\n SUBSTRING_INDEX(SUBSTRING_INDEX(string, delim, n), delim, -1)\n)$$\n\nDELIMITER $$\nCREATE DEFINER=`root`@`localhost` FUNCTION `substrCount` (`s` VARCHAR(255), `ss` VARCHAR(255)) RETURNS TINYINT(3) UNSIGNED DETERMINISTIC READS SQL DATA SQL SECURITY INVOKER BEGIN\nDECLARE COUNT TINYINT(3) UNSIGNED;\nDECLARE OFFSET_I TINYINT(3) UNSIGNED;\nDECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET s = NULL;\n\nSET COUNT = 0;\nSET OFFSET_I = 1;\n\nREPEAT\nIF NOT ISNULL(s) AND OFFSET_I > 0 THEN\nSET OFFSET_I = LOCATE(ss, s, OFFSET_I);\nIF OFFSET_I > 0 THEN\nSET COUNT = COUNT + 1;\nSET OFFSET_I = OFFSET_I + 1;\nEND IF;\nEND IF;\nUNTIL ISNULL(s) OR OFFSET_I = 0 END REPEAT;\n\nRETURN COUNT;\nEND$$\n\nDELIMITER ;\n\n"
},
"len": 3083,
"last": 3083,
Expand Down
Loading