Skip to content

Commit db2d9cc

Browse files
Fix(tokenizer): handle empty hex strings (#5763)
1 parent 85845bb commit db2d9cc

File tree

3 files changed

+12
-2
lines changed

3 files changed

+12
-2
lines changed

sqlglot/tokens.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1427,7 +1427,7 @@ def _scan_string(self, start: str) -> bool:
1427 1427   self._advance(len(start))
1428 1428   text = self._extract_string(end, raw_string=token_type == TokenType.RAW_STRING)
1429 1429
1430      - if base:
     1430 + if base and text:
1431 1431   try:
1432 1432   int(text, base)
1433 1433   except Exception:

sqlglotrs/src/tokenizer.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -468,7 +468,7 @@ impl<'a> TokenizerState<'a> {
468 468   self.extract_string(&end, false, token_type == self.token_types.raw_string, true)?;
469 469
470 470   if let Some(b) = base {
471     - if u128::from_str_radix(&text, b).is_err() {
    471 + if !text.is_empty() && u128::from_str_radix(&text, b).is_err() {
472 472   return self.error_result(format!(
473 473   "Numeric string contains invalid characters from {}:{}",
474 474   self.line, self.start

tests/dialects/test_spark.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1029,6 +1029,16 @@ def test_string(self):
1029 1029   query = parse_one("STRING(a)", dialect=dialect)
1030 1030   self.assertEqual(query.sql(dialect), "CAST(a AS STRING)")
1031 1031
     1032 + def test_binary_string(self):
     1033 + for dialect in ("spark2", "spark", "databricks"):
     1034 + with self.subTest(f"Testing HEX strings for {dialect}"):
     1035 + query = parse_one("X'ab'", dialect=dialect)
     1036 + self.assertEqual(query.sql(dialect), "X'ab'")
     1037 +
     1038 + with self.subTest(f"Testing empty HEX strings for {dialect}"):
     1039 + query = parse_one("X''", dialect=dialect)
     1040 + self.assertEqual(query.sql(dialect), "X''")
     1041 +
1032 1042   def test_analyze(self):
1033 1043   self.validate_identity("ANALYZE TABLE tbl COMPUTE STATISTICS NOSCAN")
1034 1044   self.validate_identity("ANALYZE TABLE tbl COMPUTE STATISTICS FOR ALL COLUMNS")

0 commit comments

Comments
 (0)