Skip to content

Commit 35923e9

Browse files
authored
Feat!: POSITION and all their variants for all dialects (#4606)
* Feat!: POSITION for all dialects
* Feat: add clickhouse parsing test
* cleanup
* mypy fix
* Comments addressed
* more tests and default params for strposition_sql
* test fix
* cleanup
* another cleanup
* more comments addressed
1 parent ffa0df7 commit 35923e9

22 files changed: +311 -112 lines changed

sqlglot/dialects/bigquery.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
timestrtotime_sql,
2727
ts_or_ds_add_cast,
2828
unit_to_var,
29-
str_position_sql,
29+
strposition_sql,
3030
)
3131
from sqlglot.helper import seq_get, split_num_words
3232
from sqlglot.tokens import TokenType
@@ -934,7 +934,11 @@ class Generator(generator.Generator):
934934
"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
935935
),
936936
exp.String: rename_func("STRING"),
937-
exp.StrPosition: str_position_sql,
937+
exp.StrPosition: lambda self, e: (
938+
strposition_sql(
939+
self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
940+
)
941+
),
938942
exp.StrToDate: _str_to_datetime_sql,
939943
exp.StrToTime: _str_to_datetime_sql,
940944
exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),

sqlglot/dialects/clickhouse.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
build_json_extract_path,
1717
rename_func,
1818
sha256_sql,
19+
strposition_sql,
1920
var_map_sql,
2021
timestamptrunc_sql,
2122
unit_to_var,
@@ -997,8 +998,12 @@ class Generator(generator.Generator):
997998
exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
998999
exp.Rand: rename_func("randCanonical"),
9991000
exp.StartsWith: rename_func("startsWith"),
1000-
exp.StrPosition: lambda self, e: self.func(
1001-
"position", e.this, e.args.get("substr"), e.args.get("position")
1001+
exp.StrPosition: lambda self, e: strposition_sql(
1002+
self,
1003+
e,
1004+
func_name="POSITION",
1005+
supports_position=True,
1006+
use_ansi_position=False,
10021007
),
10031008
exp.TimeToStr: lambda self, e: self.func(
10041009
"formatDateTime", e.this, self.format_time(e), e.args.get("zone")

sqlglot/dialects/dialect.py

Lines changed: 36 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1067,36 +1067,46 @@ def property_sql(self: Generator, expression: exp.Property) -> str:
10671067
return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
10681068

10691069

1070-
def str_position_sql(
1070+
def strposition_sql(
10711071
self: Generator,
10721072
expression: exp.StrPosition,
1073-
generate_instance: bool = False,
1074-
str_position_func_name: str = "STRPOS",
1073+
func_name: str = "STRPOS",
1074+
supports_position: bool = False,
1075+
supports_occurrence: bool = False,
1076+
use_ansi_position: bool = True,
10751077
) -> str:
1076-
this = self.sql(expression, "this")
1077-
substr = self.sql(expression, "substr")
1078-
position = self.sql(expression, "position")
1079-
instance = expression.args.get("instance") if generate_instance else None
1080-
position_offset = ""
1081-
1082-
if position:
1083-
# Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1084-
this = self.func("SUBSTR", this, position)
1085-
position_offset = f" + {position} - 1"
1086-
1087-
strpos_sql = self.func(str_position_func_name, this, substr, instance)
1088-
1089-
if position_offset:
1090-
zero = exp.Literal.number(0)
1091-
# If match is not found (returns 0) the position offset should not be applied
1092-
case = exp.If(
1093-
this=exp.EQ(this=strpos_sql, expression=zero),
1094-
true=zero,
1095-
false=strpos_sql + position_offset,
1096-
)
1097-
strpos_sql = self.sql(case)
1078+
string = expression.this
1079+
substr = expression.args.get("substr")
1080+
position = expression.args.get("position")
1081+
occurrence = expression.args.get("occurrence")
1082+
zero = exp.Literal.number(0)
1083+
one = exp.Literal.number(1)
1084+
1085+
if supports_occurrence and occurrence and supports_position and not position:
1086+
position = one
1087+
1088+
if position and not supports_position:
1089+
string = exp.Substring(this=string, start=position)
1090+
1091+
if func_name == "POSITION" and use_ansi_position:
1092+
func = exp.Anonymous(this=func_name, expressions=[exp.In(this=substr, field=string)])
1093+
else:
1094+
args = [substr, string] if func_name in ("LOCATE", "CHARINDEX") else [string, substr]
1095+
if supports_position:
1096+
args.append(position)
1097+
if occurrence:
1098+
if supports_occurrence:
1099+
args.append(occurrence)
1100+
else:
1101+
self.unsupported(f"{func_name} does not support the occurrence parameter.")
1102+
func = exp.Anonymous(this=func_name, expressions=args)
1103+
1104+
if position and not supports_position:
1105+
func_with_offset = exp.Sub(this=func + position, expression=one)
1106+
func_wrapped = exp.If(this=func.eq(zero), true=zero, false=func_with_offset)
1107+
return self.sql(func_wrapped)
10981108

1099-
return strpos_sql
1109+
return self.sql(func)
11001110

11011111

11021112
def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
@@ -1269,18 +1279,6 @@ def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
12691279
return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
12701280

12711281

1272-
def locate_to_strposition(args: t.List) -> exp.Expression:
1273-
return exp.StrPosition(
1274-
this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1275-
)
1276-
1277-
1278-
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1279-
return self.func(
1280-
"LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1281-
)
1282-
1283-
12841282
def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
12851283
return self.sql(
12861284
exp.Substring(

sqlglot/dialects/drill.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
build_formatted_time,
99
no_trycast_sql,
1010
rename_func,
11-
str_position_sql,
11+
strposition_sql,
1212
timestrtotime_sql,
1313
)
1414
from sqlglot.dialects.mysql import date_add_sql
@@ -136,12 +136,12 @@ class Generator(generator.Generator):
136136
),
137137
exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
138138
exp.RegexpLike: rename_func("REGEXP_MATCHES"),
139-
exp.StrPosition: str_position_sql,
140139
exp.StrToDate: _str_to_date,
141140
exp.Pow: rename_func("POW"),
142141
exp.Select: transforms.preprocess(
143142
[transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
144143
),
144+
exp.StrPosition: strposition_sql,
145145
exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
146146
exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
147147
exp.TimeStrToTime: timestrtotime_sql,

sqlglot/dialects/duckdb.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
no_timestamp_sql,
2525
pivot_column_names,
2626
rename_func,
27-
str_position_sql,
27+
strposition_sql,
2828
str_to_time_sql,
2929
timestamptrunc_sql,
3030
timestrtotime_sql,
@@ -618,7 +618,7 @@ class Generator(generator.Generator):
618618
exp.SHA2: sha256_sql,
619619
exp.Split: rename_func("STR_SPLIT"),
620620
exp.SortArray: _sort_array_sql,
621-
exp.StrPosition: str_position_sql,
621+
exp.StrPosition: strposition_sql,
622622
exp.StrToUnix: lambda self, e: self.func(
623623
"EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
624624
),

sqlglot/dialects/hive.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
if_sql,
1616
is_parse_json,
1717
left_to_substring_sql,
18-
locate_to_strposition,
1918
max_or_greatest,
2019
min_or_least,
2120
no_ilike_sql,
@@ -25,7 +24,7 @@
2524
regexp_replace_sql,
2625
rename_func,
2726
right_to_substring_sql,
28-
strposition_to_locate_sql,
27+
strposition_sql,
2928
struct_extract_sql,
3029
time_format,
3130
timestrtotime_sql,
@@ -305,7 +304,6 @@ class Parser(parser.Parser):
305304
"GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
306305
"LAST": _build_with_ignore_nulls(exp.Last),
307306
"LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
308-
"LOCATE": locate_to_strposition,
309307
"MAP": parser.build_var_map,
310308
"MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
311309
"PERCENTILE": exp.Quantile.from_arg_list,
@@ -562,7 +560,9 @@ class Generator(generator.Generator):
562560
transforms.any_to_exists,
563561
]
564562
),
565-
exp.StrPosition: strposition_to_locate_sql,
563+
exp.StrPosition: lambda self, e: strposition_sql(
564+
self, e, func_name="LOCATE", supports_position=True
565+
),
566566
exp.StrToDate: _str_to_date_sql,
567567
exp.StrToTime: _str_to_time_sql,
568568
exp.StrToUnix: _str_to_unix_sql,

sqlglot/dialects/mysql.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
build_formatted_time,
1313
isnull_to_is_null,
1414
length_or_char_length_sql,
15-
locate_to_strposition,
1615
max_or_greatest,
1716
min_or_least,
1817
no_ilike_sql,
@@ -23,7 +22,7 @@
2322
build_date_delta,
2423
build_date_delta_with_interval,
2524
rename_func,
26-
strposition_to_locate_sql,
25+
strposition_sql,
2726
unit_to_var,
2827
trim_sql,
2928
timestrtotime_sql,
@@ -311,7 +310,6 @@ class Parser(parser.Parser):
311310
"FROM_UNIXTIME": build_formatted_time(exp.UnixToTime, "mysql"),
312311
"ISNULL": isnull_to_is_null,
313312
"LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
314-
"LOCATE": locate_to_strposition,
315313
"MAKETIME": exp.TimeFromParts.from_arg_list,
316314
"MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
317315
"MONTHNAME": lambda args: exp.TimeToStr(
@@ -750,7 +748,9 @@ class Generator(generator.Generator):
750748
transforms.unnest_generate_date_array_using_recursive_cte,
751749
]
752750
),
753-
exp.StrPosition: strposition_to_locate_sql,
751+
exp.StrPosition: lambda self, e: strposition_sql(
752+
self, e, func_name="LOCATE", supports_position=True
753+
),
754754
exp.StrToDate: _str_to_date_sql,
755755
exp.StrToTime: _str_to_date_sql,
756756
exp.Stuff: rename_func("INSERT"),

sqlglot/dialects/oracle.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
build_formatted_time,
1010
no_ilike_sql,
1111
rename_func,
12-
str_position_sql,
12+
strposition_sql,
1313
to_number_with_nls_param,
1414
trim_sql,
1515
)
@@ -300,8 +300,10 @@ class Generator(generator.Generator):
300300
transforms.eliminate_qualify,
301301
]
302302
),
303-
exp.StrPosition: lambda self, e: str_position_sql(
304-
self, e, str_position_func_name="INSTR"
303+
exp.StrPosition: lambda self, e: (
304+
strposition_sql(
305+
self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
306+
)
305307
),
306308
exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
307309
exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)),

sqlglot/dialects/postgres.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
timestrtotime_sql,
3333
trim_sql,
3434
ts_or_ds_add_cast,
35-
str_position_sql,
35+
strposition_sql,
3636
)
3737
from sqlglot.helper import is_int, seq_get
3838
from sqlglot.parser import binary_range_parser
@@ -584,7 +584,7 @@ class Generator(generator.Generator):
584584
]
585585
),
586586
exp.SHA2: sha256_sql,
587-
exp.StrPosition: str_position_sql,
587+
exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"),
588588
exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)),
589589
exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
590590
exp.StructExtract: struct_extract_sql,

sqlglot/dialects/presto.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121
rename_func,
2222
right_to_substring_sql,
2323
sha256_sql,
24+
strposition_sql,
2425
struct_extract_sql,
25-
str_position_sql,
2626
timestamptrunc_sql,
2727
timestrtotime_sql,
2828
ts_or_ds_add_cast,
@@ -291,7 +291,7 @@ class Parser(parser.Parser):
291291
"SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
292292
"SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
293293
"STRPOS": lambda args: exp.StrPosition(
294-
this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
294+
this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
295295
),
296296
"TO_CHAR": _build_to_char,
297297
"TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
@@ -428,7 +428,7 @@ class Generator(generator.Generator):
428428
]
429429
),
430430
exp.SortArray: _no_sort_array,
431-
exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
431+
exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
432432
exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
433433
exp.StrToMap: rename_func("SPLIT_TO_MAP"),
434434
exp.StrToTime: _str_to_time_sql,

0 commit comments

Comments
 (0)