Skip to content

Commit 672d656

Browse files
authored
Chore: clean up ANALYZE implementation (#4607)
* Chore: clean up ANALYZE implementation
* PR feedback
1 parent: e617d40 · commit: 672d656

File tree

4 files changed: 39 additions (+39) and 55 deletions (-55).

sqlglot/expressions.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,7 +2329,7 @@ class LoadData(Expression):
23292329

23302330

23312331
class Partition(Expression):
2332-
arg_types = {"expressions": True}
2332+
arg_types = {"expressions": True, "subpartition": False}
23332333

23342334

23352335
class PartitionRange(Expression):
@@ -4728,7 +4728,7 @@ class Analyze(Expression):
47284728
}
47294729

47304730

4731-
class Statistics(Expression):
4731+
class AnalyzeStatistics(Expression):
47324732
arg_types = {
47334733
"kind": True,
47344734
"option": False,
@@ -4737,7 +4737,7 @@ class Statistics(Expression):
47374737
}
47384738

47394739

4740-
class Histogram(Expression):
4740+
class AnalyzeHistogram(Expression):
47414741
arg_types = {
47424742
"this": True,
47434743
"expressions": True,
@@ -4746,29 +4746,20 @@ class Histogram(Expression):
47464746
}
47474747

47484748

4749-
class Sample(Expression):
4750-
arg_types = {
4751-
"kind": True,
4752-
"sample": True,
4753-
}
4749+
class AnalyzeSample(Expression):
4750+
arg_types = {"kind": True, "sample": True}
47544751

47554752

47564753
class AnalyzeListChainedRows(Expression):
4757-
arg_types = {
4758-
"expression": False,
4759-
}
4754+
arg_types = {"expression": False}
47604755

47614756

47624757
class AnalyzeDelete(Expression):
4763-
arg_types = {
4764-
"kind": False,
4765-
}
4758+
arg_types = {"kind": False}
47664759

47674760

47684761
class AnalyzeWith(Expression):
4769-
arg_types = {
4770-
"expressions": True,
4771-
}
4762+
arg_types = {"expressions": True}
47724763

47734764

47744765
class AnalyzeValidate(Expression):

sqlglot/generator.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ class Generator(metaclass=_Generator):
114114
**JSON_PATH_PART_TRANSFORMS,
115115
exp.AllowedValuesProperty: lambda self,
116116
e: f"ALLOWED_VALUES {self.expressions(e, flat=True)}",
117+
exp.AnalyzeColumns: lambda self, e: self.sql(e, "this"),
118+
exp.AnalyzeWith: lambda self, e: self.expressions(e, prefix="WITH ", sep=" "),
117119
exp.ArrayContainsAll: lambda self, e: self.binary(e, "@>"),
118120
exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
119121
exp.AutoRefreshProperty: lambda self, e: f"AUTO REFRESH {self.sql(e, 'this')}",
@@ -198,6 +200,7 @@ class Generator(metaclass=_Generator):
198200
exp.TransientProperty: lambda *_: "TRANSIENT",
199201
exp.Union: lambda self, e: self.set_operations(e),
200202
exp.UnloggedProperty: lambda *_: "UNLOGGED",
203+
exp.UsingData: lambda self, e: f"USING DATA {self.sql(e, 'this')}",
201204
exp.Uuid: lambda *_: "UUID()",
202205
exp.UppercaseColumnConstraint: lambda *_: "UPPERCASE",
203206
exp.VarMap: lambda self, e: self.func("MAP", e.args["keys"], e.args["values"]),
@@ -1558,7 +1561,8 @@ def national_sql(self, expression: exp.National, prefix: str = "N") -> str:
15581561
return f"{prefix}{string}"
15591562

15601563
def partition_sql(self, expression: exp.Partition) -> str:
1561-
return f"PARTITION({self.expressions(expression, flat=True)})"
1564+
partition_keyword = "SUBPARTITION" if expression.args.get("subpartition") else "PARTITION"
1565+
return f"{partition_keyword}({self.expressions(expression, flat=True)})"
15621566

15631567
def properties_sql(self, expression: exp.Properties) -> str:
15641568
root_properties = []
@@ -4656,12 +4660,12 @@ def unpivotcolumns_sql(self, expression: exp.UnpivotColumns) -> str:
46564660

46574661
return f"NAME {name} VALUE {values}"
46584662

4659-
def sample_sql(self, expression: exp.Sample) -> str:
4663+
def analyzesample_sql(self, expression: exp.AnalyzeSample) -> str:
46604664
kind = self.sql(expression, "kind")
46614665
sample = self.sql(expression, "sample")
46624666
return f"SAMPLE {sample} {kind}"
46634667

4664-
def statistics_sql(self, expression: exp.Statistics) -> str:
4668+
def analyzestatistics_sql(self, expression: exp.AnalyzeStatistics) -> str:
46654669
kind = self.sql(expression, "kind")
46664670
option = self.sql(expression, "option")
46674671
option = f" {option}" if option else ""
@@ -4671,7 +4675,7 @@ def statistics_sql(self, expression: exp.Statistics) -> str:
46714675
columns = f" {columns}" if columns else ""
46724676
return f"{kind}{option} STATISTICS{this}{columns}"
46734677

4674-
def histogram_sql(self, expression: exp.Histogram) -> str:
4678+
def analyzehistogram_sql(self, expression: exp.AnalyzeHistogram) -> str:
46754679
this = self.sql(expression, "this")
46764680
columns = self.expressions(expression)
46774681
inner_expression = self.sql(expression, "expression")
@@ -4680,21 +4684,11 @@ def histogram_sql(self, expression: exp.Histogram) -> str:
46804684
update_options = f" {update_options} UPDATE" if update_options else ""
46814685
return f"{this} HISTOGRAM ON {columns}{inner_expression}{update_options}"
46824686

4683-
def usingdata_sql(self, expression: exp.UsingData) -> str:
4684-
data = self.sql(expression, "this")
4685-
return f"USING DATA {data}"
4686-
4687-
def analyzecolumns_sql(self, expression: exp.AnalyzeColumns) -> str:
4688-
return self.sql(expression, "this")
4689-
46904687
def analyzedelete_sql(self, expression: exp.AnalyzeDelete) -> str:
46914688
kind = self.sql(expression, "kind")
46924689
kind = f" {kind}" if kind else ""
46934690
return f"DELETE{kind} STATISTICS"
46944691

4695-
def analyzewith_sql(self, expression: exp.AnalyzeWith) -> str:
4696-
return self.expressions(expression, prefix="WITH ", sep=" ")
4697-
46984692
def analyzelistchainedrows_sql(self, expression: exp.AnalyzeListChainedRows) -> str:
46994693
inner_expression = self.sql(expression, "expression")
47004694
return f"LIST CHAINED ROWS{inner_expression}"

sqlglot/parser.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,8 @@ class Parser(metaclass=_Parser):
13521352
"VALIDATE": lambda self: self._parse_analyze_validate(),
13531353
}
13541354

1355+
PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}
1356+
13551357
AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)
13561358

13571359
OPERATION_MODIFIERS: t.Set[str] = set()
@@ -2984,11 +2986,13 @@ def _parse_cache(self) -> exp.Cache:
29842986
)
29852987

29862988
def _parse_partition(self) -> t.Optional[exp.Partition]:
2987-
if not self._match(TokenType.PARTITION):
2989+
if not self._match_texts(self.PARTITION_KEYWORDS):
29882990
return None
29892991

29902992
return self.expression(
2991-
exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
2993+
exp.Partition,
2994+
subpartition=self._prev.text.upper() == "SUBPARTITION",
2995+
expressions=self._parse_wrapped_csv(self._parse_assignment),
29922996
)
29932997

29942998
def _parse_value(self) -> t.Optional[exp.Tuple]:
@@ -7145,44 +7149,41 @@ def _parse_analyze(self) -> exp.Analyze | exp.Command:
71457149
else:
71467150
options.append(self._prev.text.upper())
71477151

7148-
kind = None
7149-
mode = None
71507152
this: t.Optional[exp.Expression] = None
7151-
partition: t.Optional[exp.Expression] = None
71527153
inner_expression: t.Optional[exp.Expression] = None
71537154

7155+
kind = self._curr and self._curr.text.upper()
7156+
71547157
if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
7155-
kind = self._prev.text.upper()
71567158
this = self._parse_table_parts()
71577159
elif self._match_text_seq("TABLES"):
7158-
kind = self._prev.text.upper()
71597160
if self._match_set((TokenType.FROM, TokenType.IN)):
71607161
kind = f"{kind} {self._prev.text.upper()}"
71617162
this = self._parse_table(schema=True, is_db_reference=True)
71627163
elif self._match_text_seq("DATABASE"):
7163-
kind = self._prev.text.upper()
71647164
this = self._parse_table(schema=True, is_db_reference=True)
71657165
elif self._match_text_seq("CLUSTER"):
7166-
kind = self._prev.text.upper()
71677166
this = self._parse_table()
71687167
# Try matching inner expr keywords before fallback to parse table.
71697168
elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
7169+
kind = None
71707170
inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
71717171
else:
71727172
# Empty kind https://prestodb.io/docs/current/sql/analyze.html
7173+
kind = None
71737174
this = self._parse_table_parts()
71747175

7175-
try:
7176-
partition = self._parse_partition()
7177-
except ParseError:
7176+
partition = self._try_parse(self._parse_partition)
7177+
if not partition and self._match_texts(self.PARTITION_KEYWORDS):
71787178
return self._parse_as_command(start)
71797179

71807180
# https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
7181-
if self._match_text_seq("WITH", "SYNC", "MODE", advance=False) or self._match_text_seq(
7182-
"WITH", "ASYNC", "MODE", advance=False
7181+
if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
7182+
"WITH", "ASYNC", "MODE"
71837183
):
7184-
mode = f"WITH {self._next.text.upper()} MODE"
7185-
self._advance(3)
7184+
mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE"
7185+
else:
7186+
mode = None
71867187

71877188
if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
71887189
inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
@@ -7200,7 +7201,7 @@ def _parse_analyze(self) -> exp.Analyze | exp.Command:
72007201
)
72017202

72027203
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
7203-
def _parse_analyze_statistics(self) -> exp.Statistics:
7204+
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
72047205
this = None
72057206
kind = self._prev.text.upper()
72067207
option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
@@ -7221,14 +7222,14 @@ def _parse_analyze_statistics(self) -> exp.Statistics:
72217222
sample = self._parse_number()
72227223
expressions = [
72237224
self.expression(
7224-
exp.Sample,
7225+
exp.AnalyzeSample,
72257226
sample=sample,
72267227
kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
72277228
)
72287229
]
72297230

72307231
return self.expression(
7231-
exp.Statistics, kind=kind, option=option, this=this, expressions=expressions
7232+
exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
72327233
)
72337234

72347235
# https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
@@ -7257,24 +7258,21 @@ def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
72577258
this = self._prev.text.upper()
72587259
if self._match_text_seq("COLUMNS"):
72597260
return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
7260-
self.raise_error("Expecting COLUMNS")
72617261
return None
72627262

72637263
def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
72647264
kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
72657265
if self._match_text_seq("STATISTICS"):
72667266
return self.expression(exp.AnalyzeDelete, kind=kind)
7267-
self.raise_error("Expecting STATISTICS")
72687267
return None
72697268

72707269
def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
72717270
if self._match_text_seq("CHAINED", "ROWS"):
72727271
return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
7273-
self.raise_error("Expecting CHAINED ROWS")
72747272
return None
72757273

72767274
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
7277-
def _parse_analyze_histogram(self) -> exp.Histogram:
7275+
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
72787276
this = self._prev.text.upper()
72797277
expression: t.Optional[exp.Expression] = None
72807278
expressions = []
@@ -7305,7 +7303,7 @@ def _parse_analyze_histogram(self) -> exp.Histogram:
73057303
expression = self.expression(exp.UsingData, this=self._parse_string())
73067304

73077305
return self.expression(
7308-
exp.Histogram,
7306+
exp.AnalyzeHistogram,
73097307
this=this,
73107308
expressions=expressions,
73117309
expression=expression,

tests/dialects/test_oracle.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,7 @@ def test_analyze(self):
659659
self.validate_identity("ANALYZE TABLE tbl")
660660
self.validate_identity("ANALYZE INDEX ndx")
661661
self.validate_identity("ANALYZE TABLE db.tbl PARTITION(foo = 'foo', bar = 'bar')")
662+
self.validate_identity("ANALYZE TABLE db.tbl SUBPARTITION(foo = 'foo', bar = 'bar')")
662663
self.validate_identity("ANALYZE INDEX db.ndx PARTITION(foo = 'foo', bar = 'bar')")
663664
self.validate_identity("ANALYZE INDEX db.ndx PARTITION(part1)")
664665
self.validate_identity("ANALYZE CLUSTER db.cluster")

0 commit comments

Comments
 (0)