Skip to content

Commit 7110f63

Browse files
committed
Add UnifiedQueryParser with language-specific implementations
Extract parsing logic from UnifiedQueryPlanner into a UnifiedQueryParser interface with language-specific implementations: PPLQueryParser (returns UnresolvedPlan) and CalciteSqlQueryParser (returns SqlNode). UnifiedQueryContext owns the parser instance, created eagerly by the builder which has direct access to query type and future SQL config. Each implementation receives only its required dependencies: PPLQueryParser takes Settings, CalciteSqlQueryParser takes CalcitePlanContext. UnifiedQueryPlanner.CustomVisitorStrategy now obtains the parser from the context via the interface type. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent f0bcbab commit 7110f63

File tree

8 files changed

+345
-31
lines changed

8 files changed

+345
-31
lines changed

api/README.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ This module provides components organized into two main areas aligned with the [
88

99
### Unified Language Specification
1010

11-
- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation.
11+
- **`UnifiedQueryParser`**: Parses PPL (Piped Processing Language) or SQL queries and returns the native parse result (`UnresolvedPlan` for PPL, `SqlNode` for Calcite SQL).
12+
- **`UnifiedQueryPlanner`**: Accepts PPL or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation.
1213
- **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects.
1314

1415
### Unified Execution Runtime
@@ -42,6 +43,20 @@ UnifiedQueryContext context = UnifiedQueryContext.builder()
4243
.build();
4344
```
4445

46+
### UnifiedQueryParser
47+
48+
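Use `UnifiedQueryParser` to parse a query into the native parse tree of its language. The parser is owned by `UnifiedQueryContext`, which exposes it as a wildcard-typed `UnifiedQueryParser<?>`, so callers cast the result of `parse` to the type that matches the context's query type.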
49+
50+
```java
51+
// PPL parsing
52+
UnresolvedPlan ast = (UnresolvedPlan) context.getParser().parse("source = logs | where status = 200");
53+
54+
// SQL parsing (with QueryType.SQL context)
55+
SqlNode sqlNode = (SqlNode) sqlContext.getParser().parse("SELECT * FROM logs WHERE status = 200");
56+
```
57+
58+
Callers can then use each language's native visitor infrastructure (`AbstractNodeVisitor` for PPL, `SqlBasicVisitor` for Calcite SQL) on the typed result for further analysis.
59+
4560
### UnifiedQueryPlanner
4661

4762
Use `UnifiedQueryPlanner` to parse and analyze PPL or SQL queries into Calcite logical plans. The planner accepts a `UnifiedQueryContext` and can be reused for multiple queries.

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
import java.util.Objects;
1616
import java.util.Optional;
1717
import java.util.concurrent.Callable;
18-
import lombok.Value;
18+
import lombok.AllArgsConstructor;
19+
import lombok.Getter;
1920
import org.apache.calcite.avatica.util.Casing;
2021
import org.apache.calcite.jdbc.CalciteSchema;
2122
import org.apache.calcite.plan.RelTraitDef;
@@ -26,6 +27,9 @@
2627
import org.apache.calcite.tools.FrameworkConfig;
2728
import org.apache.calcite.tools.Frameworks;
2829
import org.apache.calcite.tools.Programs;
30+
import org.opensearch.sql.api.parser.CalciteSqlQueryParser;
31+
import org.opensearch.sql.api.parser.PPLQueryParser;
32+
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2933
import org.opensearch.sql.calcite.CalcitePlanContext;
3034
import org.opensearch.sql.calcite.SysLimit;
3135
import org.opensearch.sql.common.setting.Settings;
@@ -40,14 +44,18 @@
4044
* centralizes configuration for catalog schemas, query type, execution limits, and other settings,
4145
* enabling consistent behavior across all unified query operations.
4246
*/
43-
@Value
47+
@AllArgsConstructor
48+
@Getter
4449
public class UnifiedQueryContext implements AutoCloseable {
4550

4651
/** CalcitePlanContext containing Calcite framework configuration and query type. */
47-
CalcitePlanContext planContext;
52+
private final CalcitePlanContext planContext;
4853

4954
/** Settings containing execution limits and feature flags used by parsers and planners. */
50-
Settings settings;
55+
private final Settings settings;
56+
57+
/** Query parser created eagerly from this context's configuration. */
58+
private final UnifiedQueryParser<?> parser;
5159

5260
/**
5361
* Returns the profiling result. Call after query execution to retrieve collected metrics. Returns
@@ -202,7 +210,14 @@ public UnifiedQueryContext build() {
202210
CalcitePlanContext.create(
203211
buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType);
204212
QueryProfiling.activate(profiling);
205-
return new UnifiedQueryContext(planContext, settings);
213+
return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings));
214+
}
215+
216+
private UnifiedQueryParser<?> createParser(CalcitePlanContext planContext, Settings settings) {
217+
return switch (queryType) {
218+
case PPL -> new PPLQueryParser(settings);
219+
case SQL -> new CalciteSqlQueryParser(planContext);
220+
};
206221
}
207222

208223
private Settings buildSettings() {

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import static org.opensearch.sql.monitor.profile.MetricName.ANALYZE;
99

1010
import lombok.RequiredArgsConstructor;
11-
import org.antlr.v4.runtime.tree.ParseTree;
1211
import org.apache.calcite.rel.RelCollation;
1312
import org.apache.calcite.rel.RelCollations;
1413
import org.apache.calcite.rel.RelNode;
@@ -18,15 +17,11 @@
1817
import org.apache.calcite.sql.SqlNode;
1918
import org.apache.calcite.tools.Frameworks;
2019
import org.apache.calcite.tools.Planner;
21-
import org.opensearch.sql.ast.statement.Query;
22-
import org.opensearch.sql.ast.statement.Statement;
20+
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2321
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2422
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
2523
import org.opensearch.sql.common.antlr.SyntaxCheckException;
2624
import org.opensearch.sql.executor.QueryType;
27-
import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;
28-
import org.opensearch.sql.ppl.parser.AstBuilder;
29-
import org.opensearch.sql.ppl.parser.AstStatementBuilder;
3025

3126
/**
3227
* {@code UnifiedQueryPlanner} provides a high-level API for parsing and analyzing queries using the
@@ -93,36 +88,26 @@ public RelNode plan(String query) throws Exception {
9388
}
9489
}
9590

96-
/** AST-based planning via ANTLR parser → UnresolvedPlan → CalciteRelNodeVisitor. */
97-
@RequiredArgsConstructor
91+
/** AST-based planning via context-owned parser → UnresolvedPlan → CalciteRelNodeVisitor. */
9892
private static class CustomVisitorStrategy implements PlanningStrategy {
9993
private final UnifiedQueryContext context;
100-
private final PPLSyntaxParser parser = new PPLSyntaxParser();
94+
private final UnifiedQueryParser<UnresolvedPlan> parser;
10195
private final CalciteRelNodeVisitor relNodeVisitor =
10296
new CalciteRelNodeVisitor(new EmptyDataSourceService());
10397

98+
@SuppressWarnings("unchecked")
99+
CustomVisitorStrategy(UnifiedQueryContext context) {
100+
this.context = context;
101+
this.parser = (UnifiedQueryParser<UnresolvedPlan>) context.getParser();
102+
}
103+
104104
@Override
105105
public RelNode plan(String query) {
106-
UnresolvedPlan ast = parse(query);
106+
UnresolvedPlan ast = parser.parse(query);
107107
RelNode logical = relNodeVisitor.analyze(ast, context.getPlanContext());
108108
return preserveCollation(logical);
109109
}
110110

111-
private UnresolvedPlan parse(String query) {
112-
ParseTree cst = parser.parse(query);
113-
AstStatementBuilder astStmtBuilder =
114-
new AstStatementBuilder(
115-
new AstBuilder(query, context.getSettings()),
116-
AstStatementBuilder.StatementBuilderContext.builder().build());
117-
Statement statement = cst.accept(astStmtBuilder);
118-
119-
if (statement instanceof Query) {
120-
return ((Query) statement).getPlan();
121-
}
122-
throw new UnsupportedOperationException(
123-
"Only query statements are supported but got " + statement.getClass().getSimpleName());
124-
}
125-
126111
private RelNode preserveCollation(RelNode logical) {
127112
RelCollation collation = logical.getTraitSet().getCollation();
128113
if (!(logical instanceof Sort) && collation != RelCollations.EMPTY) {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import lombok.RequiredArgsConstructor;
9+
import org.apache.calcite.sql.SqlNode;
10+
import org.apache.calcite.sql.parser.SqlParseException;
11+
import org.apache.calcite.sql.parser.SqlParser;
12+
import org.opensearch.sql.calcite.CalcitePlanContext;
13+
import org.opensearch.sql.common.antlr.SyntaxCheckException;
14+
15+
/** Calcite SQL query parser that produces {@link SqlNode} as the native parse result. */
16+
@RequiredArgsConstructor
17+
public class CalciteSqlQueryParser implements UnifiedQueryParser<SqlNode> {
18+
19+
/** Calcite plan context providing parser configuration (e.g., case sensitivity, conformance). */
20+
private final CalcitePlanContext planContext;
21+
22+
@Override
23+
public SqlNode parse(String query) {
24+
try {
25+
SqlParser parser = SqlParser.create(query, planContext.config.getParserConfig());
26+
return parser.parseQuery();
27+
} catch (SqlParseException e) {
28+
throw new SyntaxCheckException("Failed to parse SQL query: " + e.getMessage());
29+
}
30+
}
31+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import lombok.RequiredArgsConstructor;
9+
import org.antlr.v4.runtime.tree.ParseTree;
10+
import org.opensearch.sql.ast.statement.Query;
11+
import org.opensearch.sql.ast.statement.Statement;
12+
import org.opensearch.sql.ast.tree.UnresolvedPlan;
13+
import org.opensearch.sql.common.setting.Settings;
14+
import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;
15+
import org.opensearch.sql.ppl.parser.AstBuilder;
16+
import org.opensearch.sql.ppl.parser.AstStatementBuilder;
17+
18+
/** PPL query parser that produces {@link UnresolvedPlan} as the native parse result. */
19+
@RequiredArgsConstructor
20+
public class PPLQueryParser implements UnifiedQueryParser<UnresolvedPlan> {
21+
22+
/** Settings containing execution limits and feature flags used by AST builders. */
23+
private final Settings settings;
24+
25+
/** Reusable ANTLR-based PPL syntax parser. Stateless and thread-safe. */
26+
private final PPLSyntaxParser syntaxParser = new PPLSyntaxParser();
27+
28+
@Override
29+
public UnresolvedPlan parse(String query) {
30+
ParseTree cst = syntaxParser.parse(query);
31+
AstStatementBuilder astStmtBuilder =
32+
new AstStatementBuilder(
33+
new AstBuilder(query, settings),
34+
AstStatementBuilder.StatementBuilderContext.builder().build());
35+
Statement statement = cst.accept(astStmtBuilder);
36+
37+
if (statement instanceof Query) {
38+
return ((Query) statement).getPlan();
39+
}
40+
throw new UnsupportedOperationException(
41+
"Only query statements are supported but got " + statement.getClass().getSimpleName());
42+
}
43+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
/**
 * Contract for turning a raw query string into the parse result that is native to a query
 * language, for example {@code UnresolvedPlan} for PPL or {@code SqlNode} for Calcite SQL.
 *
 * @param <T> type of the native parse result produced for the language
 */
public interface UnifiedQueryParser<T> {

  /**
   * Parses the given query text.
   *
   * @param query raw query string to parse
   * @return native parse result for the query
   */
  T parse(String query);
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import org.apache.calcite.sql.parser.SqlParserFixture;
9+
import org.junit.Test;
10+
import org.opensearch.sql.api.UnifiedQueryTestBase;
11+
import org.opensearch.sql.executor.QueryType;
12+
13+
/**
14+
* SQL parser tests using Calcite's {@link SqlParserFixture} for idiomatic parse-unparse assertions.
15+
* Parser config is read from {@link org.opensearch.sql.api.UnifiedQueryContext} to stay in sync
16+
* with production.
17+
*/
18+
public class UnifiedQueryParserSqlTest extends UnifiedQueryTestBase {
19+
20+
@Override
21+
protected QueryType queryType() {
22+
return QueryType.SQL;
23+
}
24+
25+
@Test
26+
public void testParseSelectStar() {
27+
sql("SELECT * FROM catalog.employees")
28+
.ok(
29+
"""
30+
SELECT *
31+
FROM `catalog`.`employees`\
32+
""");
33+
}
34+
35+
@Test
36+
public void testParseSelectColumns() {
37+
sql("SELECT id, name FROM catalog.employees")
38+
.ok(
39+
"""
40+
SELECT `id`, `name`
41+
FROM `catalog`.`employees`\
42+
""");
43+
}
44+
45+
@Test
46+
public void testParseFilter() {
47+
sql("""
48+
SELECT name
49+
FROM catalog.employees
50+
WHERE age > 30\
51+
""")
52+
.ok(
53+
"""
54+
SELECT `name`
55+
FROM `catalog`.`employees`
56+
WHERE (`age` > 30)\
57+
""");
58+
}
59+
60+
@Test
61+
public void testParseAggregate() {
62+
sql("""
63+
SELECT department, count(*) AS cnt
64+
FROM catalog.employees
65+
GROUP BY department\
66+
""")
67+
.ok(
68+
"""
69+
SELECT `department`, COUNT(*) AS `cnt`
70+
FROM `catalog`.`employees`
71+
GROUP BY `department`\
72+
""");
73+
}
74+
75+
@Test
76+
public void testParseOrderBy() {
77+
sql("""
78+
SELECT name
79+
FROM catalog.employees
80+
ORDER BY age DESC\
81+
""")
82+
.ok(
83+
"""
84+
SELECT `name`
85+
FROM `catalog`.`employees`
86+
ORDER BY `age` DESC\
87+
""");
88+
}
89+
90+
@Test
91+
public void testParseJoin() {
92+
sql("""
93+
SELECT a.id, b.name
94+
FROM catalog.employees a
95+
JOIN catalog.employees b ON a.id = b.age\
96+
""")
97+
.ok(
98+
"""
99+
SELECT `a`.`id`, `b`.`name`
100+
FROM `catalog`.`employees` AS `a`
101+
INNER JOIN `catalog`.`employees` AS `b` ON (`a`.`id` = `b`.`age`)\
102+
""");
103+
}
104+
105+
@Test
106+
public void testSyntaxErrorFails() {
107+
sql("SELECT ^FROM^").fails("(?s).*Incorrect syntax near the keyword 'FROM'.*");
108+
}
109+
110+
private SqlParserFixture sql(String sql) {
111+
return SqlParserFixture.DEFAULT
112+
.withConfig(c -> context.getPlanContext().config.getParserConfig())
113+
.sql(sql);
114+
}
115+
}

0 commit comments

Comments
 (0)