Skip to content

Commit 1175af3

Browse files
committed
resolve review comments
1 parent 7e10ca9 commit 1175af3

File tree

3 files changed

+49
-20
lines changed

3 files changed

+49
-20
lines changed

presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,12 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
132132
boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty();
133133
if (hasMetadataFilter) {
134134
metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get()));
135-
log.debug("Metadata SQL query: %s", metadataSqlQuery);
135+
log.debug("Metadata SQL query: %s", metadataSqlQuery.get());
136136
}
137137

138138
if (kqlQuery.isPresent() || hasMetadataFilter) {
139139
if (kqlQuery.isPresent()) {
140-
log.debug("KQL query: %s", kqlQuery);
140+
log.debug("KQL query: %s", kqlQuery.get());
141141
}
142142

143143
ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);

presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
/**
5050
* Utility for rewriting CLP UDFs (e.g., <code>CLP_GET_*</code>) in {@link RowExpression} trees.
5151
* <p>
52-
* This optimizer traverses a query plan and rewrites calls to <code>CLP_GET_*</code> UDFs into
52+
* Traverses a query plan and rewrites calls to <code>CLP_GET_*</code> UDFs into
5353
* {@link VariableReferenceExpression}s with meaningful names derived from their arguments.
5454
* <p>
5555
* This enables querying fields that are not part of the original table schema but are available
@@ -62,7 +62,7 @@ public final class ClpUdfRewriter
6262

6363
public ClpUdfRewriter(FunctionMetadataManager functionManager)
6464
{
65-
this.functionManager = requireNonNull(functionManager);
65+
this.functionManager = requireNonNull(functionManager, "functionManager is null");
6666
}
6767

6868
@Override
@@ -73,14 +73,14 @@ public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, Variable
7373

7474
/**
7575
* Collects all existing variable assignments from {@link TableScanNode} instances that map
76-
* column handles to {@link VariableReferenceExpression}s.
76+
* {@link ColumnHandle}s to {@link VariableReferenceExpression}s.
7777
* <p>
7878
* This method traverses the given plan subtree, visiting the {@link TableScanNode} and
7979
* extracting its assignments. The resulting map allows tracking which scan-level variables
8080
* already exist for specific columns, so that subsequent optimizer passes (e.g., CLP UDF
8181
* rewriting) can reuse them instead of creating duplicates.
8282
* <p>
83-
* The returned map is keyed by the column handle, and the value is the
83+
* The key of the returned map is the {@link ColumnHandle}, and the value is the
8484
* {@link VariableReferenceExpression} assigned to it in the scan node.
8585
*
8686
* @param root the root {@link PlanNode} of the plan subtree
@@ -137,24 +137,26 @@ public PlanNode visitFilter(FilterNode node, RewriteContext<Void> context)
137137
}
138138

139139
/**
140-
* Rewrites <code>CLP_GET_*</code> UDFs in a {@link RowExpression}, collecting each resulting
141-
* variable into the given map along with its associated {@link ColumnHandle}.
140+
* Rewrites <code>CLP_GET_*</code> UDFs in a {@link RowExpression}, collecting each
141+
* resulting variable into the given map along with its associated {@link ColumnHandle}.
142142
* <p>
143-
* Each <code>CLP_GET_*</code> UDF must take a single constant string argument, which is used to
144-
* construct the name of the variable reference (e.g. <code>CLP_GET_STRING('foo')</code> becomes
145-
* a variable name <code>foo</code>). Invalid usages (e.g., non-constant arguments) will throw a
146-
* {@link PrestoException}.
143+
* Each <code>CLP_GET_*</code> UDF must take a single constant string argument, which is
144+
* used to construct the name of the variable reference (e.g.
145+
* <code>CLP_GET_STRING('foo')</code> becomes a variable name <code>foo</code>). Invalid
146+
* usages (e.g., non-constant arguments) will throw a {@link PrestoException}.
147147
*
148148
* @param expression the input expression to analyze and possibly rewrite
149149
* @param functionManager function manager used to resolve function metadata
150150
* @param variableAllocator variable allocator used to create new variable references
151-
* @return a possibly rewritten {@link RowExpression} with <code>CLP_GET_*</code> calls replaced
151+
* @return a possibly rewritten {@link RowExpression} with <code>CLP_GET_*</code> calls
152+
* replaced
152153
*/
153154
private RowExpression rewriteClpUdfs(
154155
RowExpression expression,
155156
FunctionMetadataManager functionManager,
156157
VariableAllocator variableAllocator)
157158
{
159+
// Handle CLP_GET_* function calls
158160
if (expression instanceof CallExpression) {
159161
CallExpression call = (CallExpression) expression;
160162
String functionName = functionManager.getFunctionMetadata(call.getFunctionHandle()).getName().getObjectName().toUpperCase();
@@ -191,6 +193,7 @@ private RowExpression rewriteClpUdfs(
191193
return new CallExpression(call.getDisplayName(), call.getFunctionHandle(), call.getType(), rewrittenArgs);
192194
}
193195

196+
// Handle special forms (e.g., AND, OR, etc.)
194197
if (expression instanceof SpecialFormExpression) {
195198
SpecialFormExpression special = (SpecialFormExpression) expression;
196199

@@ -258,8 +261,8 @@ else if (c == '_') {
258261
}
259262

260263
/**
261-
* Builds a new {@link TableScanNode} that includes additional variables and column handles
262-
* for rewritten CLP UDFs.
264+
* Builds a new {@link TableScanNode} that includes additional
265+
* {@link VariableReferenceExpression}s and {@link ColumnHandle}s for rewritten CLP UDFs.
263266
*
264267
* @param node the original table scan node
265268
* @return the updated table scan node

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import com.facebook.presto.Session;
1717
import com.facebook.presto.common.transaction.TransactionId;
18+
import com.facebook.presto.common.type.ArrayType;
1819
import com.facebook.presto.cost.PlanNodeStatsEstimate;
1920
import com.facebook.presto.cost.StatsAndCosts;
2021
import com.facebook.presto.cost.StatsProvider;
@@ -56,6 +57,7 @@
5657

5758
import static com.facebook.presto.common.Utils.checkState;
5859
import static com.facebook.presto.common.type.BigintType.BIGINT;
60+
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
5961
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
6062
import static com.facebook.presto.metadata.FunctionExtractor.extractFunctions;
6163
import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE;
@@ -145,7 +147,8 @@ public void testScanFilter()
145147

146148
Plan plan = localQueryRunner.createPlan(
147149
session,
148-
"SELECT * FROM test WHERE CLP_GET_INT('user_id') = 0 AND LOWER(city.Name) = 'BEIJING'",
150+
"SELECT * FROM test WHERE CLP_GET_INT('user_id') = 0 AND CLP_GET_FLOAT('fare') < 50.0 AND CLP_GET_STRING('city') = 'SF' AND " +
151+
"CLP_GET_BOOL('isHoliday') = true AND cardinality(CLP_GET_STRING_ARRAY('tags')) > 0 AND LOWER(city.Name) = 'BEIJING'",
149152
WarningCollector.NOOP);
150153
ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager);
151154
PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator);
@@ -159,10 +162,20 @@ public void testScanFilter()
159162
new Plan(optimizedPlan, plan.getTypes(), StatsAndCosts.empty()),
160163
anyTree(
161164
filter(
162-
expression("lower(city.Name) = 'BEIJING'"),
165+
expression("lower(city.Name) = 'BEIJING' AND cardinality(tags) > 0"),
163166
ClpTableScanMatcher.clpTableScanPattern(
164-
new ClpTableLayoutHandle(table, Optional.of("(user_id: 0)"), Optional.empty()),
165-
ImmutableSet.of(city, fare, isHoliday, new ClpColumnHandle("user_id", BIGINT))))));
167+
new ClpTableLayoutHandle(
168+
table,
169+
Optional.of(
170+
"(((user_id: 0 AND fare < 50.0) AND (city: \"SF\" AND isHoliday: true)))"),
171+
Optional.empty()),
172+
ImmutableSet.of(
173+
city,
174+
fare,
175+
isHoliday,
176+
new ClpColumnHandle("user_id", BIGINT),
177+
new ClpColumnHandle("city", VARCHAR),
178+
new ClpColumnHandle("tags", new ArrayType(VARCHAR)))))));
166179
}
167180

168181
@Test
@@ -173,7 +186,8 @@ public void testScanProject()
173186

174187
Plan plan = localQueryRunner.createPlan(
175188
session,
176-
"SELECT CLP_GET_STRING('user'), city.Name FROM test",
189+
"SELECT CLP_GET_INT('user_id'), CLP_GET_FLOAT('fare'), CLP_GET_STRING('user'), " +
190+
"CLP_GET_BOOL('isHoliday'), CLP_GET_STRING_ARRAY('tags'), city.Name from test",
177191
WarningCollector.NOOP);
178192
ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager);
179193
PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator);
@@ -188,8 +202,16 @@ public void testScanProject()
188202
anyTree(
189203
project(
190204
ImmutableMap.of(
205+
"clp_get_int",
206+
PlanMatchPattern.expression("user_id"),
207+
"clp_get_float",
208+
PlanMatchPattern.expression("fare"),
191209
"clp_get_string",
192210
PlanMatchPattern.expression("user"),
211+
"clp_get_bool",
212+
PlanMatchPattern.expression("isHoliday"),
213+
"clp_get_string_array",
214+
PlanMatchPattern.expression("tags"),
193215
"expr",
194216
PlanMatchPattern.expression("city.Name")),
195217
ClpTableScanMatcher.clpTableScanPattern(
@@ -198,7 +220,11 @@ public void testScanProject()
198220
Optional.empty(),
199221
Optional.empty()),
200222
ImmutableSet.of(
223+
new ClpColumnHandle("user_id", BIGINT),
224+
new ClpColumnHandle("fare", DOUBLE),
201225
new ClpColumnHandle("user", VARCHAR),
226+
isHoliday,
227+
new ClpColumnHandle("tags", new ArrayType(VARCHAR)),
202228
city)))));
203229
}
204230

0 commit comments

Comments
 (0)