-
Notifications
You must be signed in to change notification settings - Fork 3
feat: Add support for defining and using specific columns for metadata filtering. #19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
948ccc0
e530913
b0e35cc
ee181b8
a9c4d8d
616e889
ed8e2b4
d944577
e0e74ac
3c73569
25840c5
08cf54e
5b29dca
7b7811b
3d3eda4
e033951
a8dd013
305acd6
f8b7eaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -38,6 +38,7 @@ | |||||||||||||
| import java.util.Optional; | ||||||||||||||
| import java.util.Set; | ||||||||||||||
|
|
||||||||||||||
| import static com.facebook.presto.common.function.OperatorType.BETWEEN; | ||||||||||||||
| import static com.facebook.presto.common.function.OperatorType.EQUAL; | ||||||||||||||
| import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; | ||||||||||||||
| import static com.facebook.presto.common.function.OperatorType.GREATER_THAN_OR_EQUAL; | ||||||||||||||
|
|
@@ -49,6 +50,8 @@ | |||||||||||||
| import static com.facebook.presto.common.function.OperatorType.flip; | ||||||||||||||
| import static com.facebook.presto.common.type.BooleanType.BOOLEAN; | ||||||||||||||
| import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; | ||||||||||||||
| import static com.facebook.presto.plugin.clp.ClpUtils.escapeKqlSpecialCharsForStringValue; | ||||||||||||||
| import static com.facebook.presto.plugin.clp.ClpUtils.isNumericType; | ||||||||||||||
| import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; | ||||||||||||||
| import static java.lang.Integer.parseInt; | ||||||||||||||
| import static java.lang.String.format; | ||||||||||||||
|
|
@@ -84,15 +87,18 @@ public class ClpFilterToKqlConverter | |||||||||||||
| private final StandardFunctionResolution standardFunctionResolution; | ||||||||||||||
| private final FunctionMetadataManager functionMetadataManager; | ||||||||||||||
| private final Map<VariableReferenceExpression, ColumnHandle> assignments; | ||||||||||||||
| private final Set<String> metadataFilterColumns; | ||||||||||||||
anlowee marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| public ClpFilterToKqlConverter( | ||||||||||||||
| StandardFunctionResolution standardFunctionResolution, | ||||||||||||||
| FunctionMetadataManager functionMetadataManager, | ||||||||||||||
| Map<VariableReferenceExpression, ColumnHandle> assignments) | ||||||||||||||
| Map<VariableReferenceExpression, ColumnHandle> assignments, | ||||||||||||||
| Set<String> metadataFilterColumns) | ||||||||||||||
| { | ||||||||||||||
| this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); | ||||||||||||||
| this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); | ||||||||||||||
| this.assignments = requireNonNull(assignments, "assignments is null"); | ||||||||||||||
| this.metadataFilterColumns = requireNonNull(metadataFilterColumns, "metadataFilterColumns is null"); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| @Override | ||||||||||||||
|
|
@@ -114,11 +120,68 @@ public ClpExpression visitCall(CallExpression node, Void context) | |||||||||||||
| if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) { | ||||||||||||||
| return handleLogicalBinary(operatorType, node); | ||||||||||||||
| } | ||||||||||||||
| if (BETWEEN == operatorType) { | ||||||||||||||
| return handleBetween(node); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
| * Handles the BETWEEN expression for numeric range, all arguments must be numeric. If the first | ||||||||||||||
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| * argument is not a variable reference expression or either the second or the third is not | ||||||||||||||
| * constant expressions it won't translate the expression. | ||||||||||||||
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| * | ||||||||||||||
| * <p></p> | ||||||||||||||
| * Example: <code>col1 BETWEEN 0 AND 5</code> → <code>col1 >= 0 AND col1 <= 5</code> | ||||||||||||||
| * | ||||||||||||||
| * @param node the BETWEEN call expression | ||||||||||||||
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| * @return a ClpExpression containing either the equivalent KQL query, or the original | ||||||||||||||
| * expression if it couldn't be translated | ||||||||||||||
| */ | ||||||||||||||
| private ClpExpression handleBetween(CallExpression node) | ||||||||||||||
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| { | ||||||||||||||
| if (node.getArguments().size() != 3) { | ||||||||||||||
| throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, | ||||||||||||||
| "BETWEEN operator must have exactly three arguments. Received: " + node); | ||||||||||||||
| } | ||||||||||||||
| if (!(node.getArguments().get(0) instanceof VariableReferenceExpression) | ||||||||||||||
| || !(node.getArguments().get(1) instanceof ConstantExpression) | ||||||||||||||
| || !(node.getArguments().get(2) instanceof ConstantExpression)) { | ||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
| if (!isNumericType(node.getArguments().get(0).getType()) | ||||||||||||||
| || !isNumericType(node.getArguments().get(1).getType()) | ||||||||||||||
| || !isNumericType(node.getArguments().get(2).getType())) { | ||||||||||||||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| // Let the Presto SQL analyzer throw exception | ||||||||||||||
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
anlowee marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
| Optional<String> variableReferencePushDownExpression = node.getArguments().get(0).accept(this, null).getPushDownExpression(); | ||||||||||||||
| if (!variableReferencePushDownExpression.isPresent()) { | ||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| String lowerBoundConstantPushDownExpression = getLiteralString((ConstantExpression) node.getArguments().get(1)); | ||||||||||||||
| String upperBoundConstantPushDownExpression = getLiteralString((ConstantExpression) node.getArguments().get(2)); | ||||||||||||||
| String metadataSql = null; | ||||||||||||||
| String kql = format( | ||||||||||||||
| "%s >= %s AND %s <= %s", | ||||||||||||||
| variableReferencePushDownExpression.get(), | ||||||||||||||
| lowerBoundConstantPushDownExpression, | ||||||||||||||
| variableReferencePushDownExpression.get(), | ||||||||||||||
| upperBoundConstantPushDownExpression); | ||||||||||||||
| if (metadataFilterColumns.contains(variableReferencePushDownExpression.get())) { | ||||||||||||||
| metadataSql = format( | ||||||||||||||
| "\"%s\" >= %s AND \"%s\" <= %s", | ||||||||||||||
| variableReferencePushDownExpression.get(), | ||||||||||||||
| lowerBoundConstantPushDownExpression, | ||||||||||||||
| variableReferencePushDownExpression.get(), | ||||||||||||||
| upperBoundConstantPushDownExpression); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(kql, metadataSql); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| @Override | ||||||||||||||
| public ClpExpression visitConstant(ConstantExpression node, Void context) | ||||||||||||||
| { | ||||||||||||||
|
|
@@ -203,7 +266,13 @@ private ClpExpression handleNot(CallExpression node) | |||||||||||||
| if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { | ||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression("NOT " + expression.getPushDownExpression().get()); | ||||||||||||||
| String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); | ||||||||||||||
anlowee marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| if (expression.getMetadataSql().isPresent()) { | ||||||||||||||
| return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSql()); | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| return new ClpExpression(notPushDownExpression); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
|
|
@@ -350,24 +419,40 @@ private ClpExpression buildClpExpression( | |||||||||||||
| Type literalType, | ||||||||||||||
| RowExpression originalNode) | ||||||||||||||
| { | ||||||||||||||
| String metadataSql = null; | ||||||||||||||
| if (operator.equals(EQUAL)) { | ||||||||||||||
| if (literalType instanceof VarcharType) { | ||||||||||||||
| return new ClpExpression(format("%s: \"%s\"", variableName, literalString)); | ||||||||||||||
| if (metadataFilterColumns.contains(variableName)) { | ||||||||||||||
| metadataSql = format("\"%s\" = '%s'", variableName, literalString); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSql); | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| return new ClpExpression(format("%s: %s", variableName, literalString)); | ||||||||||||||
| if (metadataFilterColumns.contains(variableName)) { | ||||||||||||||
| metadataSql = format("\"%s\" = %s", variableName, literalString); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(format("%s: %s", variableName, literalString), metadataSql); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| else if (operator.equals(NOT_EQUAL)) { | ||||||||||||||
| if (literalType instanceof VarcharType) { | ||||||||||||||
| return new ClpExpression(format("NOT %s: \"%s\"", variableName, literalString)); | ||||||||||||||
| if (metadataFilterColumns.contains(variableName)) { | ||||||||||||||
| metadataSql = format("NOT \"%s\" = '%s'", variableName, literalString); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSql); | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| return new ClpExpression(format("NOT %s: %s", variableName, literalString)); | ||||||||||||||
| if (metadataFilterColumns.contains(variableName)) { | ||||||||||||||
| metadataSql = format("NOT \"%s\" = %s", variableName, literalString); | ||||||||||||||
| } | ||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Use inequality operator for metadata SQL consistency For if (metadataFilterColumns.contains(variableName)) {
- metadataSql = format("NOT \"%s\" = %s", variableName, literalString);
+ metadataSql = format("\"%s\" != %s", variableName, literalString);
}This makes the SQL more conventional and consistent with the other comparison operators. 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
| return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSql); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { | ||||||||||||||
| return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString)); | ||||||||||||||
| if (metadataFilterColumns.contains(variableName)) { | ||||||||||||||
| metadataSql = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSql); | ||||||||||||||
| } | ||||||||||||||
| return new ClpExpression(originalNode); | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -568,16 +653,24 @@ private Optional<Integer> parseLengthLiteral(RowExpression lengthExpression, Str | |||||||||||||
| */ | ||||||||||||||
| private ClpExpression handleAnd(SpecialFormExpression node) | ||||||||||||||
| { | ||||||||||||||
| StringBuilder metadataQueryBuilder = new StringBuilder(); | ||||||||||||||
| metadataQueryBuilder.append("("); | ||||||||||||||
| StringBuilder queryBuilder = new StringBuilder(); | ||||||||||||||
| queryBuilder.append("("); | ||||||||||||||
| List<RowExpression> remainingExpressions = new ArrayList<>(); | ||||||||||||||
| boolean hasMetadataSql = false; | ||||||||||||||
| boolean hasPushDownExpression = false; | ||||||||||||||
| for (RowExpression argument : node.getArguments()) { | ||||||||||||||
| ClpExpression expression = argument.accept(this, null); | ||||||||||||||
| if (expression.getPushDownExpression().isPresent()) { | ||||||||||||||
| hasPushDownExpression = true; | ||||||||||||||
| queryBuilder.append(expression.getPushDownExpression().get()); | ||||||||||||||
| queryBuilder.append(" AND "); | ||||||||||||||
| if (expression.getMetadataSql().isPresent()) { | ||||||||||||||
| hasMetadataSql = true; | ||||||||||||||
| metadataQueryBuilder.append(expression.getMetadataSql().get()); | ||||||||||||||
| metadataQueryBuilder.append(" AND "); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| if (expression.getRemainingExpression().isPresent()) { | ||||||||||||||
| remainingExpressions.add(expression.getRemainingExpression().get()); | ||||||||||||||
|
|
@@ -588,16 +681,21 @@ private ClpExpression handleAnd(SpecialFormExpression node) | |||||||||||||
| } | ||||||||||||||
| else if (!remainingExpressions.isEmpty()) { | ||||||||||||||
| if (remainingExpressions.size() == 1) { | ||||||||||||||
| return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", remainingExpressions.get(0)); | ||||||||||||||
| return new ClpExpression( | ||||||||||||||
| queryBuilder.substring(0, queryBuilder.length() - 5) + ")", | ||||||||||||||
| hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, | ||||||||||||||
| remainingExpressions.get(0)); | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| return new ClpExpression( | ||||||||||||||
| queryBuilder.substring(0, queryBuilder.length() - 5) + ")", | ||||||||||||||
| hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, | ||||||||||||||
| new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions)); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| // Remove the last " AND " from the query | ||||||||||||||
| return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); | ||||||||||||||
| return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", | ||||||||||||||
| hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
|
|
@@ -614,18 +712,30 @@ else if (!remainingExpressions.isEmpty()) { | |||||||||||||
| */ | ||||||||||||||
| private ClpExpression handleOr(SpecialFormExpression node) | ||||||||||||||
| { | ||||||||||||||
| StringBuilder metadataQueryBuilder = new StringBuilder(); | ||||||||||||||
| metadataQueryBuilder.append("("); | ||||||||||||||
| StringBuilder queryBuilder = new StringBuilder(); | ||||||||||||||
| queryBuilder.append("("); | ||||||||||||||
| boolean hasAllMetadataSql = true; | ||||||||||||||
| for (RowExpression argument : node.getArguments()) { | ||||||||||||||
| ClpExpression expression = argument.accept(this, null); | ||||||||||||||
| if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { | ||||||||||||||
| return new ClpExpression(node); | ||||||||||||||
| } | ||||||||||||||
| queryBuilder.append(expression.getPushDownExpression().get()); | ||||||||||||||
| queryBuilder.append(" OR "); | ||||||||||||||
| if (hasAllMetadataSql && expression.getMetadataSql().isPresent()) { | ||||||||||||||
| metadataQueryBuilder.append(expression.getMetadataSql().get()); | ||||||||||||||
| metadataQueryBuilder.append(" OR "); | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| hasAllMetadataSql = false; | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| // Remove the last " OR " from the query | ||||||||||||||
| return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); | ||||||||||||||
| return new ClpExpression( | ||||||||||||||
| queryBuilder.substring(0, queryBuilder.length() - 4) + ")", | ||||||||||||||
| hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick (assertive)
LGTM! Consistent configuration property implementation.
The new
metadataFilterConfigproperty follows the established pattern with proper getter/setter methods and @config annotation.Consider adding validation for the metadata filter config path, similar to how
metadataTablePrefixis validated:@Config("clp.metadata-filter-config") public ClpConfig setMetadataFilterConfig(String metadataFilterConfig) { + if (metadataFilterConfig != null && !metadataFilterConfig.trim().isEmpty()) { + // Validate file path format or existence if needed + } this.metadataFilterConfig = metadataFilterConfig; return this; }Also applies to: 111-121
🤖 Prompt for AI Agents