diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index bc0c83a94ef68..6e0b35d56bd3e 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -65,6 +65,16 @@ provided + + com.fasterxml.jackson.core + jackson-core + + + + com.fasterxml.jackson.core + jackson-databind + + com.facebook.presto presto-spi diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 761c4702a64b2..7ffa6545b8ca0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -29,6 +29,7 @@ public class ClpConfig private String metadataDbName; private String metadataDbUser; private String metadataDbPassword; + private String metadataFilterConfig; private String metadataTablePrefix; private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; @@ -107,6 +108,18 @@ public ClpConfig setMetadataDbPassword(String metadataDbPassword) return this; } + public String getMetadataFilterConfig() + { + return metadataFilterConfig; + } + + @Config("clp.metadata-filter-config") + public ClpConfig setMetadataFilterConfig(String metadataFilterConfig) + { + this.metadataFilterConfig = metadataFilterConfig; + return this; + } + public String getMetadataTablePrefix() { return metadataTablePrefix; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java index bec007135bccd..990b4bbc7c83f 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java @@ -33,10 +33,12 @@ public class ClpConnectorFactory implements ConnectorFactory { + public static final String CONNECTOR_NAME = "clp"; + @Override public String getName() { - return "clp"; + return CONNECTOR_NAME; } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 8cb2438277404..94cc8bda560c4 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -18,6 +18,7 @@ import com.facebook.presto.spi.ErrorCodeSupplier; import static com.facebook.presto.common.ErrorType.EXTERNAL; +import static com.facebook.presto.common.ErrorType.USER_ERROR; public enum ClpErrorCode implements ErrorCodeSupplier @@ -26,7 +27,10 @@ public enum ClpErrorCode CLP_UNSUPPORTED_METADATA_SOURCE(1, EXTERNAL), CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL), CLP_UNSUPPORTED_TYPE(3, EXTERNAL), - CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL); + CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL), + + CLP_METADATA_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), + CLP_MANDATORY_METADATA_FILTER_NOT_VALID(11, USER_ERROR); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java index fd74933c709fe..e970f9848a9cf 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java @@ -18,22 +18,30 @@ import java.util.Optional; /** - * Represents the result of converting a Presto RowExpression into a CLP-compatible KQL query. In - * every case, `pushDownExpression` represents the part of the RowExpression that could be - * converted to a KQL expression, and `remainingExpression` represents the part that could not be - * converted. + * Represents the result of: + * */ public class ClpExpression { // Optional KQL query or column name representing the fully or partially translatable part of the expression. private final Optional pushDownExpression; + // Optional SQL string extracted from the query plan, which is only made of up of columns in + // CLP's metadata database. + private final Optional metadataSqlQuery; + // The remaining (non-translatable) portion of the RowExpression, if any. private final Optional remainingExpression; - public ClpExpression(String pushDownExpression, RowExpression remainingExpression) + public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression) { this.pushDownExpression = Optional.ofNullable(pushDownExpression); + this.metadataSqlQuery = Optional.ofNullable(metadataSqlQuery); this.remainingExpression = Optional.ofNullable(remainingExpression); } @@ -42,7 +50,7 @@ public ClpExpression(String pushDownExpression, RowExpression remainingExpressio */ public ClpExpression() { - this(null, null); + this(null, null, null); } /** @@ -52,7 +60,19 @@ public ClpExpression() */ public ClpExpression(String pushDownExpression) { - this(pushDownExpression, null); + this(pushDownExpression, null, null); + } + + /** + * Creates a ClpExpression from a fully translatable KQL string or column name, as well as a + * metadata SQL string. + * + * @param pushDownExpression + * @param metadataSqlQuery + */ + public ClpExpression(String pushDownExpression, String metadataSqlQuery) + { + this(pushDownExpression, metadataSqlQuery, null); } /** @@ -62,7 +82,7 @@ public ClpExpression(String pushDownExpression) */ public ClpExpression(RowExpression remainingExpression) { - this(null, remainingExpression); + this(null, null, remainingExpression); } public Optional getPushDownExpression() @@ -70,6 +90,11 @@ public Optional getPushDownExpression() return pushDownExpression; } + public Optional getMetadataSqlQuery() + { + return metadataSqlQuery; + } + public Optional getRemainingExpression() { return remainingExpression; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java index 02fe7c5dc7f31..7e6fae13fdb8f 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java @@ -14,6 +14,7 @@ package com.facebook.presto.plugin.clp; import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.DecimalType; import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; @@ -38,6 +39,7 @@ import java.util.Optional; import java.util.Set; +import static com.facebook.presto.common.function.OperatorType.BETWEEN; import static com.facebook.presto.common.function.OperatorType.EQUAL; import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; import static com.facebook.presto.common.function.OperatorType.GREATER_THAN_OR_EQUAL; @@ -47,7 +49,13 @@ import static com.facebook.presto.common.function.OperatorType.NEGATION; import static com.facebook.presto.common.function.OperatorType.NOT_EQUAL; import static com.facebook.presto.common.function.OperatorType.flip; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.RealType.REAL; +import static com.facebook.presto.common.type.SmallintType.SMALLINT; +import static com.facebook.presto.common.type.TinyintType.TINYINT; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; import static java.lang.Integer.parseInt; @@ -55,14 +63,18 @@ import static java.util.Objects.requireNonNull; /** - * A translator to translate Presto RowExpressions into KQL (Kibana Query Language) filters used as - * CLP queries. This is used primarily for pushing down supported filters to the CLP engine. This - * class implements the RowExpressionVisitor interface and recursively walks Presto filter - * expressions, attempting to convert supported expressions into corresponding KQL filter strings. - * Any part of the expression that cannot be translated is preserved as a "remaining expression" for - * potential fallback processing. + * A translator to translate Presto {@link RowExpression}s into: + *
    + *
  • KQL (Kibana Query Language) filters used to push down supported filters to the CLP + * engine.
  • + *
  • SQL filters used for filtering splits in CLP's metadata database.
  • + *
+ * This class implements the {@link RowExpressionVisitor} interface and recursively walks Presto + * filter expressions, attempting to convert supported expressions into corresponding KQL filter + * strings and SQL filter strings for metadata filtering. Any part of the expression that cannot be + * translated to KQL is preserved as a "remaining expression" for potential fallback processing. *

- * Supported translations include: + * Supported translations for KQL include: *
    *
  • Comparisons between variables and constants (e.g., =, !=, <, >, <=, >=).
  • *
  • String pattern matches using LIKE with constant patterns only. Patterns that begin and @@ -74,6 +86,13 @@ *
  • Dereferencing fields from row-typed variables.
  • *
  • Logical operators AND, OR, and NOT.
  • *
+ *

+ * Supported translations for SQL include: + *
    + *
  • Comparisons between variables and constants (e.g., =, !=, <, >, <=, >=).
  • + *
  • Dereferencing fields from row-typed variables.
  • + *
  • Logical operators AND, OR, and NOT.
  • + *
*/ public class ClpFilterToKqlConverter implements RowExpressionVisitor @@ -84,15 +103,18 @@ public class ClpFilterToKqlConverter private final StandardFunctionResolution standardFunctionResolution; private final FunctionMetadataManager functionMetadataManager; private final Map assignments; + private final Set metadataFilterColumns; public ClpFilterToKqlConverter( StandardFunctionResolution standardFunctionResolution, FunctionMetadataManager functionMetadataManager, - Map assignments) + Map assignments, + Set metadataFilterColumns) { this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); this.assignments = requireNonNull(assignments, "assignments is null"); + this.metadataFilterColumns = requireNonNull(metadataFilterColumns, "metadataFilterColumns is null"); } @Override @@ -114,6 +136,9 @@ public ClpExpression visitCall(CallExpression node, Void context) if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) { return handleLogicalBinary(operatorType, node); } + if (BETWEEN == operatorType) { + return handleBetween(node); + } } return new ClpExpression(node); @@ -182,6 +207,56 @@ private String getVariableName(VariableReferenceExpression variable) return ((ClpColumnHandle) assignments.get(variable)).getOriginalColumnName(); } + /** + * Handles the BETWEEN expression. + *

+ * The translation is only performed if: + *
    + *
  • all arguments have numeric types.
  • + *
  • the first argument is a variable reference expression.
  • + *
  • the second and third arguments are constant expressions.
  • + *
+ *

+ * Example: col1 BETWEEN 0 AND 5col1 >= 0 AND col1 <= 5 + * + * @param node the {@code BETWEEN} call expression + * @return a ClpExpression containing either the equivalent KQL query, or the original + * expression if it couldn't be translated + */ + private ClpExpression handleBetween(CallExpression node) + { + List arguments = node.getArguments(); + if (arguments.size() != 3) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "BETWEEN operator must have exactly three arguments. Received: " + node); + } + RowExpression first = arguments.get(0); + RowExpression second = arguments.get(1); + RowExpression third = arguments.get(2); + if (!(first instanceof VariableReferenceExpression) + || !(second instanceof ConstantExpression) + || !(third instanceof ConstantExpression)) { + return new ClpExpression(node); + } + if (!isClpCompatibleNumericType(first.getType()) + || !isClpCompatibleNumericType(second.getType()) + || !isClpCompatibleNumericType(third.getType())) { + return new ClpExpression(node); + } + Optional variableOpt = first.accept(this, null).getPushDownExpression(); + if (!variableOpt.isPresent()) { + return new ClpExpression(node); + } + String variable = variableOpt.get(); + String lowerBound = getLiteralString((ConstantExpression) second); + String upperBound = getLiteralString((ConstantExpression) third); + String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); + String metadataSqlQuery = metadataFilterColumns.contains(variable) + ? String.format("\"%s\" >= %s AND \"%s\" <= %s", variable, lowerBound, variable, upperBound) + : null; + return new ClpExpression(kql, metadataSqlQuery); + } + /** * Handles the logical NOT expression. *

@@ -203,7 +278,13 @@ private ClpExpression handleNot(CallExpression node) if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { return new ClpExpression(node); } - return new ClpExpression("NOT " + expression.getPushDownExpression().get()); + String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); + if (expression.getMetadataSqlQuery().isPresent()) { + return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery()); + } + else { + return new ClpExpression(notPushDownExpression); + } } /** @@ -350,24 +431,34 @@ private ClpExpression buildClpExpression( Type literalType, RowExpression originalNode) { + String metadataSqlQuery = null; if (operator.equals(EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression(format("%s: \"%s\"", variableName, literalString)); + return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); } else { - return new ClpExpression(format("%s: %s", variableName, literalString)); + if (metadataFilterColumns.contains(variableName)) { + metadataSqlQuery = format("\"%s\" = %s", variableName, literalString); + } + return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery); } } else if (operator.equals(NOT_EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression(format("NOT %s: \"%s\"", variableName, literalString)); + return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); } else { - return new ClpExpression(format("NOT %s: %s", variableName, literalString)); + if (metadataFilterColumns.contains(variableName)) { + metadataSqlQuery = format("NOT \"%s\" = %s", variableName, literalString); + } + return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery); } } else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { - return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString)); + if (metadataFilterColumns.contains(variableName)) { + metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); + } + return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery); } return new ClpExpression(originalNode); } @@ -568,9 +659,12 @@ private Optional parseLengthLiteral(RowExpression lengthExpression, Str */ private ClpExpression handleAnd(SpecialFormExpression node) { + StringBuilder metadataQueryBuilder = new StringBuilder(); + metadataQueryBuilder.append("("); StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("("); List remainingExpressions = new ArrayList<>(); + boolean hasMetadataSql = false; boolean hasPushDownExpression = false; for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); @@ -578,6 +672,11 @@ private ClpExpression handleAnd(SpecialFormExpression node) hasPushDownExpression = true; queryBuilder.append(expression.getPushDownExpression().get()); queryBuilder.append(" AND "); + if (expression.getMetadataSqlQuery().isPresent()) { + hasMetadataSql = true; + metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); + metadataQueryBuilder.append(" AND "); + } } if (expression.getRemainingExpression().isPresent()) { remainingExpressions.add(expression.getRemainingExpression().get()); @@ -588,16 +687,21 @@ private ClpExpression handleAnd(SpecialFormExpression node) } else if (!remainingExpressions.isEmpty()) { if (remainingExpressions.size() == 1) { - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", remainingExpressions.get(0)); + return new ClpExpression( + queryBuilder.substring(0, queryBuilder.length() - 5) + ")", + hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, + remainingExpressions.get(0)); } else { return new ClpExpression( queryBuilder.substring(0, queryBuilder.length() - 5) + ")", + hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions)); } } // Remove the last " AND " from the query - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); + return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", + hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null); } /** @@ -614,8 +718,11 @@ else if (!remainingExpressions.isEmpty()) { */ private ClpExpression handleOr(SpecialFormExpression node) { + StringBuilder metadataQueryBuilder = new StringBuilder(); + metadataQueryBuilder.append("("); StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("("); + boolean hasAllMetadataSql = true; for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { @@ -623,9 +730,18 @@ private ClpExpression handleOr(SpecialFormExpression node) } queryBuilder.append(expression.getPushDownExpression().get()); queryBuilder.append(" OR "); + if (hasAllMetadataSql && expression.getMetadataSqlQuery().isPresent()) { + metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); + metadataQueryBuilder.append(" OR "); + } + else { + hasAllMetadataSql = false; + } } // Remove the last " OR " from the query - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); + return new ClpExpression( + queryBuilder.substring(0, queryBuilder.length() - 4) + ")", + hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null); } /** @@ -753,6 +869,41 @@ private ClpExpression handleDereference(RowExpression expression) return new ClpExpression(baseString.getPushDownExpression().get() + "." + fieldName); } + /** + * See + * here + * for all special chars in the string value that need to be escaped. + * + * @param literalString + * @return the string with special characters escaped + */ + public static String escapeKqlSpecialCharsForStringValue(String literalString) + { + String escaped = literalString; + escaped = escaped.replace("\\", "\\\\"); + escaped = escaped.replace("\"", "\\\""); + escaped = escaped.replace("?", "\\?"); + escaped = escaped.replace("*", "\\*"); + return escaped; + } + + /** + * Checks if the type is one of the numeric types that can be pushed down to CLP. + * + * @param type the type to check + * @return whether the type can be pushed down. + */ + public static boolean isClpCompatibleNumericType(Type type) + { + return type.equals(BIGINT) + || type.equals(INTEGER) + || type.equals(SMALLINT) + || type.equals(TINYINT) + || type.equals(DOUBLE) + || type.equals(REAL) + || type instanceof DecimalType; + } + private static class SubstrInfo { String variableName; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java index 73c94dbdfba1d..1f9962a3456d9 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java @@ -119,7 +119,7 @@ public ConnectorTableLayoutResult getTableLayoutForConstraint( Optional> desiredColumns) { ClpTableHandle tableHandle = (ClpTableHandle) table; - ConnectorTableLayout layout = new ConnectorTableLayout(new ClpTableLayoutHandle(tableHandle, Optional.empty())); + ConnectorTableLayout layout = new ConnectorTableLayout(new ClpTableLayoutHandle(tableHandle, Optional.empty(), Optional.empty())); return new ConnectorTableLayoutResult(layout, constraint.getSummary()); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java new file mode 100644 index 0000000000000..de886392136de --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java @@ -0,0 +1,264 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp; + +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.inject.Inject; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; + +import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_METADATA_FILTER_NOT_VALID; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_METADATA_FILTER_CONFIG_NOT_FOUND; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +/** + * Loads and manages metadata filter configurations for the CLP connector. + *

+ * The configuration file is specified by the {@code clp.metadata-filter-config} property + * and defines metadata filters used to optimize query execution through split pruning. + *

+ * Each filter config indicates how a data column--a column in the Presto table--should be mapped to + * a metadata column--a column in CLP’s metadata database. + *

+ * Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under + * a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply + * to all tables within that schema). + *

+ * Each filter config includes the following fields: + *
    + *
  • {@code columnName}: the data column's name. Currently, only numeric-type columns can + * be used as metadata filters.
  • + * + *
  • {@code rangeMapping} (optional): an object with the following properties: + * + *

    + * Note: This option is only valid if the column has a numeric type. + * + *
      + *
    • {@code lowerBound}: The metadata column that represents the lower bound of values + * in a split for the data column.
    • + *
    • {@code upperBound}: The metadata column that represents the upper bound of values + * in a split for the data column.
    • + *
    + *
  • + * + *
  • {@code required} (optional, defaults to {@code false}): indicates whether the + * filter must be present in the translated metadata filter SQL query. If a required filter + * is missing or cannot be pushed down, the query will be rejected.
  • + *
+ */ +public class ClpMetadataFilterProvider +{ + private final Map> filterMap; + + @Inject + public ClpMetadataFilterProvider(ClpConfig config) + { + requireNonNull(config, "config is null"); + + ObjectMapper mapper = new ObjectMapper(); + try { + filterMap = mapper.readValue( + new File(config.getMetadataFilterConfig()), + new TypeReference>>() {}); + } + catch (IOException e) { + throw new PrestoException(CLP_METADATA_FILTER_CONFIG_NOT_FOUND, "Failed to metadata filter config file open."); + } + } + + public void checkContainsRequiredFilters(SchemaTableName schemaTableName, String metadataFilterSql) + { + boolean hasRequiredMetadataFilterColumns = true; + ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); + for (String columnName : getRequiredColumnNames(format("%s.%s", CONNECTOR_NAME, schemaTableName))) { + if (!metadataFilterSql.contains(columnName)) { + hasRequiredMetadataFilterColumns = false; + notFoundListBuilder.add(columnName); + } + } + if (!hasRequiredMetadataFilterColumns) { + throw new PrestoException( + CLP_MANDATORY_METADATA_FILTER_NOT_VALID, + notFoundListBuilder.build() + " is a mandatory metadata filter column but not valid"); + } + } + + /** + * Rewrites the given SQL string to remap filter conditions based on the configured range + * mappings for the given scope. + * + *

The {@code scope} follows the format {@code catalog[.schema][.table]}, and determines + * which filter mappings to apply, since mappings from more specific scopes (e.g., table-level) + * override or supplement those from broader scopes (e.g., catalog-level). For each scope + * (catalog, schema, table), this method collects all range mappings defined in the metadata + * filter configuration. + * + *

This method performs regex-based replacements to convert numeric filter expressions such + * as: + *

    + *
  • {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
  • + *
  • {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
  • + *
  • {@code "msg.timestamp" = 4567} → + * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
  • + *
+ * + * @param scope + * @param sql + * @return the rewritten SQL string + */ + public String remapFilterSql(String scope, String sql) + { + String[] splitScope = scope.split("\\."); + + Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); + + if (1 < splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); + } + + if (3 == splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); + } + + String remappedSql = sql; + for (Map.Entry entry : mappings.entrySet()) { + String key = entry.getKey(); + RangeMapping value = entry.getValue(); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(>=?)\\s([0-9]*)", key), + format("%s $2 $3", value.upperBound)); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(<=?)\\s([0-9]*)", key), + format("%s $2 $3", value.lowerBound)); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(=)\\s([0-9]*)", key), + format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound)); + } + return remappedSql; + } + + public Set getColumnNames(String scope) + { + return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters); + } + + private Set getRequiredColumnNames(String scope) + { + return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters); + } + + private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor) + { + String[] splitScope = scope.split("\\."); + ImmutableSet.Builder builder = ImmutableSet.builder(); + + builder.addAll(extractor.apply(filterMap.get(splitScope[0]))); + + if (splitScope.length > 1) { + builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1]))); + } + + if (splitScope.length == 3) { + builder.addAll(extractor.apply(filterMap.get(scope))); + } + + return builder.build(); + } + + private Set getAllColumnNamesFromFilters(List filters) + { + return null != filters ? filters.stream() + .map(filter -> filter.columnName) + .collect(toImmutableSet()) : ImmutableSet.of(); + } + + private Set getRequiredColumnNamesFromFilters(List filters) + { + return null != filters ? filters.stream() + .filter(filter -> filter.required) + .map(filter -> filter.columnName) + .collect(toImmutableSet()) : ImmutableSet.of(); + } + + private Map getAllMappingsFromFilters(List filters) + { + return null != filters + ? filters.stream() + .filter(filter -> null != filter.rangeMapping) + .collect(toImmutableMap( + filter -> filter.columnName, + filter -> filter.rangeMapping)) + : ImmutableMap.of(); + } + + private static class RangeMapping + { + @JsonProperty("lowerBound") + public String lowerBound; + + @JsonProperty("upperBound") + public String upperBound; + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (!(o instanceof RangeMapping)) { + return false; + } + RangeMapping that = (RangeMapping) o; + return Objects.equals(lowerBound, that.lowerBound) && + Objects.equals(upperBound, that.upperBound); + } + + @Override + public int hashCode() + { + return Objects.hash(lowerBound, upperBound); + } + } + + private static class Filter + { + @JsonProperty("columnName") + public String columnName; + + @JsonProperty("rangeMapping") + public RangeMapping rangeMapping; + + @JsonProperty("required") + public boolean required; + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index fa0386376f12a..2a5144b61d812 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -37,6 +37,7 @@ protected void setup(Binder binder) binder.bind(ClpConnector.class).in(Scopes.SINGLETON); binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); + binder.bind(ClpMetadataFilterProvider.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java index adab0bf71c9a8..157af459cb80e 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java @@ -32,6 +32,7 @@ import java.util.Map; import java.util.Optional; +import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; import static java.util.Objects.requireNonNull; @@ -41,11 +42,13 @@ public class ClpPlanOptimizer private static final Logger log = Logger.get(ClpPlanOptimizer.class); private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; + private final ClpMetadataFilterProvider metadataFilterProvider; - public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution) + public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider) { this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); + this.metadataFilterProvider = requireNonNull(metadataFilterProvider, "metadataFilterProvider is null"); } @Override @@ -75,15 +78,31 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) Map assignments = tableScanNode.getAssignments(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - ClpExpression clpExpression = node.getPredicate() - .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, assignments), null); + String scope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString(); + ClpExpression clpExpression = node.getPredicate().accept( + new ClpFilterToKqlConverter( + functionResolution, + functionManager, + assignments, + metadataFilterProvider.getColumnNames(scope)), null); Optional kqlQuery = clpExpression.getPushDownExpression(); + Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery(); Optional remainingPredicate = clpExpression.getRemainingExpression(); + + // Perform required metadata filter checks before handling the KQL query (if kqlQuery + // isn't present, we'll return early, skipping subsequent checks). + metadataFilterProvider.checkContainsRequiredFilters(clpTableHandle.getSchemaTableName(), metadataSqlQuery.orElse("")); + if (metadataSqlQuery.isPresent()) { + metadataSqlQuery = Optional.of(metadataFilterProvider.remapFilterSql(scope, metadataSqlQuery.get())); + log.debug("Metadata SQL query: %s", metadataSqlQuery); + } + if (!kqlQuery.isPresent()) { return node; } - log.debug("KQL query: %s", kqlQuery.get()); - ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery); + log.debug("KQL query: %s", kqlQuery); + + ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery); TableScanNode newTableScanNode = new TableScanNode( tableScanNode.getSourceLocation(), idAllocator.getNextId(), diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java index f6f166eb7f657..a8fd1623d172b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java @@ -28,12 +28,14 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; + private final ClpMetadataFilterProvider metadataFilterProvider; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution) + public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider) { this.functionManager = functionManager; this.functionResolution = functionResolution; + this.metadataFilterProvider = metadataFilterProvider; } @Override @@ -45,6 +47,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution)); + return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, metadataFilterProvider)); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java index f1f7d7c708815..b82932f0c30fd 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java @@ -27,12 +27,14 @@ public class ClpTableLayoutHandle { private final ClpTableHandle table; private final Optional kqlQuery; + private final Optional metadataSql; @JsonCreator - public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery) + public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery, @JsonProperty("metadataFilterQuery") Optional metadataSql) { this.table = table; this.kqlQuery = kqlQuery; + this.metadataSql = metadataSql; } @JsonProperty @@ -47,6 +49,12 @@ public Optional getKqlQuery() return kqlQuery; } + @JsonProperty + public Optional getMetadataSql() + { + return metadataSql; + } + @Override public boolean equals(Object o) { @@ -58,13 +66,14 @@ public boolean equals(Object o) } ClpTableLayoutHandle that = (ClpTableLayoutHandle) o; return Objects.equals(table, that.table) && - Objects.equals(kqlQuery, that.kqlQuery); + Objects.equals(kqlQuery, that.kqlQuery) && + Objects.equals(metadataSql, that.metadataSql); } @Override public int hashCode() { - return Objects.hash(table, kqlQuery); + return Objects.hash(table, kqlQuery, metadataSql); } @Override @@ -73,6 +82,7 @@ public String toString() return toStringHelper(this) .add("table", table) .add("kqlQuery", kqlQuery) + .add("metadataSql", metadataSql) .toString(); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java index 90d0b911d0d41..c07137d1eb7dc 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.RowType; +import com.facebook.presto.common.type.TimestampType; import com.facebook.presto.common.type.Type; import com.facebook.presto.plugin.clp.ClpColumnHandle; import com.facebook.presto.spi.PrestoException; @@ -119,9 +120,10 @@ private Type mapColumnType(byte type) return DOUBLE; case ClpString: case VarString: - case DateString: case NullValue: return VARCHAR; + case DateString: + return TimestampType.TIMESTAMP; case UnstructuredArray: return new ArrayType(VARCHAR); case Boolean: diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 964a992230e36..bba65420e46d1 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -41,7 +41,7 @@ public class ClpMySqlSplitProvider public static final String ARCHIVES_TABLE_SUFFIX = "_archives"; // SQL templates - private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s`", ARCHIVES_TABLE_COLUMN_ID, ARCHIVES_TABLE_SUFFIX); + private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_COLUMN_ID, ARCHIVES_TABLE_SUFFIX); private static final Logger log = Logger.get(ClpMySqlSplitProvider.class); @@ -69,6 +69,12 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String tableName = clpTableHandle.getSchemaTableName().getTableName(); String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName); + if (clpTableLayoutHandle.getMetadataSql().isPresent()) { + String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get(); + archivePathQuery += " AND (" + metadataFilterQuery + ")"; + } + log.debug("Query for archive: %s", archivePathQuery); + try (Connection connection = getConnection()) { // Fetch archive IDs and create splits try (PreparedStatement statement = connection.prepareStatement(archivePathQuery); ResultSet resultSet = statement.executeQuery()) { @@ -83,7 +89,9 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) log.warn("Database error while processing splits for %s: %s", tableName, e); } - return splits.build(); + ImmutableList filteredSplits = splits.build(); + log.debug("Number of splits: %s", filteredSplits.size()); + return filteredSplits; } private Connection getConnection() diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java index 74c36cf934579..91fc3c780d941 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java @@ -48,11 +48,13 @@ public final class ClpMetadataDbSetUp public static final String METADATA_DB_TABLE_PREFIX = "clp_"; public static final String METADATA_DB_URL_TEMPLATE = "jdbc:h2:file:%s;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; public static final String METADATA_DB_USER = "sa"; - public static final String ARCHIVE_STORAGE_DIRECTORY_BASE = "/tmp/archives/"; + public static final String ARCHIVES_STORAGE_DIRECTORY_BASE = "/tmp/archives/"; private static final Logger log = Logger.get(ClpMetadataDbSetUp.class); private static final String DATASETS_TABLE_NAME = METADATA_DB_TABLE_PREFIX + DATASETS_TABLE_SUFFIX; + private static final String ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP = "begin_timestamp"; private static final String ARCHIVES_TABLE_COLUMN_PAGINATION_ID = "pagination_id"; + private static final String ARCHIVES_TABLE_COLUMN_END_TIMESTAMP = "end_timestamp"; private ClpMetadataDbSetUp() { @@ -118,7 +120,7 @@ public static ClpMetadata setupMetadata(DbHandle dbHandle, Map> splits) + public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> splits) { final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath); final String archiveTableFormat = METADATA_DB_TABLE_PREFIX + "%s" + ARCHIVES_TABLE_SUFFIX; @@ -127,7 +129,7 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> tableSplits : splits.entrySet()) { + for (Map.Entry> tableSplits : splits.entrySet()) { String tableName = tableSplits.getKey(); updateDatasetsTable(conn, tableName); @@ -135,17 +137,28 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map 0", "fare > 0", null, sessionHolder); - testFilter("fare >= 0", "fare >= 0", null, sessionHolder); - testFilter("fare < 0", "fare < 0", null, sessionHolder); - testFilter("fare <= 0", "fare <= 0", null, sessionHolder); - testFilter("fare = 0", "fare: 0", null, sessionHolder); - testFilter("fare != 0", "NOT fare: 0", null, sessionHolder); - testFilter("fare <> 0", "NOT fare: 0", null, sessionHolder); - testFilter("0 < fare", "fare > 0", null, sessionHolder); - testFilter("0 <= fare", "fare >= 0", null, sessionHolder); - testFilter("0 > fare", "fare < 0", null, sessionHolder); - testFilter("0 >= fare", "fare <= 0", null, sessionHolder); - testFilter("0 = fare", "fare: 0", null, sessionHolder); - testFilter("0 != fare", "NOT fare: 0", null, sessionHolder); - testFilter("0 <> fare", "NOT fare: 0", null, sessionHolder); + // Numeric comparisons + testPushDown(sessionHolder, "fare > 0", "fare > 0", null); + testPushDown(sessionHolder, "fare >= 0", "fare >= 0", null); + testPushDown(sessionHolder, "fare < 0", "fare < 0", null); + testPushDown(sessionHolder, "fare <= 0", "fare <= 0", null); + testPushDown(sessionHolder, "fare = 0", "fare: 0", null); + testPushDown(sessionHolder, "fare != 0", "NOT fare: 0", null); + testPushDown(sessionHolder, "fare <> 0", "NOT fare: 0", null); + testPushDown(sessionHolder, "0 < fare", "fare > 0", null); + testPushDown(sessionHolder, "0 <= fare", "fare >= 0", null); + testPushDown(sessionHolder, "0 > fare", "fare < 0", null); + testPushDown(sessionHolder, "0 >= fare", "fare <= 0", null); + testPushDown(sessionHolder, "0 = fare", "fare: 0", null); + testPushDown(sessionHolder, "0 != fare", "NOT fare: 0", null); + testPushDown(sessionHolder, "0 <> fare", "NOT fare: 0", null); } @Test - public void testOrPushdown() + public void testBetweenPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("fare > 0 OR city.Name like 'b%'", "(fare > 0 OR city.Name: \"b*\")", null, sessionHolder); - testFilter( + // Normal cases + testPushDown(sessionHolder, "fare BETWEEN 0 AND 5", "fare >= 0 AND fare <= 5", null); + testPushDown(sessionHolder, "fare BETWEEN 5 AND 0", "fare >= 5 AND fare <= 0", null); + + // No push down for non-constant expressions + testPushDown( + sessionHolder, + "fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)", + null, + "fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)"); + + // If the last two arguments of BETWEEN are not numeric constants, then the CLP connector + // won't push them down. + testPushDown(sessionHolder, "city.Name BETWEEN 'a' AND 'b'", null, "city.Name BETWEEN 'a' AND 'b'"); + } + + @Test + public void testOrPushDown() + { + SessionHolder sessionHolder = new SessionHolder(); + + // OR conditions with partial push down support + testPushDown(sessionHolder, "fare > 0 OR city.Name like 'b%'", "(fare > 0 OR city.Name: \"b*\")", null); + testPushDown( + sessionHolder, "lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1", null, - "(lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1)", - sessionHolder); + "(lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1)"); // Multiple ORs - testFilter( + testPushDown( + sessionHolder, "fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1", null, - "fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1", - sessionHolder); - testFilter( + "fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1"); + testPushDown( + sessionHolder, "fare > 0 OR city.Name like 'b%' OR city.Region.Id != 1", "((fare > 0 OR city.Name: \"b*\") OR NOT city.Region.Id: 1)", - null, - sessionHolder); + null); } @Test - public void testAndPushdown() + public void testAndPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("fare > 0 AND city.Name like 'b%'", "(fare > 0 AND city.Name: \"b*\")", null, sessionHolder); - testFilter( - "lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", - "(NOT city.Region.Id: 1)", - "lower(city.Region.Name) = 'hello world'", - sessionHolder); + // AND conditions with partial/full push down + testPushDown(sessionHolder, "fare > 0 AND city.Name like 'b%'", "(fare > 0 AND city.Name: \"b*\")", null); + + testPushDown(sessionHolder, "lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", "(NOT city.Region.Id: 1)", "lower(city.Region.Name) = 'hello world'"); // Multiple ANDs - testFilter( + testPushDown( + sessionHolder, "fare > 0 AND city.Name like 'b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", "(((fare > 0 AND city.Name: \"b*\")) AND NOT city.Region.Id: 1)", - "(lower(city.Region.Name) = 'hello world')", - sessionHolder); - testFilter( + "(lower(city.Region.Name) = 'hello world')"); + + testPushDown( + sessionHolder, "fare > 0 AND city.Name like '%b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", "(((fare > 0)) AND NOT city.Region.Id: 1)", - "city.Name like '%b%' AND lower(city.Region.Name) = 'hello world'", - sessionHolder); + "city.Name like '%b%' AND lower(city.Region.Name) = 'hello world'"); } @Test - public void testNotPushdown() + public void testNotPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city.Region.Name NOT LIKE 'hello%'", "NOT city.Region.Name: \"hello*\"", null, sessionHolder); - testFilter("NOT (city.Region.Name LIKE 'hello%')", "NOT city.Region.Name: \"hello*\"", null, sessionHolder); - testFilter("city.Name != 'hello world'", "NOT city.Name: \"hello world\"", null, sessionHolder); - testFilter("city.Name <> 'hello world'", "NOT city.Name: \"hello world\"", null, sessionHolder); - testFilter("NOT (city.Name = 'hello world')", "NOT city.Name: \"hello world\"", null, sessionHolder); - testFilter("fare != 0", "NOT fare: 0", null, sessionHolder); - testFilter("fare <> 0", "NOT fare: 0", null, sessionHolder); - testFilter("NOT (fare = 0)", "NOT fare: 0", null, sessionHolder); + // NOT and inequality predicates + testPushDown(sessionHolder, "city.Region.Name NOT LIKE 'hello%'", "NOT city.Region.Name: \"hello*\"", null); + testPushDown(sessionHolder, "NOT (city.Region.Name LIKE 'hello%')", "NOT city.Region.Name: \"hello*\"", null); + testPushDown(sessionHolder, "city.Name != 'hello world'", "NOT city.Name: \"hello world\"", null); + testPushDown(sessionHolder, "city.Name <> 'hello world'", "NOT city.Name: \"hello world\"", null); + testPushDown(sessionHolder, "NOT (city.Name = 'hello world')", "NOT city.Name: \"hello world\"", null); + testPushDown(sessionHolder, "fare != 0", "NOT fare: 0", null); + testPushDown(sessionHolder, "fare <> 0", "NOT fare: 0", null); + testPushDown(sessionHolder, "NOT (fare = 0)", "NOT fare: 0", null); // Multiple NOTs - testFilter("NOT (NOT fare = 0)", "NOT NOT fare: 0", null, sessionHolder); - testFilter("NOT (fare = 0 AND city.Name = 'hello world')", "NOT (fare: 0 AND city.Name: \"hello world\")", null, sessionHolder); - testFilter("NOT (fare = 0 OR city.Name = 'hello world')", "NOT (fare: 0 OR city.Name: \"hello world\")", null, sessionHolder); + testPushDown(sessionHolder, "NOT (NOT fare = 0)", "NOT NOT fare: 0", null); + testPushDown(sessionHolder, "NOT (fare = 0 AND city.Name = 'hello world')", "NOT (fare: 0 AND city.Name: \"hello world\")", null); + testPushDown(sessionHolder, "NOT (fare = 0 OR city.Name = 'hello world')", "NOT (fare: 0 OR city.Name: \"hello world\")", null); } @Test - public void testInPushdown() + public void testInPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city.Name IN ('hello world', 'hello world 2')", "(city.Name: \"hello world\" OR city.Name: \"hello world 2\")", null, sessionHolder); + // IN predicate + testPushDown(sessionHolder, "city.Name IN ('hello world', 'hello world 2')", "(city.Name: \"hello world\" OR city.Name: \"hello world 2\")", null); } @Test - public void testIsNullPushdown() + public void testIsNullPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city.Name IS NULL", "NOT city.Name: *", null, sessionHolder); - testFilter("city.Name IS NOT NULL", "NOT NOT city.Name: *", null, sessionHolder); - testFilter("NOT (city.Name IS NULL)", "NOT NOT city.Name: *", null, sessionHolder); + // IS NULL / IS NOT NULL predicates + testPushDown(sessionHolder, "city.Name IS NULL", "NOT city.Name: *", null); + testPushDown(sessionHolder, "city.Name IS NOT NULL", "NOT NOT city.Name: *", null); + testPushDown(sessionHolder, "NOT (city.Name IS NULL)", "NOT NOT city.Name: *", null); } @Test - public void testComplexPushdown() + public void testComplexPushDown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter( + // Complex AND/OR with partial pushdown + testPushDown( + sessionHolder, "(fare > 0 OR city.Name like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", "((fare > 0 OR city.Name: \"b*\"))", - "(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", - sessionHolder); - testFilter( + "(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)"); + + testPushDown( + sessionHolder, "city.Region.Id = 1 AND (fare > 0 OR city.Name NOT like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", "((city.Region.Id: 1 AND (fare > 0 OR NOT city.Name: \"b*\")))", - "lower(city.Region.Name) = 'hello world' OR city.Name IS NULL", - sessionHolder); + "lower(city.Region.Name) = 'hello world' OR city.Name IS NULL"); } - private void testFilter(String sqlExpression, String expectedKqlExpression, String expectedRemainingExpression, SessionHolder sessionHolder) + @Test + public void testMetadataSqlGeneration() + { + SessionHolder sessionHolder = new SessionHolder(); + Set testMetadataFilterColumns = ImmutableSet.of("fare"); + + // Normal case + testPushDown( + sessionHolder, + "(fare > 0 AND city.Name like 'b%')", + "(fare > 0 AND city.Name: \"b*\")", + "(\"fare\" > 0)", + testMetadataFilterColumns); + + // With BETWEEN + testPushDown( + sessionHolder, + "((fare BETWEEN 0 AND 5) AND city.Name like 'b%')", + "(fare >= 0 AND fare <= 5 AND city.Name: \"b*\")", + "(\"fare\" >= 0 AND \"fare\" <= 5)", + testMetadataFilterColumns); + + // The cases of that the metadata filter column exist but cannot be push down + testPushDown( + sessionHolder, + "(fare > 0 OR city.Name like 'b%')", + "(fare > 0 OR city.Name: \"b*\")", + null, + testMetadataFilterColumns); + testPushDown( + sessionHolder, + "(fare > 0 AND city.Name like 'b%') OR city.Region.Id = 1", + "((fare > 0 AND city.Name: \"b*\") OR city.Region.Id: 1)", + null, + testMetadataFilterColumns); + + // Complicated case + testPushDown( + sessionHolder, + "fare = 0 AND (city.Name like 'b%' OR city.Region.Id = 1)", + "(fare: 0 AND (city.Name: \"b*\" OR city.Region.Id: 1))", + "(\"fare\" = 0)", + testMetadataFilterColumns); + } + + private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedRemaining) + { + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of()); + testFilter(clpExpression, expectedKql, expectedRemaining, sessionHolder); + } + + private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedMetadataSqlQuery, Set metadataFilterColumns) + { + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns); + testFilter(clpExpression, expectedKql, null, sessionHolder); + if (expectedMetadataSqlQuery != null) { + assertTrue(clpExpression.getMetadataSqlQuery().isPresent()); + assertEquals(clpExpression.getMetadataSqlQuery().get(), expectedMetadataSqlQuery); + } + else { + assertFalse(clpExpression.getMetadataSqlQuery().isPresent()); + } + } + + private ClpExpression tryPushDown(String sqlExpression, SessionHolder sessionHolder, Set metadataFilterColumns) + { + RowExpression pushDownExpression = getRowExpression(sqlExpression, sessionHolder); + return pushDownExpression.accept( + new ClpFilterToKqlConverter( + standardFunctionResolution, + functionAndTypeManager, + variableToColumnHandleMap, + metadataFilterColumns), + null); + } + + private void testFilter( + ClpExpression clpExpression, + String expectedKqlExpression, + String expectedRemainingExpression, + SessionHolder sessionHolder) { - RowExpression actualExpression = getRowExpression(sqlExpression, sessionHolder); - ClpExpression clpExpression = actualExpression.accept(new ClpFilterToKqlConverter(standardFunctionResolution, functionAndTypeManager, variableToColumnHandleMap), null); Optional kqlExpression = clpExpression.getPushDownExpression(); Optional remainingExpression = clpExpression.getRemainingExpression(); if (expectedKqlExpression != null) { assertTrue(kqlExpression.isPresent()); assertEquals(kqlExpression.get(), expectedKqlExpression); } + else { + assertFalse(kqlExpression.isPresent()); + } if (expectedRemainingExpression != null) { assertTrue(remainingExpression.isPresent()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java new file mode 100644 index 0000000000000..024d5fdade55a --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java @@ -0,0 +1,104 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp; + +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableSet; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.Set; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +@Test(singleThreaded = true) +public class TestClpMetadataFilterConfig +{ + private String filterConfigPath; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-metadata-filter.json"); + if (resource == null) { + throw new FileNotFoundException("test-metadata-filter.json not found in resources"); + } + + filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + + @Test + public void checkRequiredFilters() + { + ClpConfig config = new ClpConfig(); + config.setMetadataFilterConfig(filterConfigPath); + ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); + SchemaTableName testTableSchemaTableName = new SchemaTableName("default", "table_1"); + assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( + testTableSchemaTableName, + "(\"level\" >= 1 AND \"level\" <= 3)")); + filterProvider.checkContainsRequiredFilters( + testTableSchemaTableName, + "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"); + } + + @Test + public void getFilterNames() + { + ClpConfig config = new ClpConfig(); + config.setMetadataFilterConfig(filterConfigPath); + ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); + Set catalogFilterNames = filterProvider.getColumnNames("clp"); + assertEquals(ImmutableSet.of("level"), catalogFilterNames); + Set schemaFilterNames = filterProvider.getColumnNames("clp.default"); + assertEquals(ImmutableSet.of("level", "author"), schemaFilterNames); + Set tableFilterNames = filterProvider.getColumnNames("clp.default.table_1"); + assertEquals(ImmutableSet.of("level", "author", "msg.timestamp", "file_name"), tableFilterNames); + } + + @Test + public void remapSql() + { + ClpConfig config = new ClpConfig(); + config.setMetadataFilterConfig(filterConfigPath); + ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); + + String metadataFilterSql1 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"; + String remappedSql1 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql1); + assertEquals(remappedSql1, "(end_timestamp > 1234 AND begin_timestamp < 5678)"); + + String metadataFilterSql2 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" <= 5678)"; + String remappedSql2 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql2); + assertEquals(remappedSql2, "(end_timestamp >= 1234 AND begin_timestamp <= 5678)"); + + String metadataFilterSql3 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" <= 5678)"; + String remappedSql3 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql3); + assertEquals(remappedSql3, "(end_timestamp > 1234 AND begin_timestamp <= 5678)"); + + String metadataFilterSql4 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" < 5678)"; + String remappedSql4 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql4); + assertEquals(remappedSql4, "(end_timestamp >= 1234 AND begin_timestamp < 5678)"); + + String metadataFilterSql5 = "(\"msg.timestamp\" = 1234)"; + String remappedSql5 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql5); + assertEquals(remappedSql5, "((begin_timestamp <= 1234 AND end_timestamp >= 1234))"); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index 7dcd3c768d4e9..fed281fef9453 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -15,6 +15,7 @@ import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -27,10 +28,13 @@ import java.util.Optional; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVE_STORAGE_DIRECTORY_BASE; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ArchivesTableRow; import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.DbHandle; import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle; import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupSplit; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static org.testng.Assert.assertEquals; @Test(singleThreaded = true) @@ -38,7 +42,7 @@ public class TestClpSplit { private DbHandle dbHandle; private ClpSplitProvider clpSplitProvider; - private Map> tableSplits; + private Map> tableSplits; @BeforeMethod public void setUp() @@ -51,10 +55,16 @@ public void setUp() for (int i = 0; i < numKeys; i++) { String key = "test_" + i; - List values = new ArrayList<>(); + List values = new ArrayList<>(); for (int j = 0; j < numValuesPerKey; j++) { - values.add("id_" + j); + // We generate synthetic begin_timestamp and end_timestamp values for each split + // by offsetting two base timestamps (1700000000000L and 1705000000000L) with a + // fixed increment per split (10^10 * j). + values.add(new ArchivesTableRow( + "id_" + j, + 1700000000000L + 10000000000L * j, + 1705000000000L + 10000000000L * j)); } tableSplits.put(key, values); @@ -71,24 +81,67 @@ public void tearDown() @Test public void testListSplits() { - for (Map.Entry> entry : tableSplits.entrySet()) { - String tableName = entry.getKey(); - String tablePath = ARCHIVE_STORAGE_DIRECTORY_BASE + tableName; - List expectedSplits = entry.getValue(); - ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( - new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), Optional.empty()); - List splits = clpSplitProvider.listSplits(layoutHandle); - assertEquals(splits.size(), expectedSplits.size()); - - ImmutableSet actualSplitPaths = splits.stream() - .map(ClpSplit::getPath) - .collect(ImmutableSet.toImmutableSet()); - - ImmutableSet expectedSplitPaths = expectedSplits.stream() - .map(split -> tablePath + "/" + split) - .collect(ImmutableSet.toImmutableSet()); - - assertEquals(actualSplitPaths, expectedSplitPaths); + for (Map.Entry> entry : tableSplits.entrySet()) { + // Without metadata filters + compareListSplitsResult(entry, Optional.empty(), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); + + // query_begin_ts < archives_min_ts && query_end_ts > archives_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1795000000001)"), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); + + // query_begin_ts < archives_min_ts && query_end_ts > archives_min_ts && query_end_ts < archives_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1744999999999)"), ImmutableList.of(0, 1, 2, 3, 4)); + + // query_end_ts < archives_min_ts + compareListSplitsResult(entry, Optional.of("(begin_timestamp < 1699999999999)"), ImmutableList.of()); + + // query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts > archives_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp < 1795000000001)"), ImmutableList.of(5, 6, 7, 8, 9)); + + // query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts < archives_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp <= 1770000000000)"), ImmutableList.of(5, 6, 7)); + + // query_begin_ts > archives_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp > 1795000000001)"), ImmutableList.of()); + + // query_begin_ts = archive_min_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1700000000000 AND begin_timestamp <= 1700000000000)"), ImmutableList.of(0)); + + // query_begin_ts = archive_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1795000000000 AND begin_timestamp <= 1795000000000)"), ImmutableList.of(9)); + + // query_ts = x && x > archive_min_ts && x < archive_max_ts (non-exist) + compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000001 AND begin_timestamp <= 1715000000001)"), ImmutableList.of()); + + // query_ts = x && x > archive_min_ts && x < archive_max_ts + compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000000 AND begin_timestamp <= 1715000000000)"), ImmutableList.of(1)); } } + + private void compareListSplitsResult( + Map.Entry> entry, + Optional metadataSql, + List expectedSplitIds) + { + String tableName = entry.getKey(); + String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; + ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( + new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), + Optional.empty(), + metadataSql); + List expectedSplits = expectedSplitIds.stream() + .map(expectedSplitId -> entry.getValue().get(expectedSplitId)) + .collect(toImmutableList()); + List actualSplits = clpSplitProvider.listSplits(layoutHandle); + assertEquals(actualSplits.size(), expectedSplits.size()); + + ImmutableSet actualSplitPaths = actualSplits.stream() + .map(ClpSplit::getPath) + .collect(toImmutableSet()); + + ImmutableSet expectedSplitPaths = expectedSplits.stream() + .map(split -> tablePath + "/" + split.getId()) + .collect(toImmutableSet()); + + assertEquals(actualSplitPaths, expectedSplitPaths); + } } diff --git a/presto-clp/src/test/resources/test-metadata-filter.json b/presto-clp/src/test/resources/test-metadata-filter.json new file mode 100644 index 0000000000000..266a23b1a26e7 --- /dev/null +++ b/presto-clp/src/test/resources/test-metadata-filter.json @@ -0,0 +1,25 @@ +{ + "clp": [ + { + "columnName": "level" + } + ], + "clp.default": [ + { + "columnName": "author" + } + ], + "clp.default.table_1": [ + { + "columnName": "msg.timestamp", + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + }, + "required": true + }, + { + "columnName": "file_name" + } + ] +} diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index e1beb5c180c1b..666c601b85f5d 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -29,6 +29,7 @@ following contents, modifying the properties as appropriate: clp.metadata-db-name=clp_db clp.metadata-db-user=clp_user clp.metadata-db-password=clp_password + clp.metadata-filter-config=/path/to/metadata-filter-config.json clp.metadata-table-prefix=clp_ clp.split-provider-type=mysql @@ -68,6 +69,7 @@ Property Name Description database name is not specified in the URL. ``clp.metadata-db-password`` The password for the metadata database user. This option is required if ``clp.metadata-provider-type`` is set to ``mysql``. +``clp.metadata-filter-config`` The absolute path of the metadata filter config file. ``clp.metadata-table-prefix`` A string prefix prepended to all metadata table names when querying the database. Useful for namespacing or avoiding collisions. This option is required if ``clp.metadata-provider-type`` is set to ``mysql``. @@ -94,6 +96,118 @@ If you prefer to use a different source--or the same source with a custom implem implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly. +Metadata Filter Config File +---------------------------- + +The metadata filter config file allows you to configure the set of columns that can be used to filter out irrelevant +splits (CLP archives) when querying CLP's metadata database. This can significantly improve performance by reducing the +amount of data that needs to be scanned. For a given query, the connector will translate any supported filter predicates +that involve the configured columns into a query against CLP's metadata database. + +The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps +to an array of :ref:`filter configs`. + + +.. _scopes: + +Scopes +^^^^^^ + +A *scope* can be one of the following: + +- A catalog name +- A fully-qualified schema name +- A fully-qualified table name + +Filter configs under a particular scope will apply to all child scopes. For example, filter configs at the schema level +will apply to all tables within that schema. + +.. _filter-configs: + +Filter Configs +^^^^^^^^^^^^^^ + +Each `filter config` indicates how a *data column*---a column in the Presto table---should be mapped to a *metadata +column*---a column in CLP's metadata database. In most cases, the data column and the metadata column will have the same +name; but in some cases, the data column may be remapped. + +For example, an integer data column (e.g., ``timestamp``), may be remapped to a pair of metadata columns that represent +the range of possible values (e.g., ``begin_timestamp`` and ``end_timestamp``) of the data column within a split. + +Each *filter config* has the following properties: + +- ``columnName``: The data column's name. + + .. note:: Currently, only numeric-type columns can be used as metadata filters. + +- ``rangeMapping`` *(optional)*: an object with the following properties: + + .. note:: This option is only valid if the column has a numeric type. + + - ``lowerBound``: The metadata column that represents the lower bound of values in a split for the data column. + - ``upperBound``: The metadata column that represents the upper bound of values in a split for the data column. + + +- ``required`` *(optional, defaults to false)*: indicates whether the filter **must** be present in the translated + metadata filter SQL query. If a required filter is missing or cannot be pushed down, the query will be rejected. + + +Example +^^^^^^^ + +The code block shows an example metadata filter config file: + +.. code-block:: json + + { + "clp": [ + { + "columnName": "level" + } + ], + "clp.default": [ + { + "columnName": "author" + } + ], + "clp.default.table_1": [ + { + "columnName": "msg.timestamp", + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + }, + "required": true + }, + { + "columnName": "file_name" + } + ] + } + +- The first key-value pair adds the following filter configs for all schemas and tables under the ``clp`` catalog: + + - The column ``level`` is used as-is without remapping. + +- The second key-value pair adds the following filter configs for all tables under the ``clp.default`` schema: + + - The column ``author`` is used as-is without remapping. + +- The third key-value pair adds two filter configs for the table ``clp.default.table_1``: + + - The column ``msg.timestamp`` is remapped via a ``rangeMapping`` to the metadata columns ``begin_timestamp`` and + ``end_timestamp``, and is required to exist in every query. + - The column ``file_name`` is used as-is without remapping. + +Supported SQL Expressions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The connector supports translations from a Presto SQL query to the metadata filter query for the following expressions: + +- Comparisons between variables and constants (e.g., ``=``, ``!=``, ``<``, ``>``, ``<=``, ``>=``). +- Dereferencing fields from row-typed variables. +- Logical operators: ``AND``, ``OR``, and ``NOT``. + Data Types ----------