diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 7ffa6545b8ca0..121eb0d5ff17b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -29,11 +29,12 @@ public class ClpConfig private String metadataDbName; private String metadataDbUser; private String metadataDbPassword; - private String metadataFilterConfig; private String metadataTablePrefix; private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; + private String splitFilterConfig; + private SplitFilterProviderType splitFilterProviderType = SplitFilterProviderType.MYSQL; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public boolean isPolymorphicTypeEnabled() @@ -108,18 +109,6 @@ public ClpConfig setMetadataDbPassword(String metadataDbPassword) return this; } - public String getMetadataFilterConfig() - { - return metadataFilterConfig; - } - - @Config("clp.metadata-filter-config") - public ClpConfig setMetadataFilterConfig(String metadataFilterConfig) - { - this.metadataFilterConfig = metadataFilterConfig; - return this; - } - public String getMetadataTablePrefix() { return metadataTablePrefix; @@ -162,6 +151,30 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } + public String getSplitFilterConfig() + { + return splitFilterConfig; + } + + @Config("clp.split-filter-config") + public ClpConfig setSplitFilterConfig(String splitFilterConfig) + { + this.splitFilterConfig = splitFilterConfig; + return this; + } + + public SplitFilterProviderType getSplitFilterProviderType() + { + return splitFilterProviderType; + } + + @Config("clp.split-filter-provider-type") + public ClpConfig setSplitFilterProviderType(SplitFilterProviderType splitFilterProviderType) + { + this.splitFilterProviderType = splitFilterProviderType; + return this; + } + public SplitProviderType getSplitProviderType() { return splitProviderType; @@ -179,6 +192,11 @@ public enum MetadataProviderType MYSQL } + public enum SplitFilterProviderType + { + MYSQL + } + public enum SplitProviderType { MYSQL diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java index 50ba67c8d6200..7b24e7a650e3c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java @@ -15,6 +15,7 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; +import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; @@ -40,7 +41,7 @@ public class ClpConnector private final ClpSplitManager splitManager; private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpMetadataFilterProvider metadataFilterProvider; + private final ClpSplitFilterProvider splitFilterProvider; @Inject public ClpConnector( @@ -50,7 +51,7 @@ public ClpConnector( ClpSplitManager splitManager, FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, - ClpMetadataFilterProvider metadataFilterProvider) + ClpSplitFilterProvider splitFilterProvider) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); @@ -58,13 +59,13 @@ public ClpConnector( this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.metadataFilterProvider = requireNonNull(metadataFilterProvider, "metadataFilterProvider is null"); + this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, functionResolution, metadataFilterProvider); + return new ClpPlanOptimizerProvider(functionManager, functionResolution, splitFilterProvider); } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 94cc8bda560c4..2530c013455cc 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -29,8 +29,9 @@ public enum ClpErrorCode CLP_UNSUPPORTED_TYPE(3, EXTERNAL), CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL), - CLP_METADATA_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), - CLP_MANDATORY_METADATA_FILTER_NOT_VALID(11, USER_ERROR); + CLP_SPLIT_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), + CLP_MANDATORY_SPLIT_FILTER_NOT_VALID(11, USER_ERROR), + CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE(12, EXTERNAL); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java deleted file mode 100644 index d4de6a6f3bd78..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp; - -import com.facebook.presto.spi.PrestoException; -import com.facebook.presto.spi.SchemaTableName; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.Inject; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.function.Function; - -import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_METADATA_FILTER_NOT_VALID; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_METADATA_FILTER_CONFIG_NOT_FOUND; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; - -/** - * Loads and manages metadata filter configurations for the CLP connector. - *

- * The configuration file is specified by the {@code clp.metadata-filter-config} property - * and defines metadata filters used to optimize query execution through split pruning. - *

- * Each filter config indicates how a data column--a column in the Presto table--should be mapped to - * a metadata column--a column in CLP’s metadata database. - *

- * Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under - * a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply - * to all tables within that schema). - *

- * Each filter config includes the following fields: - * - */ -public class ClpMetadataFilterProvider -{ - private final Map> filterMap; - - @Inject - public ClpMetadataFilterProvider(ClpConfig config) - { - requireNonNull(config, "config is null"); - - if (null == config.getMetadataFilterConfig()) { - filterMap = ImmutableMap.of(); - return; - } - ObjectMapper mapper = new ObjectMapper(); - try { - filterMap = mapper.readValue( - new File(config.getMetadataFilterConfig()), - new TypeReference>>() {}); - } - catch (IOException e) { - throw new PrestoException(CLP_METADATA_FILTER_CONFIG_NOT_FOUND, "Failed to metadata filter config file open."); - } - } - - public void checkContainsRequiredFilters(SchemaTableName schemaTableName, String metadataFilterSql) - { - boolean hasRequiredMetadataFilterColumns = true; - ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); - for (String columnName : getRequiredColumnNames(format("%s.%s", CONNECTOR_NAME, schemaTableName))) { - if (!metadataFilterSql.contains(columnName)) { - hasRequiredMetadataFilterColumns = false; - notFoundListBuilder.add(columnName); - } - } - if (!hasRequiredMetadataFilterColumns) { - throw new PrestoException( - CLP_MANDATORY_METADATA_FILTER_NOT_VALID, - notFoundListBuilder.build() + " is a mandatory metadata filter column but not valid"); - } - } - - /** - * Rewrites the given SQL string to remap filter conditions based on the configured range - * mappings for the given scope. - * - *

The {@code scope} follows the format {@code catalog[.schema][.table]}, and determines - * which filter mappings to apply, since mappings from more specific scopes (e.g., table-level) - * override or supplement those from broader scopes (e.g., catalog-level). For each scope - * (catalog, schema, table), this method collects all range mappings defined in the metadata - * filter configuration. - * - *

This method performs regex-based replacements to convert numeric filter expressions such - * as: - *

    - *
  • {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
  • - *
  • {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
  • - *
  • {@code "msg.timestamp" = 4567} → - * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
  • - *
- * - * @param scope - * @param sql - * @return the rewritten SQL string - */ - public String remapFilterSql(String scope, String sql) - { - String[] splitScope = scope.split("\\."); - - Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); - - if (1 < splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (3 == splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); - } - - String remappedSql = sql; - for (Map.Entry entry : mappings.entrySet()) { - String key = entry.getKey(); - RangeMapping value = entry.getValue(); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(>=?)\\s([0-9]*)", key), - format("%s $2 $3", value.upperBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(<=?)\\s([0-9]*)", key), - format("%s $2 $3", value.lowerBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(=)\\s([0-9]*)", key), - format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound)); - } - return remappedSql; - } - - public Set getColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters); - } - - private Set getRequiredColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters); - } - - private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor) - { - String[] splitScope = scope.split("\\."); - ImmutableSet.Builder builder = ImmutableSet.builder(); - - builder.addAll(extractor.apply(filterMap.get(splitScope[0]))); - - if (splitScope.length > 1) { - builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (splitScope.length == 3) { - builder.addAll(extractor.apply(filterMap.get(scope))); - } - - return builder.build(); - } - - private Set getAllColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } - - private Set getRequiredColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .filter(filter -> filter.required) - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } - - private Map getAllMappingsFromFilters(List filters) - { - return null != filters - ? filters.stream() - .filter(filter -> null != filter.rangeMapping) - .collect(toImmutableMap( - filter -> filter.columnName, - filter -> filter.rangeMapping)) - : ImmutableMap.of(); - } - - private static class RangeMapping - { - @JsonProperty("lowerBound") - public String lowerBound; - - @JsonProperty("upperBound") - public String upperBound; - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof RangeMapping)) { - return false; - } - RangeMapping that = (RangeMapping) o; - return Objects.equals(lowerBound, that.lowerBound) && - Objects.equals(upperBound, that.upperBound); - } - - @Override - public int hashCode() - { - return Objects.hash(lowerBound, upperBound); - } - } - - private static class Filter - { - @JsonProperty("columnName") - public String columnName; - - @JsonProperty("rangeMapping") - public RangeMapping rangeMapping; - - @JsonProperty("required") - public boolean required; - } -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index 2a5144b61d812..bf801d0d87242 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -18,14 +18,18 @@ import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.PrestoException; import com.google.inject.Binder; import com.google.inject.Scopes; import static com.facebook.airlift.configuration.ConfigBinder.configBinder; import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType; +import static com.facebook.presto.plugin.clp.ClpConfig.SplitFilterProviderType; import static com.facebook.presto.plugin.clp.ClpConfig.SplitProviderType; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE; public class ClpModule @@ -37,11 +41,18 @@ protected void setup(Binder binder) binder.bind(ClpConnector.class).in(Scopes.SINGLETON); binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); - binder.bind(ClpMetadataFilterProvider.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); ClpConfig config = buildConfigObject(ClpConfig.class); + + if (SplitFilterProviderType.MYSQL == config.getSplitFilterProviderType()) { + binder.bind(ClpSplitFilterProvider.class).to(ClpMySqlSplitFilterProvider.class).in(Scopes.SINGLETON); + } + else { + throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, "Unsupported split filter provider type: " + config.getSplitFilterProviderType()); + } + if (config.getMetadataProviderType() == MetadataProviderType.MYSQL) { binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java index 7c907fc1d3191..f9b4f611fe7a3 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java @@ -14,6 +14,7 @@ package com.facebook.presto.plugin.clp; import com.facebook.airlift.log.Logger; +import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; @@ -28,13 +29,17 @@ import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableSet; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Optional; +import java.util.Set; import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; +import static java.lang.String.format; import static java.util.Objects.requireNonNull; public class ClpPlanOptimizer @@ -43,29 +48,49 @@ public class ClpPlanOptimizer private static final Logger log = Logger.get(ClpPlanOptimizer.class); private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpMetadataFilterProvider metadataFilterProvider; + private final ClpSplitFilterProvider splitFilterProvider; - public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider) + public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) { this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.metadataFilterProvider = requireNonNull(metadataFilterProvider, "metadataFilterProvider is null"); + this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); } @Override public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator) { - return rewriteWith(new Rewriter(idAllocator), maxSubplan); + Rewriter rewriter = new Rewriter(idAllocator); + PlanNode optimizedPlanNode = rewriteWith(rewriter, maxSubplan); + + // Throw exception if any required split filters are missing + if (!rewriter.tableScopeSet.isEmpty() && !rewriter.hasVisitedFilter) { + splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, ""); + } + return optimizedPlanNode; } private class Rewriter extends ConnectorPlanRewriter { private final PlanNodeIdAllocator idAllocator; + private final Set tableScopeSet; + private boolean hasVisitedFilter; public Rewriter(PlanNodeIdAllocator idAllocator) { this.idAllocator = idAllocator; + hasVisitedFilter = false; + tableScopeSet = new HashSet<>(); + } + + @Override + public PlanNode visitTableScan(TableScanNode node, RewriteContext context) + { + TableHandle tableHandle = node.getTable(); + ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); + tableScopeSet.add(format("%s.%s", CONNECTOR_NAME, clpTableHandle.getSchemaTableName())); + return super.visitTableScan(node, context); } @Override @@ -74,17 +99,18 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) if (!(node.getSource() instanceof TableScanNode)) { return node; } + hasVisitedFilter = true; TableScanNode tableScanNode = (TableScanNode) node.getSource(); Map assignments = new HashMap<>(tableScanNode.getAssignments()); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - String scope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString(); + String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString(); ClpExpression clpExpression = node.getPredicate().accept( new ClpFilterToKqlConverter( functionResolution, functionManager, - metadataFilterProvider.getColumnNames(scope)), + splitFilterProvider.getColumnNames(tableScope)), assignments); Optional kqlQuery = clpExpression.getPushDownExpression(); Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery(); @@ -92,9 +118,9 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) // Perform required metadata filter checks before handling the KQL query (if kqlQuery // isn't present, we'll return early, skipping subsequent checks). - metadataFilterProvider.checkContainsRequiredFilters(clpTableHandle.getSchemaTableName(), metadataSqlQuery.orElse("")); + splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse("")); if (metadataSqlQuery.isPresent()) { - metadataSqlQuery = Optional.of(metadataFilterProvider.remapFilterSql(scope, metadataSqlQuery.get())); + metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get())); log.debug("Metadata SQL query: %s", metadataSqlQuery); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java index a8fd1623d172b..2beeec1126987 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -28,14 +29,14 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpMetadataFilterProvider metadataFilterProvider; + private final ClpSplitFilterProvider splitFilterProvider; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider) + public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.metadataFilterProvider = metadataFilterProvider; + this.splitFilterProvider = splitFilterProvider; } @Override @@ -47,6 +48,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, metadataFilterProvider)); + return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, splitFilterProvider)); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java new file mode 100644 index 0000000000000..31d24fd4df71c --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java @@ -0,0 +1,155 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.lang.String.format; + +/** + * Implementation for the CLP package's MySQL metadata database. + */ +public class ClpMySqlSplitFilterProvider + extends ClpSplitFilterProvider +{ + @Inject + public ClpMySqlSplitFilterProvider(ClpConfig config) + { + super(config); + } + + /** + * Performs regex-based replacements to rewrite {@code pushDownExpression} according to the + * {@code "rangeMapping"} field in {@link ClpMySqlCustomSplitFilterOptions}. For example: + *
    + *
  • {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
  • + *
  • {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
  • + *
  • {@code "msg.timestamp" = 4567} → + * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
  • + *
+ * + * @param scope the filter's scope + * @param pushDownExpression the expression to be rewritten + * @return the rewritten expression + */ + @Override + public String remapSplitFilterPushDownExpression(String scope, String pushDownExpression) + { + String[] splitScope = scope.split("\\."); + + Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); + + if (1 < splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); + } + + if (3 == splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); + } + + String remappedSql = pushDownExpression; + for (Map.Entry entry : mappings.entrySet()) { + String key = entry.getKey(); + ClpMySqlCustomSplitFilterOptions.RangeMapping value = entry.getValue(); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(>=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), + format("%s $2 $3", value.upperBound)); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(<=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), + format("%s $2 $3", value.lowerBound)); + remappedSql = remappedSql.replaceAll( + format("\"(%s)\"\\s(=)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), + format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound)); + } + return remappedSql; + } + + @Override + protected Class getCustomSplitFilterOptionsClass() + { + return ClpMySqlCustomSplitFilterOptions.class; + } + + private Map getAllMappingsFromFilters(List filters) + { + return null != filters + ? filters.stream() + .filter(filter -> + filter.customOptions instanceof ClpMySqlCustomSplitFilterOptions && + ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping != null) + .collect(toImmutableMap( + filter -> filter.columnName, + filter -> ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping)) + : ImmutableMap.of(); + } + + /** + * Custom options: + *
    + *
  • {@code rangeMapping} (optional): an object with the following properties: + *
      + *
    • {@code lowerBound}: The numeric metadata column that represents the lower bound + * of values in a split for the numeric data column.
    • + *
    • {@code upperBound}: The numeric metadata column that represents the upper bound + * of values in a split for the numeric data column.
    • + *
    + *
  • + *
+ */ + protected static class ClpMySqlCustomSplitFilterOptions + implements CustomSplitFilterOptions + { + @JsonProperty("rangeMapping") + public RangeMapping rangeMapping; + + public static class RangeMapping + { + @JsonProperty("lowerBound") + public String lowerBound; + + @JsonProperty("upperBound") + public String upperBound; + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (!(o instanceof RangeMapping)) { + return false; + } + RangeMapping that = (RangeMapping) o; + return Objects.equals(lowerBound, that.lowerBound) && + Objects.equals(upperBound, that.upperBound); + } + + @Override + public int hashCode() + { + return Objects.hash(lowerBound, upperBound); + } + } + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java new file mode 100644 index 0000000000000..0d7e37a803515 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Options for a how a column in a Presto query should be pushed down into a query against CLP's + * metadata database (during split pruning): + *
    + *
  • {@code columnName}: The column's name in the Presto query.
  • + * + *
  • {@code customOptions}: Options specific to the current + * {@link ClpSplitFilterProvider}.
  • + * + *
  • {@code required} (optional, defaults to {@code false}): Whether the filter must be + * present in the generated metadata query. If a required filter is missing or cannot be added to + * the metadata query, the original query will be rejected.
  • + *
+ */ +public class ClpSplitFilterConfig +{ + @JsonProperty("columnName") + public String columnName; + + @JsonProperty("customOptions") + public CustomSplitFilterOptions customOptions; + + @JsonProperty("required") + public boolean required; + + public interface CustomSplitFilterOptions + {} +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java new file mode 100644 index 0000000000000..3464c14883e26 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.io.IOException; + +import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; + +/** + * Uses the given {@link CustomSplitFilterOptions} implementation to deserialize the + * {@code "customOptions"} field in a {@link ClpSplitFilterConfig}. The implementation is determined + * by the implemented {@link ClpSplitFilterProvider}. + */ +public class ClpSplitFilterConfigCustomOptionsDeserializer + extends JsonDeserializer +{ + private final Class actualCustomSplitFilterOptionsClass; + + public ClpSplitFilterConfigCustomOptionsDeserializer(Class actualCustomSplitFilterOptionsClass) + { + this.actualCustomSplitFilterOptionsClass = actualCustomSplitFilterOptionsClass; + } + + @Override + public CustomSplitFilterOptions deserialize(JsonParser p, DeserializationContext ctxt) throws IOException + { + ObjectNode node = p.getCodec().readTree(p); + ObjectMapper mapper = (ObjectMapper) p.getCodec(); + + return mapper.treeToValue(node, actualCustomSplitFilterOptionsClass); + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java new file mode 100644 index 0000000000000..0609843aaf22f --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java @@ -0,0 +1,171 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.spi.PrestoException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; + +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; +import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static java.util.Objects.requireNonNull; + +/** + * Loads and manages {@link ClpSplitFilterConfig}s from a config file. + *

+ * The config file is specified by the {@code clp.split-filter-config} property. + *

+ * Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under + * a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply + * to all tables within that schema). + *

+ * Implementations of this class can customize filter configs through the {@code "customOptions"} + * field within each {@link ClpSplitFilterConfig}. + */ +public abstract class ClpSplitFilterProvider +{ + protected final Map> filterMap; + + public ClpSplitFilterProvider(ClpConfig config) + { + requireNonNull(config, "config is null"); + + if (null == config.getSplitFilterConfig()) { + filterMap = ImmutableMap.of(); + return; + } + ObjectMapper mapper = new ObjectMapper(); + SimpleModule module = new SimpleModule(); + module.addDeserializer( + CustomSplitFilterOptions.class, + new ClpSplitFilterConfigCustomOptionsDeserializer(getCustomSplitFilterOptionsClass())); + mapper.registerModule(module); + try { + filterMap = mapper.readValue( + new File(config.getSplitFilterConfig()), + new TypeReference>>() {}); + } + catch (IOException e) { + throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); + } + } + + /** + * Rewrites {@code pushDownExpression} to remap filter conditions based on the + * {@code "customOptions"} for the given scope. + *

+ * {@code scope} follows the format {@code catalog[.schema][.table]}, and determines which + * filter mappings to apply, since mappings from more specific scopes (e.g., table-level) + * override or supplement those from broader scopes (e.g., catalog-level). For each scope + * (catalog, schema, table), this method collects all mappings defined in + * {@code "customOptions"}. + * + * @param scope the scope of the filter + * @param pushDownExpression the expression to be rewritten + * @return the rewritten expression + */ + public abstract String remapSplitFilterPushDownExpression(String scope, String pushDownExpression); + + /** + * Checks for the given table, if {@code splitFilterPushDownExpression} contains all required + * fields. + * + * @param tableScopeSet the set of scopes of the tables that are being queried + * @param splitFilterPushDownExpression the expression to be checked + */ + public void checkContainsRequiredFilters(Set tableScopeSet, String splitFilterPushDownExpression) + { + boolean hasRequiredSplitFilterColumns = true; + ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); + for (String tableScope : tableScopeSet) { + for (String columnName : getRequiredColumnNames(tableScope)) { + if (!splitFilterPushDownExpression.contains(columnName)) { + hasRequiredSplitFilterColumns = false; + notFoundListBuilder.add(columnName); + } + } + } + if (!hasRequiredSplitFilterColumns) { + throw new PrestoException( + CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, + notFoundListBuilder.build() + " is a mandatory split filter column but not valid"); + } + } + + public Set getColumnNames(String scope) + { + return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters); + } + + /** + * Returns the implemented {@link CustomSplitFilterOptions} class. To respect our code style, we + * recommend implementing a {@code protected static class} as an inner class in the implemented + * {@link ClpSplitFilterProvider} class. + * + * @return the implemented {@link CustomSplitFilterOptions} class + */ + protected abstract Class getCustomSplitFilterOptionsClass(); + + private Set getRequiredColumnNames(String scope) + { + return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters); + } + + private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor) + { + String[] splitScope = scope.split("\\."); + ImmutableSet.Builder builder = ImmutableSet.builder(); + + builder.addAll(extractor.apply(filterMap.get(splitScope[0]))); + + if (splitScope.length > 1) { + builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1]))); + } + + if (splitScope.length == 3) { + builder.addAll(extractor.apply(filterMap.get(scope))); + } + + return builder.build(); + } + + private Set getAllColumnNamesFromFilters(List filters) + { + return null != filters ? filters.stream() + .map(filter -> filter.columnName) + .collect(toImmutableSet()) : ImmutableSet.of(); + } + + private Set getRequiredColumnNamesFromFilters(List filters) + { + return null != filters ? filters.stream() + .filter(filter -> filter.required) + .map(filter -> filter.columnName) + .collect(toImmutableSet()) : ImmutableSet.of(); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java new file mode 100644 index 0000000000000..27d9c86ee0092 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; + +import static java.lang.String.format; +import static org.testng.Assert.assertEquals; + +@Test(singleThreaded = true) +public class TestClpMySqlSplitFilterConfig +{ + private String filterConfigPath; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); + } + + filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + + @Test + public void remapSplitFilterPushDownExpression() + { + ClpConfig config = new ClpConfig(); + config.setSplitFilterConfig(filterConfigPath); + ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); + + // Integer + testRange(1234, 5678, filterProvider); + testRange(-5678, -1234, filterProvider); + + // Decimal + testRange(1234.001, 5678.999, filterProvider); + testRange(-5678.999, -1234.001, filterProvider); + + // Scientific + testRange("1.234E3", "5.678e3", filterProvider); + testRange("-1.234e-3", "-5.678E-3", filterProvider); + } + + private void testRange(T lowerBound, T upperBound, ClpMySqlSplitFilterProvider filterProvider) + { + String splitFilterSql1 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); + String remappedSql1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql1); + assertEquals(remappedSql1, format("(end_timestamp > %s AND begin_timestamp < %s)", lowerBound, upperBound)); + + String splitFilterSql2 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); + String remappedSql2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql2); + assertEquals(remappedSql2, format("(end_timestamp >= %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + + String splitFilterSql3 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); + String remappedSql3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql3); + assertEquals(remappedSql3, format("(end_timestamp > %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + + String splitFilterSql4 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); + String remappedSql4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql4); + assertEquals(remappedSql4, format("(end_timestamp >= %s AND begin_timestamp < %s)", lowerBound, upperBound)); + + String splitFilterSql5 = format("(\"msg.timestamp\" = %s)", lowerBound); + String remappedSql5 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql5); + assertEquals(remappedSql5, format("((begin_timestamp <= %s AND end_timestamp >= %s))", lowerBound, lowerBound)); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java similarity index 51% rename from presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java index e3b8abf2f81d8..7a4058f617d0c 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java @@ -11,8 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.plugin.clp; +package com.facebook.presto.plugin.clp.split.filter; +import com.facebook.presto.plugin.clp.ClpConfig; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableSet; @@ -26,22 +27,24 @@ import java.nio.file.Paths; import java.util.Set; +import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; +import static java.lang.String.format; import static java.util.UUID.randomUUID; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) -public class TestClpMetadataFilterConfig +public class TestClpSplitFilterConfigCommon { private String filterConfigPath; @BeforeMethod public void setUp() throws IOException, URISyntaxException { - URL resource = getClass().getClassLoader().getResource("test-metadata-filter.json"); + URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); if (resource == null) { - throw new FileNotFoundException("test-metadata-filter.json not found in resources"); + throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); } filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); @@ -51,14 +54,14 @@ public void setUp() throws IOException, URISyntaxException public void checkRequiredFilters() { ClpConfig config = new ClpConfig(); - config.setMetadataFilterConfig(filterConfigPath); - ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); - SchemaTableName testTableSchemaTableName = new SchemaTableName("default", "table_1"); + config.setSplitFilterConfig(filterConfigPath); + ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); + Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( - testTableSchemaTableName, + testTableScopeSet, "(\"level\" >= 1 AND \"level\" <= 3)")); filterProvider.checkContainsRequiredFilters( - testTableSchemaTableName, + testTableScopeSet, "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"); } @@ -66,8 +69,8 @@ public void checkRequiredFilters() public void getFilterNames() { ClpConfig config = new ClpConfig(); - config.setMetadataFilterConfig(filterConfigPath); - ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); + config.setSplitFilterConfig(filterConfigPath); + ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); Set catalogFilterNames = filterProvider.getColumnNames("clp"); assertEquals(ImmutableSet.of("level"), catalogFilterNames); Set schemaFilterNames = filterProvider.getColumnNames("clp.default"); @@ -77,46 +80,18 @@ public void getFilterNames() } @Test - public void handleEmptyAndInvalidMetadataFilterConfig() + public void handleEmptyAndInvalidSplitFilterConfig() { ClpConfig config = new ClpConfig(); // Empty config - ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); + ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); assertTrue(filterProvider.getColumnNames("clp").isEmpty()); assertTrue(filterProvider.getColumnNames("abc.xyz").isEmpty()); assertTrue(filterProvider.getColumnNames("abc.opq.xyz").isEmpty()); // Invalid config - config.setMetadataFilterConfig(randomUUID().toString()); - assertThrows(PrestoException.class, () -> new ClpMetadataFilterProvider(config)); - } - - @Test - public void remapSql() - { - ClpConfig config = new ClpConfig(); - config.setMetadataFilterConfig(filterConfigPath); - ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config); - - String metadataFilterSql1 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"; - String remappedSql1 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql1); - assertEquals(remappedSql1, "(end_timestamp > 1234 AND begin_timestamp < 5678)"); - - String metadataFilterSql2 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" <= 5678)"; - String remappedSql2 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql2); - assertEquals(remappedSql2, "(end_timestamp >= 1234 AND begin_timestamp <= 5678)"); - - String metadataFilterSql3 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" <= 5678)"; - String remappedSql3 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql3); - assertEquals(remappedSql3, "(end_timestamp > 1234 AND begin_timestamp <= 5678)"); - - String metadataFilterSql4 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" < 5678)"; - String remappedSql4 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql4); - assertEquals(remappedSql4, "(end_timestamp >= 1234 AND begin_timestamp < 5678)"); - - String metadataFilterSql5 = "(\"msg.timestamp\" = 1234)"; - String remappedSql5 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql5); - assertEquals(remappedSql5, "((begin_timestamp <= 1234 AND end_timestamp >= 1234))"); + config.setSplitFilterConfig(randomUUID().toString()); + assertThrows(PrestoException.class, () -> new ClpMySqlSplitFilterProvider(config)); } } diff --git a/presto-clp/src/test/resources/test-metadata-filter.json b/presto-clp/src/test/resources/test-mysql-split-filter.json similarity index 66% rename from presto-clp/src/test/resources/test-metadata-filter.json rename to presto-clp/src/test/resources/test-mysql-split-filter.json index 266a23b1a26e7..ea5be310aff63 100644 --- a/presto-clp/src/test/resources/test-metadata-filter.json +++ b/presto-clp/src/test/resources/test-mysql-split-filter.json @@ -12,9 +12,11 @@ "clp.default.table_1": [ { "columnName": "msg.timestamp", - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" + "customOptions": { + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + } }, "required": true }, diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index 7573f77e857b2..613f1e2f46e4b 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -1,22 +1,23 @@ -============= +############# CLP Connector -============= +############# .. contents:: :local: :backlinks: none :depth: 1 +******** Overview --------- +******** The CLP Connector enables SQL-based querying of `CLP `_ archives via Presto. This document describes how to configure the CLP Connector for use with a CLP cluster, as well as essential details for understanding the CLP connector. - +************* Configuration -------------- +************* To configure the CLP connector, create a catalog properties file ``etc/catalog/clp.properties`` with at least the following contents, modifying the properties as appropriate: @@ -29,66 +30,70 @@ following contents, modifying the properties as appropriate: clp.metadata-db-name=clp_db clp.metadata-db-user=clp_user clp.metadata-db-password=clp_password - clp.metadata-filter-config=/path/to/metadata-filter-config.json clp.metadata-table-prefix=clp_ + clp.split-filter-config=/path/to/split-filter-config.json + clp.split-filter-provider-type=mysql clp.split-provider-type=mysql - Configuration Properties ------------------------- +======================== The following configuration properties are available: -================================== ======================================================================== ========= -Property Name Description Default -================================== ======================================================================== ========= -``clp.polymorphic-type-enabled`` Enables or disables support for polymorphic types in CLP, allowing the ``false`` - same field to have different types. This is useful for schema-less, - semi-structured data where the same field may appear with different - types. - - When enabled, type annotations are added to conflicting field names to - distinguish between types. For example, if an ``id`` column appears with - both an ``int`` and ``string`` types, the connector will create two - columns named ``id_bigint`` and ``id_varchar``. - - Supported type annotations include ``bigint``, ``varchar``, ``double``, - ``boolean``, and ``array(varchar)`` (See `Data Types`_ for details). For - columns with only one type, the original column name is used. -``clp.metadata-provider-type`` Specifies the metadata provider type. Currently, the only supported ``mysql`` - type is a MySQL database, which is also used by the CLP package to store - metadata. Additional providers can be supported by implementing the - ``ClpMetadataProvider`` interface. -``clp.metadata-db-url`` The JDBC URL used to connect to the metadata database. This property is - required if ``clp.metadata-provider-type`` is set to ``mysql``. -``clp.metadata-db-name`` The name of the metadata database. This option is required if - ``clp.metadata-provider-type`` is set to ``mysql`` and the database name - is not specified in the URL. -``clp.metadata-db-user`` The database user with access to the metadata database. This option is - required if ``clp.metadata-provider-type`` is set to ``mysql`` and the - database name is not specified in the URL. -``clp.metadata-db-password`` The password for the metadata database user. This option is required if - ``clp.metadata-provider-type`` is set to ``mysql``. -``clp.metadata-filter-config`` The absolute path to an optional metadata filter config file. See the - :ref:`Metadata Filter Config File` section - for details. -``clp.metadata-table-prefix`` A string prefix prepended to all metadata table names when querying the - database. Useful for namespacing or avoiding collisions. This option is - required if ``clp.metadata-provider-type`` is set to ``mysql``. -``clp.metadata-expire-interval`` Defines how long, in seconds, metadata entries remain valid before they 600 - need to be refreshed. -``clp.metadata-refresh-interval`` Specifies how frequently metadata is refreshed from the source, in 60 - seconds. Set this to a lower value for frequently changing datasets or - to a higher value to reduce load. -``clp.split-provider-type`` Specifies the split provider type. By default, it uses the same type as ``mysql`` - the metadata provider with the same connection parameters. Additional - types can be supported by implementing the ``ClpSplitProvider`` - interface. -================================== ======================================================================== ========= - - +==================================== ======================================================================== ========= +Property Name Description Default +==================================== ======================================================================== ========= +``clp.polymorphic-type-enabled`` Enables or disables support for polymorphic types in CLP, allowing the ``false`` + same field to have different types. This is useful for schema-less, + semi-structured data where the same field may appear with different + types. + + When enabled, type annotations are added to conflicting field names to + distinguish between types. For example, if an ``id`` column appears with + both an ``int`` and ``string`` types, the connector will create two + columns named ``id_bigint`` and ``id_varchar``. + + Supported type annotations include ``bigint``, ``varchar``, ``double``, + ``boolean``, and ``array(varchar)`` (See `Data Types`_ for details). For + columns with only one type, the original column name is used. +``clp.metadata-provider-type`` Specifies the metadata provider type. Currently, the only supported ``mysql`` + type is a MySQL database, which is also used by the CLP package to store + metadata. Additional providers can be supported by implementing the + ``ClpMetadataProvider`` interface. +``clp.metadata-db-url`` The JDBC URL used to connect to the metadata database. This property is + required if ``clp.metadata-provider-type`` is set to ``mysql``. +``clp.metadata-db-name`` The name of the metadata database. This option is required if + ``clp.metadata-provider-type`` is set to ``mysql`` and the database name + is not specified in the URL. +``clp.metadata-db-user`` The database user with access to the metadata database. This option is + required if ``clp.metadata-provider-type`` is set to ``mysql`` and the + database name is not specified in the URL. +``clp.metadata-db-password`` The password for the metadata database user. This option is required if + ``clp.metadata-provider-type`` is set to ``mysql``. +``clp.metadata-table-prefix`` A string prefix prepended to all metadata table names when querying the + database. Useful for namespacing or avoiding collisions. This option is + required if ``clp.metadata-provider-type`` is set to ``mysql``. +``clp.metadata-expire-interval`` Defines how long, in seconds, metadata entries remain valid before they 600 + need to be refreshed. +``clp.metadata-refresh-interval`` Specifies how frequently metadata is refreshed from the source, in 60 + seconds. Set this to a lower value for frequently changing datasets or + to a higher value to reduce load. +``clp.split-filter-config`` The absolute path to an optional split filter config file. See the + :ref:`Split Filter Config File` section for + details. +``clp.split-filter-provider-type`` Specifies the split filter provider type. Currently, the only supported ``mysql`` + type is a MySQL database, which is also used by the CLP package to + store metadata. Additional providers can be supported by implementing + the ``ClpSplitFilterProvider`` interface. +``clp.split-provider-type`` Specifies the split provider type. By default, it uses the same type as ``mysql`` + the metadata provider with the same connection parameters. Additional + types can be supported by implementing the ``ClpSplitProvider`` + interface. +==================================== ======================================================================== ========= + +**************************** Metadata and Split Providers ----------------------------- +**************************** The CLP connector relies on metadata and split providers to retrieve information from various sources. By default, it uses a MySQL database for both metadata and split storage. We recommend using the CLP package for log ingestion, which @@ -98,12 +103,13 @@ If you prefer to use a different source--or the same source with a custom implem implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly. -.. _metadata-filter-config-file: +.. _split-filter-config-file: -Metadata Filter Config File ----------------------------- +*************************** +Split Filter Config File +*************************** -The metadata filter config file allows you to configure the set of columns that can be used to filter out irrelevant +The split filter config file allows you to configure the set of columns that can be used to filter out irrelevant splits (CLP archives) when querying CLP's metadata database. This can significantly improve performance by reducing the amount of data that needs to be scanned. For a given query, the connector will translate any supported filter predicates that involve the configured columns into a query against CLP's metadata database. @@ -111,11 +117,10 @@ that involve the configured columns into a query against CLP's metadata database The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps to an array of :ref:`filter configs`. - .. _scopes: Scopes -^^^^^^ +====== A *scope* can be one of the following: @@ -129,20 +134,30 @@ will apply to all tables within that schema. .. _filter-configs: Filter Configs -^^^^^^^^^^^^^^ +============== -Each `filter config` indicates how a *data column*---a column in the Presto table---should be mapped to a *metadata -column*---a column in CLP's metadata database. In most cases, the data column and the metadata column will have the same -name; but in some cases, the data column may be remapped. +Each filter config indicates how a *data column*---i.e., a column in the Presto table---should be mapped to one or +more *metadata columns*---i.e., columns in CLP's metadata database. For example, an integer data column (e.g., ``timestamp``), may be remapped to a pair of metadata columns that represent the range of possible values (e.g., ``begin_timestamp`` and ``end_timestamp``) of the data column within a split. -Each *filter config* has the following properties: +Each filter config has the following options: - ``columnName``: The data column's name. - .. note:: Currently, only numeric-type columns can be used as metadata filters. +- ``customOptions`` *(optional)*: Custom options for a split filter provider. Options for the default split filter + provider (``ClpMySqlSplitFilterProvider``) are :ref:`below`. + +- ``required`` *(optional, defaults to false)*: Whether the filter **must** be present in the generated metadata query. + If a required filter is missing or cannot be added to the metadata query, the original query will be rejected. + +.. _clp-mysql-split-filter-provider-config: + +ClpMySqlSplitFilterProvider-Specific Filter Config +----------------------------------------------- + +For ``ClpMySqlSplitFilterProvider``, the ``customOptions`` option of the filter config has the following sub-options: - ``rangeMapping`` *(optional)*: an object with the following properties: @@ -151,15 +166,10 @@ Each *filter config* has the following properties: - ``lowerBound``: The metadata column that represents the lower bound of values in a split for the data column. - ``upperBound``: The metadata column that represents the upper bound of values in a split for the data column. +Filter Config Example +--------------------- -- ``required`` *(optional, defaults to false)*: indicates whether the filter **must** be present in the translated - metadata filter SQL query. If a required filter is missing or cannot be pushed down, the query will be rejected. - - -Example -^^^^^^^ - -The code block shows an example metadata filter config file: +The code block shows an example filter config file: .. code-block:: json @@ -177,9 +187,11 @@ The code block shows an example metadata filter config file: "clp.default.table_1": [ { "columnName": "msg.timestamp", - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" + "customOptions": { + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + } }, "required": true }, @@ -203,17 +215,21 @@ The code block shows an example metadata filter config file: ``end_timestamp``, and is required to exist in every query. - The column ``file_name`` is used as-is without remapping. +If you prefer to use a different format for ``customOptions``, you can provide your own implementation of the +``ClpSplitFilterProvider`` interface, and configure the connector accordingly. + Supported SQL Expressions -^^^^^^^^^^^^^^^^^^^^^^^^^ +========================= -The connector supports translations from a Presto SQL query to the metadata filter query for the following expressions: +The connector supports translations from a Presto SQL query to the split filter query for the following expressions: - Comparisons between variables and constants (e.g., ``=``, ``!=``, ``<``, ``>``, ``<=``, ``>=``). - Dereferencing fields from row-typed variables. - Logical operators: ``AND``, ``OR``, and ``NOT``. +********** Data Types ----------- +********** The data type mappings are as follows: @@ -232,21 +248,21 @@ CLP Type Presto Type ====================== ==================== String Types -^^^^^^^^^^^^ +============ CLP uses three distinct string types: ``ClpString`` (strings with whitespace), ``VarString`` (strings without whitespace), and ``DateString`` (strings representing dates). Currently, all three are mapped to Presto's ``VARCHAR`` type. Array Types -^^^^^^^^^^^ +=========== CLP supports two array types: ``UnstructuredArray`` and ``StructuredArray``. Unstructured arrays are stored as strings in CLP and elements can be any type. However, in Presto arrays are homogeneous, so the elements are converted to strings when read. ``StructuredArray`` type is not supported in Presto. Object Types -^^^^^^^^^^^^ +============ CLP stores metadata using a global schema tree structure that captures all possible fields from various log structures. Internal nodes may represent objects containing nested fields as their children. In Presto, we map these internal object @@ -290,8 +306,9 @@ Each JSON log maps to this unified ``ROW`` type, with absent fields represented ``status``, ``thread_num``, ``backtrace``) become fields within the ``ROW``, clearly reflecting the nested and varying structures of the original JSON logs. +*********** SQL support ------------ +*********** The connector only provides read access to data. It does not support DDL operations, such as creating or dropping tables. Currently, we only support one ``default`` schema.