notFoundListBuilder = ImmutableList.builder();
+ for (String columnName : getRequiredColumnNames(format("%s.%s", CONNECTOR_NAME, schemaTableName))) {
+ if (!metadataFilterSql.contains(columnName)) {
+ hasRequiredMetadataFilterColumns = false;
+ notFoundListBuilder.add(columnName);
+ }
+ }
+ if (!hasRequiredMetadataFilterColumns) {
+ throw new PrestoException(
+ CLP_MANDATORY_METADATA_FILTER_NOT_VALID,
+ notFoundListBuilder.build() + " is a mandatory metadata filter column but not valid");
+ }
+ }
+
+ /**
+ * Rewrites the given SQL string to remap filter conditions based on the configured range
+ * mappings for the given scope.
+ *
+ * The {@code scope} follows the format {@code catalog[.schema][.table]}, and determines
+ * which filter mappings to apply, since mappings from more specific scopes (e.g., table-level)
+ * override or supplement those from broader scopes (e.g., catalog-level). For each scope
+ * (catalog, schema, table), this method collects all range mappings defined in the metadata
+ * filter configuration.
+ *
+ *
This method performs regex-based replacements to convert numeric filter expressions such
+ * as:
+ *
+ * - {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
+ * - {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
+ * - {@code "msg.timestamp" = 4567} →
+ * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
+ *
+ *
+ * @param scope
+ * @param sql
+ * @return the rewritten SQL string
+ */
+ public String remapFilterSql(String scope, String sql)
+ {
+ String[] splitScope = scope.split("\\.");
+
+ Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0])));
+
+ if (1 < splitScope.length) {
+ mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1])));
+ }
+
+ if (3 == splitScope.length) {
+ mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope)));
+ }
+
+ String remappedSql = sql;
+ for (Map.Entry entry : mappings.entrySet()) {
+ String key = entry.getKey();
+ RangeMapping value = entry.getValue();
+ remappedSql = remappedSql.replaceAll(
+ format("\"(%s)\"\\s(>=?)\\s([0-9]*)", key),
+ format("%s $2 $3", value.upperBound));
+ remappedSql = remappedSql.replaceAll(
+ format("\"(%s)\"\\s(<=?)\\s([0-9]*)", key),
+ format("%s $2 $3", value.lowerBound));
+ remappedSql = remappedSql.replaceAll(
+ format("\"(%s)\"\\s(=)\\s([0-9]*)", key),
+ format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound));
+ }
+ return remappedSql;
+ }
+
+ public Set getColumnNames(String scope)
+ {
+ return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters);
+ }
+
+ private Set getRequiredColumnNames(String scope)
+ {
+ return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters);
+ }
+
+ private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor)
+ {
+ String[] splitScope = scope.split("\\.");
+ ImmutableSet.Builder builder = ImmutableSet.builder();
+
+ builder.addAll(extractor.apply(filterMap.get(splitScope[0])));
+
+ if (splitScope.length > 1) {
+ builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1])));
+ }
+
+ if (splitScope.length == 3) {
+ builder.addAll(extractor.apply(filterMap.get(scope)));
+ }
+
+ return builder.build();
+ }
+
+ private Set getAllColumnNamesFromFilters(List filters)
+ {
+ return null != filters ? filters.stream()
+ .map(filter -> filter.columnName)
+ .collect(toImmutableSet()) : ImmutableSet.of();
+ }
+
+ private Set getRequiredColumnNamesFromFilters(List filters)
+ {
+ return null != filters ? filters.stream()
+ .filter(filter -> filter.required)
+ .map(filter -> filter.columnName)
+ .collect(toImmutableSet()) : ImmutableSet.of();
+ }
+
+ private Map getAllMappingsFromFilters(List filters)
+ {
+ return null != filters
+ ? filters.stream()
+ .filter(filter -> null != filter.rangeMapping)
+ .collect(toImmutableMap(
+ filter -> filter.columnName,
+ filter -> filter.rangeMapping))
+ : ImmutableMap.of();
+ }
+
+ private static class RangeMapping
+ {
+ @JsonProperty("lowerBound")
+ public String lowerBound;
+
+ @JsonProperty("upperBound")
+ public String upperBound;
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof RangeMapping)) {
+ return false;
+ }
+ RangeMapping that = (RangeMapping) o;
+ return Objects.equals(lowerBound, that.lowerBound) &&
+ Objects.equals(upperBound, that.upperBound);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(lowerBound, upperBound);
+ }
+ }
+
+ private static class Filter
+ {
+ @JsonProperty("columnName")
+ public String columnName;
+
+ @JsonProperty("rangeMapping")
+ public RangeMapping rangeMapping;
+
+ @JsonProperty("required")
+ public boolean required;
+ }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
index fa0386376f12a..2a5144b61d812 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
@@ -37,6 +37,7 @@ protected void setup(Binder binder)
binder.bind(ClpConnector.class).in(Scopes.SINGLETON);
binder.bind(ClpMetadata.class).in(Scopes.SINGLETON);
binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON);
+ binder.bind(ClpMetadataFilterProvider.class).in(Scopes.SINGLETON);
binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON);
configBinder(binder).bindConfig(ClpConfig.class);
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java
index adab0bf71c9a8..157af459cb80e 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java
@@ -32,6 +32,7 @@
import java.util.Map;
import java.util.Optional;
+import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME;
import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith;
import static java.util.Objects.requireNonNull;
@@ -41,11 +42,13 @@ public class ClpPlanOptimizer
private static final Logger log = Logger.get(ClpPlanOptimizer.class);
private final FunctionMetadataManager functionManager;
private final StandardFunctionResolution functionResolution;
+ private final ClpMetadataFilterProvider metadataFilterProvider;
- public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution)
+ public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider)
{
this.functionManager = requireNonNull(functionManager, "functionManager is null");
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
+ this.metadataFilterProvider = requireNonNull(metadataFilterProvider, "metadataFilterProvider is null");
}
@Override
@@ -75,15 +78,31 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context)
Map assignments = tableScanNode.getAssignments();
TableHandle tableHandle = tableScanNode.getTable();
ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
- ClpExpression clpExpression = node.getPredicate()
- .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, assignments), null);
+ String scope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
+ ClpExpression clpExpression = node.getPredicate().accept(
+ new ClpFilterToKqlConverter(
+ functionResolution,
+ functionManager,
+ assignments,
+ metadataFilterProvider.getColumnNames(scope)), null);
Optional kqlQuery = clpExpression.getPushDownExpression();
+ Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery();
Optional remainingPredicate = clpExpression.getRemainingExpression();
+
+ // Perform required metadata filter checks before handling the KQL query (if kqlQuery
+ // isn't present, we'll return early, skipping subsequent checks).
+ metadataFilterProvider.checkContainsRequiredFilters(clpTableHandle.getSchemaTableName(), metadataSqlQuery.orElse(""));
+ if (metadataSqlQuery.isPresent()) {
+ metadataSqlQuery = Optional.of(metadataFilterProvider.remapFilterSql(scope, metadataSqlQuery.get()));
+ log.debug("Metadata SQL query: %s", metadataSqlQuery);
+ }
+
if (!kqlQuery.isPresent()) {
return node;
}
- log.debug("KQL query: %s", kqlQuery.get());
- ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery);
+ log.debug("KQL query: %s", kqlQuery);
+
+ ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);
TableScanNode newTableScanNode = new TableScanNode(
tableScanNode.getSourceLocation(),
idAllocator.getNextId(),
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java
index f6f166eb7f657..a8fd1623d172b 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java
@@ -28,12 +28,14 @@ public class ClpPlanOptimizerProvider
{
private final FunctionMetadataManager functionManager;
private final StandardFunctionResolution functionResolution;
+ private final ClpMetadataFilterProvider metadataFilterProvider;
@Inject
- public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution)
+ public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpMetadataFilterProvider metadataFilterProvider)
{
this.functionManager = functionManager;
this.functionResolution = functionResolution;
+ this.metadataFilterProvider = metadataFilterProvider;
}
@Override
@@ -45,6 +47,6 @@ public Set getLogicalPlanOptimizers()
@Override
public Set getPhysicalPlanOptimizers()
{
- return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution));
+ return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, metadataFilterProvider));
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
index f1f7d7c708815..b82932f0c30fd 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
@@ -27,12 +27,14 @@ public class ClpTableLayoutHandle
{
private final ClpTableHandle table;
private final Optional kqlQuery;
+ private final Optional metadataSql;
@JsonCreator
- public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery)
+ public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery, @JsonProperty("metadataFilterQuery") Optional metadataSql)
{
this.table = table;
this.kqlQuery = kqlQuery;
+ this.metadataSql = metadataSql;
}
@JsonProperty
@@ -47,6 +49,12 @@ public Optional getKqlQuery()
return kqlQuery;
}
+ @JsonProperty
+ public Optional getMetadataSql()
+ {
+ return metadataSql;
+ }
+
@Override
public boolean equals(Object o)
{
@@ -58,13 +66,14 @@ public boolean equals(Object o)
}
ClpTableLayoutHandle that = (ClpTableLayoutHandle) o;
return Objects.equals(table, that.table) &&
- Objects.equals(kqlQuery, that.kqlQuery);
+ Objects.equals(kqlQuery, that.kqlQuery) &&
+ Objects.equals(metadataSql, that.metadataSql);
}
@Override
public int hashCode()
{
- return Objects.hash(table, kqlQuery);
+ return Objects.hash(table, kqlQuery, metadataSql);
}
@Override
@@ -73,6 +82,7 @@ public String toString()
return toStringHelper(this)
.add("table", table)
.add("kqlQuery", kqlQuery)
+ .add("metadataSql", metadataSql)
.toString();
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java
index 90d0b911d0d41..c07137d1eb7dc 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java
@@ -15,6 +15,7 @@
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.RowType;
+import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.plugin.clp.ClpColumnHandle;
import com.facebook.presto.spi.PrestoException;
@@ -119,9 +120,10 @@ private Type mapColumnType(byte type)
return DOUBLE;
case ClpString:
case VarString:
- case DateString:
case NullValue:
return VARCHAR;
+ case DateString:
+ return TimestampType.TIMESTAMP;
case UnstructuredArray:
return new ArrayType(VARCHAR);
case Boolean:
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
index 964a992230e36..bba65420e46d1 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
@@ -41,7 +41,7 @@ public class ClpMySqlSplitProvider
public static final String ARCHIVES_TABLE_SUFFIX = "_archives";
// SQL templates
- private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s`", ARCHIVES_TABLE_COLUMN_ID, ARCHIVES_TABLE_SUFFIX);
+ private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_COLUMN_ID, ARCHIVES_TABLE_SUFFIX);
private static final Logger log = Logger.get(ClpMySqlSplitProvider.class);
@@ -69,6 +69,12 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
String tableName = clpTableHandle.getSchemaTableName().getTableName();
String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName);
+ if (clpTableLayoutHandle.getMetadataSql().isPresent()) {
+ String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get();
+ archivePathQuery += " AND (" + metadataFilterQuery + ")";
+ }
+ log.debug("Query for archive: %s", archivePathQuery);
+
try (Connection connection = getConnection()) {
// Fetch archive IDs and create splits
try (PreparedStatement statement = connection.prepareStatement(archivePathQuery); ResultSet resultSet = statement.executeQuery()) {
@@ -83,7 +89,9 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
log.warn("Database error while processing splits for %s: %s", tableName, e);
}
- return splits.build();
+ ImmutableList filteredSplits = splits.build();
+ log.debug("Number of splits: %s", filteredSplits.size());
+ return filteredSplits;
}
private Connection getConnection()
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
index 74c36cf934579..91fc3c780d941 100644
--- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
@@ -48,11 +48,13 @@ public final class ClpMetadataDbSetUp
public static final String METADATA_DB_TABLE_PREFIX = "clp_";
public static final String METADATA_DB_URL_TEMPLATE = "jdbc:h2:file:%s;MODE=MySQL;DATABASE_TO_UPPER=FALSE";
public static final String METADATA_DB_USER = "sa";
- public static final String ARCHIVE_STORAGE_DIRECTORY_BASE = "/tmp/archives/";
+ public static final String ARCHIVES_STORAGE_DIRECTORY_BASE = "/tmp/archives/";
private static final Logger log = Logger.get(ClpMetadataDbSetUp.class);
private static final String DATASETS_TABLE_NAME = METADATA_DB_TABLE_PREFIX + DATASETS_TABLE_SUFFIX;
+ private static final String ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP = "begin_timestamp";
private static final String ARCHIVES_TABLE_COLUMN_PAGINATION_ID = "pagination_id";
+ private static final String ARCHIVES_TABLE_COLUMN_END_TIMESTAMP = "end_timestamp";
private ClpMetadataDbSetUp()
{
@@ -118,7 +120,7 @@ public static ClpMetadata setupMetadata(DbHandle dbHandle, Map> splits)
+ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> splits)
{
final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath);
final String archiveTableFormat = METADATA_DB_TABLE_PREFIX + "%s" + ARCHIVES_TABLE_SUFFIX;
@@ -127,7 +129,7 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> tableSplits : splits.entrySet()) {
+ for (Map.Entry> tableSplits : splits.entrySet()) {
String tableName = tableSplits.getKey();
updateDatasetsTable(conn, tableName);
@@ -135,17 +137,28 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map 0", "fare > 0", null, sessionHolder);
- testFilter("fare >= 0", "fare >= 0", null, sessionHolder);
- testFilter("fare < 0", "fare < 0", null, sessionHolder);
- testFilter("fare <= 0", "fare <= 0", null, sessionHolder);
- testFilter("fare = 0", "fare: 0", null, sessionHolder);
- testFilter("fare != 0", "NOT fare: 0", null, sessionHolder);
- testFilter("fare <> 0", "NOT fare: 0", null, sessionHolder);
- testFilter("0 < fare", "fare > 0", null, sessionHolder);
- testFilter("0 <= fare", "fare >= 0", null, sessionHolder);
- testFilter("0 > fare", "fare < 0", null, sessionHolder);
- testFilter("0 >= fare", "fare <= 0", null, sessionHolder);
- testFilter("0 = fare", "fare: 0", null, sessionHolder);
- testFilter("0 != fare", "NOT fare: 0", null, sessionHolder);
- testFilter("0 <> fare", "NOT fare: 0", null, sessionHolder);
+ // Numeric comparisons
+ testPushDown(sessionHolder, "fare > 0", "fare > 0", null);
+ testPushDown(sessionHolder, "fare >= 0", "fare >= 0", null);
+ testPushDown(sessionHolder, "fare < 0", "fare < 0", null);
+ testPushDown(sessionHolder, "fare <= 0", "fare <= 0", null);
+ testPushDown(sessionHolder, "fare = 0", "fare: 0", null);
+ testPushDown(sessionHolder, "fare != 0", "NOT fare: 0", null);
+ testPushDown(sessionHolder, "fare <> 0", "NOT fare: 0", null);
+ testPushDown(sessionHolder, "0 < fare", "fare > 0", null);
+ testPushDown(sessionHolder, "0 <= fare", "fare >= 0", null);
+ testPushDown(sessionHolder, "0 > fare", "fare < 0", null);
+ testPushDown(sessionHolder, "0 >= fare", "fare <= 0", null);
+ testPushDown(sessionHolder, "0 = fare", "fare: 0", null);
+ testPushDown(sessionHolder, "0 != fare", "NOT fare: 0", null);
+ testPushDown(sessionHolder, "0 <> fare", "NOT fare: 0", null);
}
@Test
- public void testOrPushdown()
+ public void testBetweenPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter("fare > 0 OR city.Name like 'b%'", "(fare > 0 OR city.Name: \"b*\")", null, sessionHolder);
- testFilter(
+ // Normal cases
+ testPushDown(sessionHolder, "fare BETWEEN 0 AND 5", "fare >= 0 AND fare <= 5", null);
+ testPushDown(sessionHolder, "fare BETWEEN 5 AND 0", "fare >= 5 AND fare <= 0", null);
+
+ // No push down for non-constant expressions
+ testPushDown(
+ sessionHolder,
+ "fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)",
+ null,
+ "fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)");
+
+ // If the last two arguments of BETWEEN are not numeric constants, then the CLP connector
+ // won't push them down.
+ testPushDown(sessionHolder, "city.Name BETWEEN 'a' AND 'b'", null, "city.Name BETWEEN 'a' AND 'b'");
+ }
+
+ @Test
+ public void testOrPushDown()
+ {
+ SessionHolder sessionHolder = new SessionHolder();
+
+ // OR conditions with partial push down support
+ testPushDown(sessionHolder, "fare > 0 OR city.Name like 'b%'", "(fare > 0 OR city.Name: \"b*\")", null);
+ testPushDown(
+ sessionHolder,
"lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1",
null,
- "(lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1)",
- sessionHolder);
+ "(lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1)");
// Multiple ORs
- testFilter(
+ testPushDown(
+ sessionHolder,
"fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1",
null,
- "fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1",
- sessionHolder);
- testFilter(
+ "fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1");
+ testPushDown(
+ sessionHolder,
"fare > 0 OR city.Name like 'b%' OR city.Region.Id != 1",
"((fare > 0 OR city.Name: \"b*\") OR NOT city.Region.Id: 1)",
- null,
- sessionHolder);
+ null);
}
@Test
- public void testAndPushdown()
+ public void testAndPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter("fare > 0 AND city.Name like 'b%'", "(fare > 0 AND city.Name: \"b*\")", null, sessionHolder);
- testFilter(
- "lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1",
- "(NOT city.Region.Id: 1)",
- "lower(city.Region.Name) = 'hello world'",
- sessionHolder);
+ // AND conditions with partial/full push down
+ testPushDown(sessionHolder, "fare > 0 AND city.Name like 'b%'", "(fare > 0 AND city.Name: \"b*\")", null);
+
+ testPushDown(sessionHolder, "lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", "(NOT city.Region.Id: 1)", "lower(city.Region.Name) = 'hello world'");
// Multiple ANDs
- testFilter(
+ testPushDown(
+ sessionHolder,
"fare > 0 AND city.Name like 'b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1",
"(((fare > 0 AND city.Name: \"b*\")) AND NOT city.Region.Id: 1)",
- "(lower(city.Region.Name) = 'hello world')",
- sessionHolder);
- testFilter(
+ "(lower(city.Region.Name) = 'hello world')");
+
+ testPushDown(
+ sessionHolder,
"fare > 0 AND city.Name like '%b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1",
"(((fare > 0)) AND NOT city.Region.Id: 1)",
- "city.Name like '%b%' AND lower(city.Region.Name) = 'hello world'",
- sessionHolder);
+ "city.Name like '%b%' AND lower(city.Region.Name) = 'hello world'");
}
@Test
- public void testNotPushdown()
+ public void testNotPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter("city.Region.Name NOT LIKE 'hello%'", "NOT city.Region.Name: \"hello*\"", null, sessionHolder);
- testFilter("NOT (city.Region.Name LIKE 'hello%')", "NOT city.Region.Name: \"hello*\"", null, sessionHolder);
- testFilter("city.Name != 'hello world'", "NOT city.Name: \"hello world\"", null, sessionHolder);
- testFilter("city.Name <> 'hello world'", "NOT city.Name: \"hello world\"", null, sessionHolder);
- testFilter("NOT (city.Name = 'hello world')", "NOT city.Name: \"hello world\"", null, sessionHolder);
- testFilter("fare != 0", "NOT fare: 0", null, sessionHolder);
- testFilter("fare <> 0", "NOT fare: 0", null, sessionHolder);
- testFilter("NOT (fare = 0)", "NOT fare: 0", null, sessionHolder);
+ // NOT and inequality predicates
+ testPushDown(sessionHolder, "city.Region.Name NOT LIKE 'hello%'", "NOT city.Region.Name: \"hello*\"", null);
+ testPushDown(sessionHolder, "NOT (city.Region.Name LIKE 'hello%')", "NOT city.Region.Name: \"hello*\"", null);
+ testPushDown(sessionHolder, "city.Name != 'hello world'", "NOT city.Name: \"hello world\"", null);
+ testPushDown(sessionHolder, "city.Name <> 'hello world'", "NOT city.Name: \"hello world\"", null);
+ testPushDown(sessionHolder, "NOT (city.Name = 'hello world')", "NOT city.Name: \"hello world\"", null);
+ testPushDown(sessionHolder, "fare != 0", "NOT fare: 0", null);
+ testPushDown(sessionHolder, "fare <> 0", "NOT fare: 0", null);
+ testPushDown(sessionHolder, "NOT (fare = 0)", "NOT fare: 0", null);
// Multiple NOTs
- testFilter("NOT (NOT fare = 0)", "NOT NOT fare: 0", null, sessionHolder);
- testFilter("NOT (fare = 0 AND city.Name = 'hello world')", "NOT (fare: 0 AND city.Name: \"hello world\")", null, sessionHolder);
- testFilter("NOT (fare = 0 OR city.Name = 'hello world')", "NOT (fare: 0 OR city.Name: \"hello world\")", null, sessionHolder);
+ testPushDown(sessionHolder, "NOT (NOT fare = 0)", "NOT NOT fare: 0", null);
+ testPushDown(sessionHolder, "NOT (fare = 0 AND city.Name = 'hello world')", "NOT (fare: 0 AND city.Name: \"hello world\")", null);
+ testPushDown(sessionHolder, "NOT (fare = 0 OR city.Name = 'hello world')", "NOT (fare: 0 OR city.Name: \"hello world\")", null);
}
@Test
- public void testInPushdown()
+ public void testInPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter("city.Name IN ('hello world', 'hello world 2')", "(city.Name: \"hello world\" OR city.Name: \"hello world 2\")", null, sessionHolder);
+ // IN predicate
+ testPushDown(sessionHolder, "city.Name IN ('hello world', 'hello world 2')", "(city.Name: \"hello world\" OR city.Name: \"hello world 2\")", null);
}
@Test
- public void testIsNullPushdown()
+ public void testIsNullPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter("city.Name IS NULL", "NOT city.Name: *", null, sessionHolder);
- testFilter("city.Name IS NOT NULL", "NOT NOT city.Name: *", null, sessionHolder);
- testFilter("NOT (city.Name IS NULL)", "NOT NOT city.Name: *", null, sessionHolder);
+ // IS NULL / IS NOT NULL predicates
+ testPushDown(sessionHolder, "city.Name IS NULL", "NOT city.Name: *", null);
+ testPushDown(sessionHolder, "city.Name IS NOT NULL", "NOT NOT city.Name: *", null);
+ testPushDown(sessionHolder, "NOT (city.Name IS NULL)", "NOT NOT city.Name: *", null);
}
@Test
- public void testComplexPushdown()
+ public void testComplexPushDown()
{
SessionHolder sessionHolder = new SessionHolder();
- testFilter(
+ // Complex AND/OR with partial pushdown
+ testPushDown(
+ sessionHolder,
"(fare > 0 OR city.Name like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)",
"((fare > 0 OR city.Name: \"b*\"))",
- "(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)",
- sessionHolder);
- testFilter(
+ "(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)");
+
+ testPushDown(
+ sessionHolder,
"city.Region.Id = 1 AND (fare > 0 OR city.Name NOT like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)",
"((city.Region.Id: 1 AND (fare > 0 OR NOT city.Name: \"b*\")))",
- "lower(city.Region.Name) = 'hello world' OR city.Name IS NULL",
- sessionHolder);
+ "lower(city.Region.Name) = 'hello world' OR city.Name IS NULL");
}
- private void testFilter(String sqlExpression, String expectedKqlExpression, String expectedRemainingExpression, SessionHolder sessionHolder)
+ @Test
+ public void testMetadataSqlGeneration()
+ {
+ SessionHolder sessionHolder = new SessionHolder();
+ Set testMetadataFilterColumns = ImmutableSet.of("fare");
+
+ // Normal case
+ testPushDown(
+ sessionHolder,
+ "(fare > 0 AND city.Name like 'b%')",
+ "(fare > 0 AND city.Name: \"b*\")",
+ "(\"fare\" > 0)",
+ testMetadataFilterColumns);
+
+ // With BETWEEN
+ testPushDown(
+ sessionHolder,
+ "((fare BETWEEN 0 AND 5) AND city.Name like 'b%')",
+ "(fare >= 0 AND fare <= 5 AND city.Name: \"b*\")",
+ "(\"fare\" >= 0 AND \"fare\" <= 5)",
+ testMetadataFilterColumns);
+
+ // The cases of that the metadata filter column exist but cannot be push down
+ testPushDown(
+ sessionHolder,
+ "(fare > 0 OR city.Name like 'b%')",
+ "(fare > 0 OR city.Name: \"b*\")",
+ null,
+ testMetadataFilterColumns);
+ testPushDown(
+ sessionHolder,
+ "(fare > 0 AND city.Name like 'b%') OR city.Region.Id = 1",
+ "((fare > 0 AND city.Name: \"b*\") OR city.Region.Id: 1)",
+ null,
+ testMetadataFilterColumns);
+
+ // Complicated case
+ testPushDown(
+ sessionHolder,
+ "fare = 0 AND (city.Name like 'b%' OR city.Region.Id = 1)",
+ "(fare: 0 AND (city.Name: \"b*\" OR city.Region.Id: 1))",
+ "(\"fare\" = 0)",
+ testMetadataFilterColumns);
+ }
+
+ private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedRemaining)
+ {
+ ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of());
+ testFilter(clpExpression, expectedKql, expectedRemaining, sessionHolder);
+ }
+
+ private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedMetadataSqlQuery, Set metadataFilterColumns)
+ {
+ ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns);
+ testFilter(clpExpression, expectedKql, null, sessionHolder);
+ if (expectedMetadataSqlQuery != null) {
+ assertTrue(clpExpression.getMetadataSqlQuery().isPresent());
+ assertEquals(clpExpression.getMetadataSqlQuery().get(), expectedMetadataSqlQuery);
+ }
+ else {
+ assertFalse(clpExpression.getMetadataSqlQuery().isPresent());
+ }
+ }
+
+ private ClpExpression tryPushDown(String sqlExpression, SessionHolder sessionHolder, Set metadataFilterColumns)
+ {
+ RowExpression pushDownExpression = getRowExpression(sqlExpression, sessionHolder);
+ return pushDownExpression.accept(
+ new ClpFilterToKqlConverter(
+ standardFunctionResolution,
+ functionAndTypeManager,
+ variableToColumnHandleMap,
+ metadataFilterColumns),
+ null);
+ }
+
+ private void testFilter(
+ ClpExpression clpExpression,
+ String expectedKqlExpression,
+ String expectedRemainingExpression,
+ SessionHolder sessionHolder)
{
- RowExpression actualExpression = getRowExpression(sqlExpression, sessionHolder);
- ClpExpression clpExpression = actualExpression.accept(new ClpFilterToKqlConverter(standardFunctionResolution, functionAndTypeManager, variableToColumnHandleMap), null);
Optional kqlExpression = clpExpression.getPushDownExpression();
Optional remainingExpression = clpExpression.getRemainingExpression();
if (expectedKqlExpression != null) {
assertTrue(kqlExpression.isPresent());
assertEquals(kqlExpression.get(), expectedKqlExpression);
}
+ else {
+ assertFalse(kqlExpression.isPresent());
+ }
if (expectedRemainingExpression != null) {
assertTrue(remainingExpression.isPresent());
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java
new file mode 100644
index 0000000000000..024d5fdade55a
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadataFilterConfig.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp;
+
+import com.facebook.presto.spi.PrestoException;
+import com.facebook.presto.spi.SchemaTableName;
+import com.google.common.collect.ImmutableSet;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.Set;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
+
+@Test(singleThreaded = true)
+public class TestClpMetadataFilterConfig
+{
+ private String filterConfigPath;
+
+ @BeforeMethod
+ public void setUp() throws IOException, URISyntaxException
+ {
+ URL resource = getClass().getClassLoader().getResource("test-metadata-filter.json");
+ if (resource == null) {
+ throw new FileNotFoundException("test-metadata-filter.json not found in resources");
+ }
+
+ filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString();
+ }
+
+ @Test
+ public void checkRequiredFilters()
+ {
+ ClpConfig config = new ClpConfig();
+ config.setMetadataFilterConfig(filterConfigPath);
+ ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config);
+ SchemaTableName testTableSchemaTableName = new SchemaTableName("default", "table_1");
+ assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters(
+ testTableSchemaTableName,
+ "(\"level\" >= 1 AND \"level\" <= 3)"));
+ filterProvider.checkContainsRequiredFilters(
+ testTableSchemaTableName,
+ "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)");
+ }
+
+ @Test
+ public void getFilterNames()
+ {
+ ClpConfig config = new ClpConfig();
+ config.setMetadataFilterConfig(filterConfigPath);
+ ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config);
+ Set catalogFilterNames = filterProvider.getColumnNames("clp");
+ assertEquals(ImmutableSet.of("level"), catalogFilterNames);
+ Set schemaFilterNames = filterProvider.getColumnNames("clp.default");
+ assertEquals(ImmutableSet.of("level", "author"), schemaFilterNames);
+ Set tableFilterNames = filterProvider.getColumnNames("clp.default.table_1");
+ assertEquals(ImmutableSet.of("level", "author", "msg.timestamp", "file_name"), tableFilterNames);
+ }
+
+ @Test
+ public void remapSql()
+ {
+ ClpConfig config = new ClpConfig();
+ config.setMetadataFilterConfig(filterConfigPath);
+ ClpMetadataFilterProvider filterProvider = new ClpMetadataFilterProvider(config);
+
+ String metadataFilterSql1 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)";
+ String remappedSql1 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql1);
+ assertEquals(remappedSql1, "(end_timestamp > 1234 AND begin_timestamp < 5678)");
+
+ String metadataFilterSql2 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" <= 5678)";
+ String remappedSql2 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql2);
+ assertEquals(remappedSql2, "(end_timestamp >= 1234 AND begin_timestamp <= 5678)");
+
+ String metadataFilterSql3 = "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" <= 5678)";
+ String remappedSql3 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql3);
+ assertEquals(remappedSql3, "(end_timestamp > 1234 AND begin_timestamp <= 5678)");
+
+ String metadataFilterSql4 = "(\"msg.timestamp\" >= 1234 AND \"msg.timestamp\" < 5678)";
+ String remappedSql4 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql4);
+ assertEquals(remappedSql4, "(end_timestamp >= 1234 AND begin_timestamp < 5678)");
+
+ String metadataFilterSql5 = "(\"msg.timestamp\" = 1234)";
+ String remappedSql5 = filterProvider.remapFilterSql("clp.default.table_1", metadataFilterSql5);
+ assertEquals(remappedSql5, "((begin_timestamp <= 1234 AND end_timestamp >= 1234))");
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java
index 7dcd3c768d4e9..fed281fef9453 100644
--- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java
@@ -15,6 +15,7 @@
import com.facebook.presto.plugin.clp.split.ClpSplitProvider;
import com.facebook.presto.spi.SchemaTableName;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
@@ -27,10 +28,13 @@
import java.util.Optional;
import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
-import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVE_STORAGE_DIRECTORY_BASE;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ArchivesTableRow;
import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.DbHandle;
import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle;
import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupSplit;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static org.testng.Assert.assertEquals;
@Test(singleThreaded = true)
@@ -38,7 +42,7 @@ public class TestClpSplit
{
private DbHandle dbHandle;
private ClpSplitProvider clpSplitProvider;
- private Map> tableSplits;
+ private Map> tableSplits;
@BeforeMethod
public void setUp()
@@ -51,10 +55,16 @@ public void setUp()
for (int i = 0; i < numKeys; i++) {
String key = "test_" + i;
- List values = new ArrayList<>();
+ List values = new ArrayList<>();
for (int j = 0; j < numValuesPerKey; j++) {
- values.add("id_" + j);
+ // We generate synthetic begin_timestamp and end_timestamp values for each split
+ // by offsetting two base timestamps (1700000000000L and 1705000000000L) with a
+ // fixed increment per split (10^10 * j).
+ values.add(new ArchivesTableRow(
+ "id_" + j,
+ 1700000000000L + 10000000000L * j,
+ 1705000000000L + 10000000000L * j));
}
tableSplits.put(key, values);
@@ -71,24 +81,67 @@ public void tearDown()
@Test
public void testListSplits()
{
- for (Map.Entry> entry : tableSplits.entrySet()) {
- String tableName = entry.getKey();
- String tablePath = ARCHIVE_STORAGE_DIRECTORY_BASE + tableName;
- List expectedSplits = entry.getValue();
- ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(
- new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), Optional.empty());
- List splits = clpSplitProvider.listSplits(layoutHandle);
- assertEquals(splits.size(), expectedSplits.size());
-
- ImmutableSet actualSplitPaths = splits.stream()
- .map(ClpSplit::getPath)
- .collect(ImmutableSet.toImmutableSet());
-
- ImmutableSet expectedSplitPaths = expectedSplits.stream()
- .map(split -> tablePath + "/" + split)
- .collect(ImmutableSet.toImmutableSet());
-
- assertEquals(actualSplitPaths, expectedSplitPaths);
+ for (Map.Entry> entry : tableSplits.entrySet()) {
+ // Without metadata filters
+ compareListSplitsResult(entry, Optional.empty(), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ // query_begin_ts < archives_min_ts && query_end_ts > archives_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1795000000001)"), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ // query_begin_ts < archives_min_ts && query_end_ts > archives_min_ts && query_end_ts < archives_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1744999999999)"), ImmutableList.of(0, 1, 2, 3, 4));
+
+ // query_end_ts < archives_min_ts
+ compareListSplitsResult(entry, Optional.of("(begin_timestamp < 1699999999999)"), ImmutableList.of());
+
+ // query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts > archives_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp < 1795000000001)"), ImmutableList.of(5, 6, 7, 8, 9));
+
+ // query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts < archives_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp <= 1770000000000)"), ImmutableList.of(5, 6, 7));
+
+ // query_begin_ts > archives_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp > 1795000000001)"), ImmutableList.of());
+
+ // query_begin_ts = archive_min_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1700000000000 AND begin_timestamp <= 1700000000000)"), ImmutableList.of(0));
+
+ // query_begin_ts = archive_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1795000000000 AND begin_timestamp <= 1795000000000)"), ImmutableList.of(9));
+
+ // query_ts = x && x > archive_min_ts && x < archive_max_ts (non-exist)
+ compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000001 AND begin_timestamp <= 1715000000001)"), ImmutableList.of());
+
+ // query_ts = x && x > archive_min_ts && x < archive_max_ts
+ compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000000 AND begin_timestamp <= 1715000000000)"), ImmutableList.of(1));
}
}
+
+ private void compareListSplitsResult(
+ Map.Entry> entry,
+ Optional metadataSql,
+ List expectedSplitIds)
+ {
+ String tableName = entry.getKey();
+ String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName;
+ ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(
+ new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath),
+ Optional.empty(),
+ metadataSql);
+ List expectedSplits = expectedSplitIds.stream()
+ .map(expectedSplitId -> entry.getValue().get(expectedSplitId))
+ .collect(toImmutableList());
+ List actualSplits = clpSplitProvider.listSplits(layoutHandle);
+ assertEquals(actualSplits.size(), expectedSplits.size());
+
+ ImmutableSet actualSplitPaths = actualSplits.stream()
+ .map(ClpSplit::getPath)
+ .collect(toImmutableSet());
+
+ ImmutableSet expectedSplitPaths = expectedSplits.stream()
+ .map(split -> tablePath + "/" + split.getId())
+ .collect(toImmutableSet());
+
+ assertEquals(actualSplitPaths, expectedSplitPaths);
+ }
}
diff --git a/presto-clp/src/test/resources/test-metadata-filter.json b/presto-clp/src/test/resources/test-metadata-filter.json
new file mode 100644
index 0000000000000..266a23b1a26e7
--- /dev/null
+++ b/presto-clp/src/test/resources/test-metadata-filter.json
@@ -0,0 +1,25 @@
+{
+ "clp": [
+ {
+ "columnName": "level"
+ }
+ ],
+ "clp.default": [
+ {
+ "columnName": "author"
+ }
+ ],
+ "clp.default.table_1": [
+ {
+ "columnName": "msg.timestamp",
+ "rangeMapping": {
+ "lowerBound": "begin_timestamp",
+ "upperBound": "end_timestamp"
+ },
+ "required": true
+ },
+ {
+ "columnName": "file_name"
+ }
+ ]
+}
diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst
index e1beb5c180c1b..666c601b85f5d 100644
--- a/presto-docs/src/main/sphinx/connector/clp.rst
+++ b/presto-docs/src/main/sphinx/connector/clp.rst
@@ -29,6 +29,7 @@ following contents, modifying the properties as appropriate:
clp.metadata-db-name=clp_db
clp.metadata-db-user=clp_user
clp.metadata-db-password=clp_password
+ clp.metadata-filter-config=/path/to/metadata-filter-config.json
clp.metadata-table-prefix=clp_
clp.split-provider-type=mysql
@@ -68,6 +69,7 @@ Property Name Description
database name is not specified in the URL.
``clp.metadata-db-password`` The password for the metadata database user. This option is required if
``clp.metadata-provider-type`` is set to ``mysql``.
+``clp.metadata-filter-config`` The absolute path of the metadata filter config file.
``clp.metadata-table-prefix`` A string prefix prepended to all metadata table names when querying the
database. Useful for namespacing or avoiding collisions. This option is
required if ``clp.metadata-provider-type`` is set to ``mysql``.
@@ -94,6 +96,118 @@ If you prefer to use a different source--or the same source with a custom implem
implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector
accordingly.
+Metadata Filter Config File
+----------------------------
+
+The metadata filter config file allows you to configure the set of columns that can be used to filter out irrelevant
+splits (CLP archives) when querying CLP's metadata database. This can significantly improve performance by reducing the
+amount of data that needs to be scanned. For a given query, the connector will translate any supported filter predicates
+that involve the configured columns into a query against CLP's metadata database.
+
+The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps
+to an array of :ref:`filter configs`.
+
+
+.. _scopes:
+
+Scopes
+^^^^^^
+
+A *scope* can be one of the following:
+
+- A catalog name
+- A fully-qualified schema name
+- A fully-qualified table name
+
+Filter configs under a particular scope will apply to all child scopes. For example, filter configs at the schema level
+will apply to all tables within that schema.
+
+.. _filter-configs:
+
+Filter Configs
+^^^^^^^^^^^^^^
+
+Each `filter config` indicates how a *data column*---a column in the Presto table---should be mapped to a *metadata
+column*---a column in CLP's metadata database. In most cases, the data column and the metadata column will have the same
+name; but in some cases, the data column may be remapped.
+
+For example, an integer data column (e.g., ``timestamp``), may be remapped to a pair of metadata columns that represent
+the range of possible values (e.g., ``begin_timestamp`` and ``end_timestamp``) of the data column within a split.
+
+Each *filter config* has the following properties:
+
+- ``columnName``: The data column's name.
+
+ .. note:: Currently, only numeric-type columns can be used as metadata filters.
+
+- ``rangeMapping`` *(optional)*: an object with the following properties:
+
+ .. note:: This option is only valid if the column has a numeric type.
+
+ - ``lowerBound``: The metadata column that represents the lower bound of values in a split for the data column.
+ - ``upperBound``: The metadata column that represents the upper bound of values in a split for the data column.
+
+
+- ``required`` *(optional, defaults to false)*: indicates whether the filter **must** be present in the translated
+ metadata filter SQL query. If a required filter is missing or cannot be pushed down, the query will be rejected.
+
+
+Example
+^^^^^^^
+
+The code block shows an example metadata filter config file:
+
+.. code-block:: json
+
+ {
+ "clp": [
+ {
+ "columnName": "level"
+ }
+ ],
+ "clp.default": [
+ {
+ "columnName": "author"
+ }
+ ],
+ "clp.default.table_1": [
+ {
+ "columnName": "msg.timestamp",
+ "rangeMapping": {
+ "lowerBound": "begin_timestamp",
+ "upperBound": "end_timestamp"
+ },
+ "required": true
+ },
+ {
+ "columnName": "file_name"
+ }
+ ]
+ }
+
+- The first key-value pair adds the following filter configs for all schemas and tables under the ``clp`` catalog:
+
+ - The column ``level`` is used as-is without remapping.
+
+- The second key-value pair adds the following filter configs for all tables under the ``clp.default`` schema:
+
+ - The column ``author`` is used as-is without remapping.
+
+- The third key-value pair adds two filter configs for the table ``clp.default.table_1``:
+
+ - The column ``msg.timestamp`` is remapped via a ``rangeMapping`` to the metadata columns ``begin_timestamp`` and
+ ``end_timestamp``, and is required to exist in every query.
+ - The column ``file_name`` is used as-is without remapping.
+
+Supported SQL Expressions
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The connector supports translations from a Presto SQL query to the metadata filter query for the following expressions:
+
+- Comparisons between variables and constants (e.g., ``=``, ``!=``, ``<``, ``>``, ``<=``, ``>=``).
+- Dereferencing fields from row-typed variables.
+- Logical operators: ``AND``, ``OR``, and ``NOT``.
+
Data Types
----------