Skip to content

Commit 3d3eda4

Browse files
Apply suggestions from code review
Co-authored-by: kirkrodrigues <[email protected]>
1 parent 7b7811b commit 3d3eda4

File tree

5 files changed

+99
-64
lines changed

5 files changed

+99
-64
lines changed

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,21 @@
1818
import java.util.Optional;
1919

2020
/**
21-
* Represents the result of converting a Presto RowExpression into a CLP-compatible KQL query and
22-
* a SQL query for filtering splits using a metadata database. In every case, `pushDownExpression`
23-
* represents the part of the RowExpression that could be converted to a KQL expression, and
24-
* `remainingExpression` represents the part that could not be converted.
21+
* Represents the result of:
22+
* <ul>
23+
* <li>splitting a {@link RowExpression} into an expression that can be pushed down to CLP
24+
* (`pushDownExpression`) and any remaining expression that cannot be
25+
* (`remainingExpression`).</li>
26+
* <li>a SQL query for filtering splits using CLP's metadata database.</li>
27+
* </ul>
2528
*/
2629
public class ClpExpression
2730
{
2831
// Optional KQL query or column name representing the fully or partially translatable part of the expression.
2932
private final Optional<String> pushDownExpression;
3033

31-
// Optional SQL string extracted from the query plan, which is only made of given metadata columns.
34+
// Optional SQL string extracted from the query plan, which is only made of up of columns in
35+
// CLP's metadata database.
3236
private final Optional<String> metadataSqlQuery;
3337

3438
// The remaining (non-translatable) portion of the RowExpression, if any.

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import com.facebook.presto.common.type.RowType;
1919
import com.facebook.presto.common.type.Type;
2020
import com.facebook.presto.common.type.VarcharType;
21-
import com.facebook.presto.plugin.clp.metadata.ClpSchemaTree;
2221
import com.facebook.presto.spi.ColumnHandle;
2322
import com.facebook.presto.spi.PrestoException;
2423
import com.facebook.presto.spi.function.FunctionHandle;
@@ -64,13 +63,16 @@
6463
import static java.util.Objects.requireNonNull;
6564

6665
/**
67-
* A translator to translate Presto RowExpressions into KQL (Kibana Query Language) filters used as
68-
* CLP queries and SQL filters used for filtering splits by metadata database. This is used
69-
* primarily for pushing down supported filters to the CLP engine. This class implements the
70-
* {@link RowExpressionVisitor} interface and recursively walks Presto filter expressions,
71-
* attempting to convert supported expressions into corresponding KQL filter strings and SQL
72-
* filter strings for metadata filtering. Any part of the expression that cannot be translated to
73-
* KQL is preserved as a "remaining expression" for potential fallback processing.
66+
* A translator to translate Presto {@link RowExpression}s into:
67+
* <ul>
68+
* <li>KQL (Kibana Query Language) filters used to push down supported filters to the CLP
69+
* engine.</li>
70+
* <li>SQL filters used for filtering splits in CLP's metadata database.</li>
71+
* </ul>
72+
* This class implements the {@link RowExpressionVisitor} interface and recursively walks Presto
73+
* filter expressions, attempting to convert supported expressions into corresponding KQL filter
74+
* strings and SQL filter strings for metadata filtering. Any part of the expression that cannot be
75+
* translated to KQL is preserved as a "remaining expression" for potential fallback processing.
7476
* <p></p>
7577
* Supported translations for KQL include:
7678
* <ul>
@@ -84,8 +86,8 @@
8486
* <li>Dereferencing fields from row-typed variables.</li>
8587
* <li>Logical operators AND, OR, and NOT.</li>
8688
* </ul>
87-
*
88-
* Supported translations for Metadata SQL include:
89+
* <p></p>
90+
* Supported translations for SQL include:
8991
* <ul>
9092
* <li>Comparisons between variables and constants (e.g., =, !=, <, >, <=, >=).</li>
9193
* <li>Dereferencing fields from row-typed variables.</li>
@@ -209,9 +211,11 @@ private String getVariableName(VariableReferenceExpression variable)
209211
* Handles the <code>BETWEEN</code> expression.
210212
* <p></p>
211213
* The translation is only performed if:
212-
* <ul>All arguments must have numeric types.</ul>
213-
* <ul>The first argument is a variable reference expression.</ul>
214-
* <ul>The second and third arguments are constant expressions.</ul>
214+
* <ul>
215+
* <li>all arguments have numeric types.</li>
216+
* <li>the first argument is a variable reference expression.</li>
217+
* <li>the second and third arguments are constant expressions.</li>
218+
* </ul>
215219
* <p></p>
216220
* Example: <code>col1 BETWEEN 0 AND 5</code> → <code>col1 >= 0 AND col1 <= 5</code>
217221
*
@@ -234,9 +238,9 @@ private ClpExpression handleBetween(CallExpression node)
234238
|| !(third instanceof ConstantExpression)) {
235239
return new ClpExpression(node);
236240
}
237-
if (!isNumericType(first.getType())
238-
|| !isNumericType(second.getType())
239-
|| !isNumericType(third.getType())) {
241+
if (!isClpCompatibleNumericType(first.getType())
242+
|| !isClpCompatibleNumericType(second.getType())
243+
|| !isClpCompatibleNumericType(third.getType())) {
240244
return new ClpExpression(node);
241245
}
242246
Optional<String> variableOpt = first.accept(this, null).getPushDownExpression();
@@ -866,12 +870,12 @@ private ClpExpression handleDereference(RowExpression expression)
866870
}
867871

868872
/**
869-
* Refer to
873+
* See
870874
* <a href="https://docs.yscope.com/clp/main/user-guide/reference-json-search-syntax">here
871875
* </a> for all special chars in the string value that need to be escaped.
872876
*
873-
* @param literalString the target string to escape special chars '\', '"', '?' and '*'
874-
* @return the escaped string
877+
* @param literalString
878+
* @return the string with special characters escaped
875879
*/
876880
public static String escapeKqlSpecialCharsForStringValue(String literalString)
877881
{
@@ -884,13 +888,12 @@ public static String escapeKqlSpecialCharsForStringValue(String literalString)
884888
}
885889

886890
/**
887-
* Check if the type is one of the numeric types that CLP will handle. Refer to
888-
* {@link ClpSchemaTree} for all types that CLP will handle.
891+
* Checks if the type is one of the numeric types that can be pushed down to CLP.
889892
*
890893
* @param type the type to check
891-
* @return is the type numeric or not
894+
* @return whether the type can be pushed down.
892895
*/
893-
public static boolean isNumericType(Type type)
896+
public static boolean isClpCompatibleNumericType(Type type)
894897
{
895898
return type.equals(BIGINT)
896899
|| type.equals(INTEGER)

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -42,36 +42,54 @@
4242

4343
/**
4444
* Loads and manages metadata filter configurations for the CLP connector.
45-
* <p>
45+
* <p></p>
4646
* The configuration file is specified by the {@code clp.metadata-filter-config} property
4747
* and defines metadata filters used to optimize query execution through split pruning.
48-
* Filters can be declared at different scopes:
48+
* <p></p>
49+
* Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under
50+
* a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply
51+
* to all tables within that schema).
52+
* <p></p>
53+
* Each filter config includes the following fields:
4954
* <ul>
50-
* <li><b>Catalog-level</b>: applies to all schemas and tables within a catalog.</li>
51-
* <li><b>Schema-level</b>: applies to all tables within a specific catalog and schema.</li>
52-
* <li><b>Table-level</b>: applies to a fully qualified table {@code catalog.schema.table}.</li>
53-
* </ul>
55+
* <li><b>{@code columnName}</b>: the name of a column in the table's logical schema. Currently,
56+
* only numeric-type columns can be used as metadata filters.</li>
5457
*
55-
* <p>Each scope maps to a list of filter definitions. Each filter includes the following fields:
56-
* <ul>
57-
* <li><b>{@code columnName}</b> (required): the name of a column in the table's logical schema.
58-
* Only columns of numeric type are currently supported as metadata filters.</li>
58+
* <li><b>{@code rangeMapping}</b> <i>(optional)</i>: an object with the following properties:
59+
*
60+
* <br><br>
61+
* <b>Note:</b> This option is only valid if the column has a numeric type.
62+
*
63+
* <ul>
64+
* <li>{@code lowerBound}: The metadata column that represents the lower bound of values
65+
* in a split for the data column.</li>
66+
* <li>{@code upperBound}: The metadata column that represents the upper bound of values
67+
* in a split for the data column.</li>
68+
* </ul>
5969
*
60-
* <li><b>{@code rangeMapping}</b> (optional): remaps a logical filter to physical metadata-only columns.
61-
* This field is valid only for numeric-type columns.
62-
* For example, a condition such as:
63-
* <pre>{@code
64-
* "msg.timestamp" > 1234 AND "msg.timestamp" < 5678
65-
* }</pre>
66-
* will be rewritten as:
67-
* <pre>{@code
68-
* "end_timestamp" > 1234 AND "begin_timestamp" < 5678
69-
* }</pre>
70-
* This ensures the filter applies to a superset of the actual result set, enabling safe pruning.</li>
70+
* <p>
71+
* For example, a condition such as:
72+
* </p>
73+
* <pre>{@code
74+
* "msg.timestamp" > 1234 AND "msg.timestamp" < 5678
75+
* }</pre>
7176
*
72-
* <li><b>{@code required}</b> (optional, default: {@code false}): indicates whether the filter must be present
73-
* in the extracted metadata filter SQL query. If a required filter is missing or cannot be pushed down,
74-
* the query will be rejected.</li>
77+
* <p>
78+
* will be rewritten as:
79+
* </p>
80+
* <pre>{@code
81+
* "end_timestamp" > 1234 AND "begin_timestamp" < 5678
82+
* }</pre>
83+
*
84+
* <p>
85+
* This ensures the filter applies to a superset of the actual result set, enabling safe
86+
* pruning.
87+
* </p>
88+
* </li>
89+
*
90+
* <li><b>{@code required}</b> (optional, defaults to {@code false}): indicates whether the
91+
* filter must be present in the translated metadata filter SQL query. If a required filter
92+
* is missing or cannot be pushed down, the query will be rejected.</li>
7593
* </ul>
7694
*/
7795
public class ClpMetadataFilterProvider
@@ -112,29 +130,27 @@ public void checkContainsRequiredFilters(SchemaTableName schemaTableName, String
112130
}
113131

114132
/**
115-
* Rewrites the input SQL string by remapping filter conditions based on the configured
116-
* metadata filter range mappings for the given scope.
133+
* Rewrites the given SQL string to remap filter conditions based on the configured range
134+
* mappings for the given scope.
117135
*
118136
* <p>The {@code scope} follows the format {@code catalog[.schema][.table]}, and determines
119-
* which filter mappings to apply. For each level of scope (catalog, schema, table), this
120-
* method collects all range mappings defined in the metadata filter configuration. Mappings
121-
* from more specific scopes (e.g., table-level) override or supplement those from broader
122-
* scopes (e.g., catalog-level).
137+
* which filter mappings to apply, since mappings from more specific scopes (e.g., table-level)
138+
* override or supplement those from broader scopes (e.g., catalog-level). For each scope
139+
* (catalog, schema, table), this method collects all range mappings defined in the metadata
140+
* filter configuration.
123141
*
124142
* <p>This method performs regex-based replacements to convert numeric filter expressions such
125143
* as:
126-
*
127144
* <ul>
128145
* <li>{@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}</li>
129146
* <li>{@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}</li>
130147
* <li>{@code "msg.timestamp" = 4567} →
131148
* {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}</li>
132149
* </ul>
133150
*
134-
* @param scope the catalog.schema.table scope used to resolve applicable filter mappings
135-
* @param sql the original SQL expression to be remapped
136-
* @return the rewritten SQL string with metadata filter expressions remapped according to the
137-
* configured range mappings
151+
* @param scope
152+
* @param sql
153+
* @return the rewritten SQL string
138154
*/
139155
public String remapFilterSql(String scope, String sql)
140156
{

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,9 @@ public void testBetweenPushDown()
105105
"fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)",
106106
null,
107107
"fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)");
108-
// If the last two arguments of BETWEEN are not numeric constants, then CLP connector won't do push down
108+
109+
// If the last two arguments of BETWEEN are not numeric constants, then the CLP connector
110+
// won't push them down.
109111
testPushDown(sessionHolder, "city.Name BETWEEN 'a' AND 'b'", null, "city.Name BETWEEN 'a' AND 'b'");
110112
}
111113

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,24 +84,34 @@ public void testListSplits()
8484
for (Map.Entry<String, List<ArchivesTableRow>> entry : tableSplits.entrySet()) {
8585
// Without metadata filters
8686
compareListSplitsResult(entry, Optional.empty(), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
87+
8788
// query_begin_ts < archives_min_ts && query_end_ts > archives_max_ts
8889
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1795000000001)"), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
90+
8991
// query_begin_ts < archives_min_ts && query_end_ts > archives_min_ts && query_end_ts < archives_max_ts
9092
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1744999999999)"), ImmutableList.of(0, 1, 2, 3, 4));
93+
9194
// query_end_ts < archives_min_ts
9295
compareListSplitsResult(entry, Optional.of("(begin_timestamp < 1699999999999)"), ImmutableList.of());
96+
9397
// query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts > archives_max_ts
9498
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp < 1795000000001)"), ImmutableList.of(5, 6, 7, 8, 9));
99+
95100
// query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts < archives_max_ts
96101
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp <= 1770000000000)"), ImmutableList.of(5, 6, 7));
102+
97103
// query_begin_ts > archives_max_ts
98104
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1795000000001)"), ImmutableList.of());
105+
99106
// query_begin_ts = archive_min_ts
100107
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1700000000000 AND begin_timestamp <= 1700000000000)"), ImmutableList.of(0));
108+
101109
// query_begin_ts = archive_max_ts
102110
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1795000000000 AND begin_timestamp <= 1795000000000)"), ImmutableList.of(9));
111+
103112
// query_ts = x && x > archive_min_ts && x < archive_max_ts (non-exist)
104113
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000001 AND begin_timestamp <= 1715000000001)"), ImmutableList.of());
114+
105115
// query_ts = x && x > archive_min_ts && x < archive_max_ts
106116
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000000 AND begin_timestamp <= 1715000000000)"), ImmutableList.of(1));
107117
}

0 commit comments

Comments
 (0)