Skip to content

Commit a6341a3

Browse files
Apply suggestions from code review
Co-authored-by: kirkrodrigues <[email protected]>
1 parent 7b7811b commit a6341a3

File tree

5 files changed

+101
-63
lines changed

5 files changed

+101
-63
lines changed

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,21 @@
1818
import java.util.Optional;
1919

2020
/**
21-
* Represents the result of converting a Presto RowExpression into a CLP-compatible KQL query and
22-
* a SQL query for filtering splits using a metadata database. In every case, `pushDownExpression`
23-
* represents the part of the RowExpression that could be converted to a KQL expression, and
24-
* `remainingExpression` represents the part that could not be converted.
21+
* Represents the result of:
22+
* <ul>
23+
* <li>splitting a {@link RowExpression} into an expression that can be pushed down to CLP
24+
* (`pushDownExpression`) and any remaining expression that cannot be
25+
* (`remainingExpression`).</li>
26+
* <li>a SQL query for filtering splits using CLP's metadata database.</li>
27+
* </ul>
2528
*/
2629
public class ClpExpression
2730
{
2831
// Optional KQL query or column name representing the fully or partially translatable part of the expression.
2932
private final Optional<String> pushDownExpression;
3033

31-
// Optional SQL string extracted from the query plan, which is only made of given metadata columns.
34+
// Optional SQL string extracted from the query plan, which is only made of up of columns in
35+
// CLP's metadata database.
3236
private final Optional<String> metadataSqlQuery;
3337

3438
// The remaining (non-translatable) portion of the RowExpression, if any.

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,16 @@
6464
import static java.util.Objects.requireNonNull;
6565

6666
/**
67-
* A translator to translate Presto RowExpressions into KQL (Kibana Query Language) filters used as
68-
* CLP queries and SQL filters used for filtering splits by metadata database. This is used
69-
* primarily for pushing down supported filters to the CLP engine. This class implements the
70-
* {@link RowExpressionVisitor} interface and recursively walks Presto filter expressions,
71-
* attempting to convert supported expressions into corresponding KQL filter strings and SQL
72-
* filter strings for metadata filtering. Any part of the expression that cannot be translated to
73-
* KQL is preserved as a "remaining expression" for potential fallback processing.
67+
* A translator to translate Presto {@link RowExpression}s into:
68+
* <ul>
69+
* <li>KQL (Kibana Query Language) filters used to push down supported filters to the CLP
70+
* engine.</li>
71+
* <li>SQL filters used for filtering splits in CLP's metadata database.</li>
72+
* </ul>
73+
* This class implements the {@link RowExpressionVisitor} interface and recursively walks Presto
74+
* filter expressions, attempting to convert supported expressions into corresponding KQL filter
75+
* strings and SQL filter strings for metadata filtering. Any part of the expression that cannot be
76+
* translated to KQL is preserved as a "remaining expression" for potential fallback processing.
7477
* <p></p>
7578
* Supported translations for KQL include:
7679
* <ul>
@@ -84,8 +87,8 @@
8487
* <li>Dereferencing fields from row-typed variables.</li>
8588
* <li>Logical operators AND, OR, and NOT.</li>
8689
* </ul>
87-
*
88-
* Supported translations for Metadata SQL include:
90+
* <p></p>
91+
* Supported translations for SQL include:
8992
* <ul>
9093
* <li>Comparisons between variables and constants (e.g., =, !=, <, >, <=, >=).</li>
9194
* <li>Dereferencing fields from row-typed variables.</li>
@@ -209,9 +212,11 @@ private String getVariableName(VariableReferenceExpression variable)
209212
* Handles the <code>BETWEEN</code> expression.
210213
* <p></p>
211214
* The translation is only performed if:
212-
* <ul>All arguments must have numeric types.</ul>
213-
* <ul>The first argument is a variable reference expression.</ul>
214-
* <ul>The second and third arguments are constant expressions.</ul>
215+
* <ul>
216+
* <li>all arguments have numeric types.</li>
217+
* <li>the first argument is a variable reference expression.</li>
218+
* <li>the second and third arguments are constant expressions.</li>
219+
* </ul>
215220
* <p></p>
216221
* Example: <code>col1 BETWEEN 0 AND 5</code> → <code>col1 >= 0 AND col1 <= 5</code>
217222
*
@@ -234,9 +239,9 @@ private ClpExpression handleBetween(CallExpression node)
234239
|| !(third instanceof ConstantExpression)) {
235240
return new ClpExpression(node);
236241
}
237-
if (!isNumericType(first.getType())
238-
|| !isNumericType(second.getType())
239-
|| !isNumericType(third.getType())) {
242+
if (!isClpCompatibleNumericType(first.getType())
243+
|| !isClpCompatibleNumericType(second.getType())
244+
|| !isClpCompatibleNumericType(third.getType())) {
240245
return new ClpExpression(node);
241246
}
242247
Optional<String> variableOpt = first.accept(this, null).getPushDownExpression();
@@ -866,12 +871,12 @@ private ClpExpression handleDereference(RowExpression expression)
866871
}
867872

868873
/**
869-
* Refer to
874+
* See
870875
* <a href="https://docs.yscope.com/clp/main/user-guide/reference-json-search-syntax">here
871876
* </a> for all special chars in the string value that need to be escaped.
872877
*
873-
* @param literalString the target string to escape special chars '\', '"', '?' and '*'
874-
* @return the escaped string
878+
* @param literalString
879+
* @return the string with special characters escaped
875880
*/
876881
public static String escapeKqlSpecialCharsForStringValue(String literalString)
877882
{
@@ -884,13 +889,12 @@ public static String escapeKqlSpecialCharsForStringValue(String literalString)
884889
}
885890

886891
/**
887-
* Check if the type is one of the numeric types that CLP will handle. Refer to
888-
* {@link ClpSchemaTree} for all types that CLP will handle.
892+
* Checks if the type is one of the numeric types that can be pushed down to CLP.
889893
*
890894
* @param type the type to check
891-
* @return is the type numeric or not
895+
* @return whether the type can be pushed down.
892896
*/
893-
public static boolean isNumericType(Type type)
897+
public static boolean isClpCompatibleNumericType(Type type)
894898
{
895899
return type.equals(BIGINT)
896900
|| type.equals(INTEGER)

presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadataFilterProvider.java

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -42,36 +42,54 @@
4242

4343
/**
4444
* Loads and manages metadata filter configurations for the CLP connector.
45-
* <p>
45+
* <p></p>
4646
* The configuration file is specified by the {@code clp.metadata-filter-config} property
4747
* and defines metadata filters used to optimize query execution through split pruning.
48-
* Filters can be declared at different scopes:
48+
* <p></p>
49+
* Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under
50+
* a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply
51+
* to all tables within that schema).
52+
* <p></p>
53+
* Each filter config includes the following fields:
4954
* <ul>
50-
* <li><b>Catalog-level</b>: applies to all schemas and tables within a catalog.</li>
51-
* <li><b>Schema-level</b>: applies to all tables within a specific catalog and schema.</li>
52-
* <li><b>Table-level</b>: applies to a fully qualified table {@code catalog.schema.table}.</li>
53-
* </ul>
55+
* <li><b>{@code columnName}</b>: the name of a column in the table's logical schema. Currently,
56+
* only numeric-type columns can be used as metadata filters.</li>
5457
*
55-
* <p>Each scope maps to a list of filter definitions. Each filter includes the following fields:
56-
* <ul>
57-
* <li><b>{@code columnName}</b> (required): the name of a column in the table's logical schema.
58-
* Only columns of numeric type are currently supported as metadata filters.</li>
58+
* <li><b>{@code rangeMapping}</b> <i>(optional)</i>: an object with the following properties:
59+
*
60+
* <br><br>
61+
* <b>Note:</b> This option is only valid if the column has a numeric type.
62+
*
63+
* <ul>
64+
* <li>{@code lowerBound}: The metadata column that represents the lower bound of values
65+
* in a split for the data column.</li>
66+
* <li>{@code upperBound}: The metadata column that represents the upper bound of values
67+
* in a split for the data column.</li>
68+
* </ul>
5969
*
60-
* <li><b>{@code rangeMapping}</b> (optional): remaps a logical filter to physical metadata-only columns.
61-
* This field is valid only for numeric-type columns.
62-
* For example, a condition such as:
63-
* <pre>{@code
64-
* "msg.timestamp" > 1234 AND "msg.timestamp" < 5678
65-
* }</pre>
66-
* will be rewritten as:
67-
* <pre>{@code
68-
* "end_timestamp" > 1234 AND "begin_timestamp" < 5678
69-
* }</pre>
70-
* This ensures the filter applies to a superset of the actual result set, enabling safe pruning.</li>
70+
* <p>
71+
* For example, a condition such as:
72+
* </p>
73+
* <pre>{@code
74+
* "msg.timestamp" > 1234 AND "msg.timestamp" < 5678
75+
* }</pre>
7176
*
72-
* <li><b>{@code required}</b> (optional, default: {@code false}): indicates whether the filter must be present
73-
* in the extracted metadata filter SQL query. If a required filter is missing or cannot be pushed down,
74-
* the query will be rejected.</li>
77+
* <p>
78+
* will be rewritten as:
79+
* </p>
80+
* <pre>{@code
81+
* "end_timestamp" > 1234 AND "begin_timestamp" < 5678
82+
* }</pre>
83+
*
84+
* <p>
85+
* This ensures the filter applies to a superset of the actual result set, enabling safe
86+
* pruning.
87+
* </p>
88+
* </li>
89+
*
90+
* <li><b>{@code required}</b> (optional, defaults to {@code false}): indicates whether the
91+
* filter must be present in the translated metadata filter SQL query. If a required filter
92+
* is missing or cannot be pushed down, the query will be rejected.</li>
7593
* </ul>
7694
*/
7795
public class ClpMetadataFilterProvider
@@ -112,29 +130,27 @@ public void checkContainsRequiredFilters(SchemaTableName schemaTableName, String
112130
}
113131

114132
/**
115-
* Rewrites the input SQL string by remapping filter conditions based on the configured
116-
* metadata filter range mappings for the given scope.
133+
* Rewrites the given SQL string to remap filter conditions based on the configured range
134+
* mappings for the given scope.
117135
*
118136
* <p>The {@code scope} follows the format {@code catalog[.schema][.table]}, and determines
119-
* which filter mappings to apply. For each level of scope (catalog, schema, table), this
120-
* method collects all range mappings defined in the metadata filter configuration. Mappings
121-
* from more specific scopes (e.g., table-level) override or supplement those from broader
122-
* scopes (e.g., catalog-level).
137+
* which filter mappings to apply, since mappings from more specific scopes (e.g., table-level)
138+
* override or supplement those from broader scopes (e.g., catalog-level). For each scope
139+
* (catalog, schema, table), this method collects all range mappings defined in the metadata
140+
* filter configuration.
123141
*
124142
* <p>This method performs regex-based replacements to convert numeric filter expressions such
125143
* as:
126-
*
127144
* <ul>
128145
* <li>{@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}</li>
129146
* <li>{@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}</li>
130147
* <li>{@code "msg.timestamp" = 4567} →
131148
* {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}</li>
132149
* </ul>
133150
*
134-
* @param scope the catalog.schema.table scope used to resolve applicable filter mappings
135-
* @param sql the original SQL expression to be remapped
136-
* @return the rewritten SQL string with metadata filter expressions remapped according to the
137-
* configured range mappings
151+
* @param scope
152+
* @param sql
153+
* @return the rewritten SQL string
138154
*/
139155
public String remapFilterSql(String scope, String sql)
140156
{

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,9 @@ public void testBetweenPushDown()
105105
"fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)",
106106
null,
107107
"fare BETWEEN (city.Region.Id - 5) AND (city.Region.Id + 5)");
108-
// If the last two arguments of BETWEEN are not numeric constants, then CLP connector won't do push down
108+
109+
// If the last two arguments of BETWEEN are not numeric constants, then the CLP connector
110+
// won't push them down.
109111
testPushDown(sessionHolder, "city.Name BETWEEN 'a' AND 'b'", null, "city.Name BETWEEN 'a' AND 'b'");
110112
}
111113

presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public void setUp()
5858
List<ArchivesTableRow> values = new ArrayList<>();
5959

6060
for (int j = 0; j < numValuesPerKey; j++) {
61+
6162
// We generate synthetic begin_timestamp and end_timestamp values for each split
6263
// by offsetting two base timestamps (1700000000000L and 1705000000000L) with a
6364
// fixed increment per split (10^10 * j).
@@ -82,26 +83,37 @@ public void tearDown()
8283
public void testListSplits()
8384
{
8485
for (Map.Entry<String, List<ArchivesTableRow>> entry : tableSplits.entrySet()) {
86+
8587
// Without metadata filters
8688
compareListSplitsResult(entry, Optional.empty(), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
89+
8790
// query_begin_ts < archives_min_ts && query_end_ts > archives_max_ts
8891
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1795000000001)"), ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
92+
8993
// query_begin_ts < archives_min_ts && query_end_ts > archives_min_ts && query_end_ts < archives_max_ts
9094
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1699999999999 AND begin_timestamp < 1744999999999)"), ImmutableList.of(0, 1, 2, 3, 4));
95+
9196
// query_end_ts < archives_min_ts
9297
compareListSplitsResult(entry, Optional.of("(begin_timestamp < 1699999999999)"), ImmutableList.of());
98+
9399
// query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts > archives_max_ts
94100
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp < 1795000000001)"), ImmutableList.of(5, 6, 7, 8, 9));
101+
95102
// query_begin_ts > archives_min_ts && query_begin_ts < archives_max_ts && query_end_ts < archives_max_ts
96103
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1745000000001 AND begin_timestamp <= 1770000000000)"), ImmutableList.of(5, 6, 7));
104+
97105
// query_begin_ts > archives_max_ts
98106
compareListSplitsResult(entry, Optional.of("(end_timestamp > 1795000000001)"), ImmutableList.of());
107+
99108
// query_begin_ts = archive_min_ts
100109
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1700000000000 AND begin_timestamp <= 1700000000000)"), ImmutableList.of(0));
110+
101111
// query_begin_ts = archive_max_ts
102112
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1795000000000 AND begin_timestamp <= 1795000000000)"), ImmutableList.of(9));
113+
103114
// query_ts = x && x > archive_min_ts && x < archive_max_ts (non-exist)
104115
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000001 AND begin_timestamp <= 1715000000001)"), ImmutableList.of());
116+
105117
// query_ts = x && x > archive_min_ts && x < archive_max_ts
106118
compareListSplitsResult(entry, Optional.of("(end_timestamp >= 1715000000000 AND begin_timestamp <= 1715000000000)"), ImmutableList.of(1));
107119
}

0 commit comments

Comments
 (0)