Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions presto-clp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@
<scope>provided</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-expressions</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>units</artifactId>
Expand Down Expand Up @@ -129,6 +134,19 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-tests</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-main-base</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.facebook.presto.spi.relation.RowExpressionVisitor;
import com.facebook.presto.spi.relation.SpecialFormExpression;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;

Expand Down Expand Up @@ -85,6 +86,7 @@
* constant.</li>
* <li>Dereferencing fields from row-typed variables.</li>
* <li>Logical operators AND, OR, and NOT.</li>
* <li><code>CLP_GET_*</code> UDFs.</li>
* </ul>
* <p></p>
* Supported translations for SQL include:
Expand Down Expand Up @@ -117,28 +119,29 @@ public ClpFilterToKqlConverter(
@Override
public ClpExpression visitCall(CallExpression node, Map<VariableReferenceExpression, ColumnHandle> context)
{
FunctionHandle functionHandle = node.getFunctionHandle();
CallExpression newNode = (CallExpression) maybeReplaceClpUdfArgument(node, context);
FunctionHandle functionHandle = newNode.getFunctionHandle();
if (standardFunctionResolution.isNotFunction(functionHandle)) {
return handleNot(node, context);
return handleNot(newNode, context);
}

if (standardFunctionResolution.isLikeFunction(functionHandle)) {
return handleLike(node, context);
return handleLike(newNode, context);
}

FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(node.getFunctionHandle());
FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(newNode.getFunctionHandle());
Optional<OperatorType> operatorTypeOptional = functionMetadata.getOperatorType();
if (operatorTypeOptional.isPresent()) {
OperatorType operatorType = operatorTypeOptional.get();
if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) {
return handleLogicalBinary(operatorType, node, context);
return handleLogicalBinary(operatorType, newNode, context);
}
if (BETWEEN == operatorType) {
return handleBetween(node, context);
return handleBetween(newNode, context);
}
}

return new ClpExpression(node);
return new ClpExpression(newNode);
}

@Override
Expand Down Expand Up @@ -205,6 +208,71 @@ private String getVariableName(VariableReferenceExpression variable, Map<Variabl
return ((ClpColumnHandle) context.get(variable)).getOriginalColumnName();
}

/**
* Rewrites CLP UDFs (e.g., CLP_GET_*) in a RowExpression tree into VariableReferenceExpressions,
* enabling them to be used as pushdown filters.
* <p></p>
* Traverses the expression tree recursively, replacing supported CLP UDFs with uniquely named
* variables and tracking these variables in the assignments and context. If the CLP UDF takes
* a constant string argument, that string is used as the new variable name. Unsupported
* argument types (non-constants) or invalid expressions will throw an exception.
* <p></p>
* Examples:
* <ul>
* <li><code>CLP_GET_STRING('field')</code> → <code>field</code> (as a VariableReferenceExpression)</li>
* <li><code>CLP_GET_INT('field')</code> → <code>field</code> (not mapped to a KQL column)</li>
* </ul>
*
* @param rowExpression the input expression to analyze and possibly rewrite
* @param context a mapping from variable references to column handles used for pushdown. Newly created ones
* will be added here
* @return a possibly rewritten RowExpression with CLP UDFs replaced
*/
private RowExpression maybeReplaceClpUdfArgument(RowExpression rowExpression, Map<VariableReferenceExpression, ColumnHandle> context)
{
requireNonNull(context);
if (!(rowExpression instanceof CallExpression)) {
return rowExpression;
}

CallExpression callExpression = (CallExpression) rowExpression;

// Recursively process the arguments of this CallExpression
List<RowExpression> newArgs = callExpression.getArguments().stream()
.map(childArg -> maybeReplaceClpUdfArgument(childArg, context))
.collect(ImmutableList.toImmutableList());

FunctionMetadata metadata = functionMetadataManager.getFunctionMetadata(callExpression.getFunctionHandle());
String functionName = metadata.getName().getObjectName().toUpperCase();

if (functionName.startsWith("CLP_GET")) {
// Replace CLP UDF with VariableReferenceExpression
int numArguments = callExpression.getArguments().size();
if (numArguments == 1) {
RowExpression argument = callExpression.getArguments().get(0);
if (!(argument instanceof ConstantExpression)) {
throw new PrestoException(
CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION,
"The argument of " + functionName + " must be a ConstantExpression");
}
Optional<String> definition = argument.accept(this, context).getPushDownExpression();
if (definition.isPresent()) {
VariableReferenceExpression newVar = new VariableReferenceExpression(
Optional.empty(),
definition.get(),
callExpression.getType());
context.put(newVar, new ClpColumnHandle(definition.get(), callExpression.getType(), true));
return newVar;
}
else {
throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unrecognized parameter in " + functionName);
}
}
}

return new CallExpression(callExpression.getDisplayName(), callExpression.getFunctionHandle(), callExpression.getType(), newArgs);
}

/**
* Handles the <code>BETWEEN</code> expression.
* <p></p>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.plugin.clp;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;

Comment on lines +16 to +22
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use PrestoException with NOT_SUPPORTED for clearer user-facing errors

Throwing UnsupportedOperationException surfaces as an internal error to users. Prefer a PrestoException with NOT_SUPPORTED and a clear action message.

 import com.facebook.presto.common.block.Block;
 import com.facebook.presto.common.type.StandardTypes;
 import com.facebook.presto.spi.function.Description;
 import com.facebook.presto.spi.function.ScalarFunction;
 import com.facebook.presto.spi.function.SqlType;
 import io.airlift.slice.Slice;
+import com.facebook.presto.spi.PrestoException;
+import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import com.facebook.presto.spi.PrestoException;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 16 to 22, the code currently throws UnsupportedOperationException
which surfaces as an internal error; replace those throws with throwing a
PrestoException using StandardErrorCode.NOT_SUPPORTED (NOT_SUPPORTED) and a
clear, actionable message for the user. Add the necessary imports for
PrestoException and StandardErrorCode.NOT_SUPPORTED, and when a function is not
implemented return throw new PrestoException(NOT_SUPPORTED, "Feature X is not
supported; please use Y or file a ticket"), substituting a concise description
of the unsupported operation and suggested action.

public final class ClpFunctions
{
private ClpFunctions()
{
}

Comment on lines +23 to +28
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Centralise placeholder throwing and improve messages

Consolidate the placeholder exception and use consistent, actionable messages so accidental execution fails clearly.

 public final class ClpFunctions
 {
     private ClpFunctions()
     {
     }
 
+    private static RuntimeException placeholderInvocation(String name)
+    {
+        return new PrestoException(
+                NOT_SUPPORTED,
+                name + " must be rewritten by the CLP query rewriter and should not be executed at runtime.");
+    }
+
     @ScalarFunction(value = "CLP_GET_BIGINT", deterministic = false)
     @Description("Retrieves an integer value corresponding to the given JSON path.")
     @SqlType(StandardTypes.BIGINT)
     public static long clpGetBigint(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_BIGINT is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_BIGINT");
     }
 
     @ScalarFunction(value = "CLP_GET_DOUBLE", deterministic = false)
-    @Description("Retrieves a floating point value corresponding to the given JSON path.")
+    @Description("Retrieves a double value corresponding to the given JSON path.")
     @SqlType(StandardTypes.DOUBLE)
     public static double clpGetDouble(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_DOUBLE is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_DOUBLE");
     }
 
     @ScalarFunction(value = "CLP_GET_BOOL", deterministic = false)
     @Description("Retrieves a boolean value corresponding to the given JSON path.")
     @SqlType(StandardTypes.BOOLEAN)
     public static boolean clpGetBool(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_BOOL is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_BOOL");
     }
 
     @ScalarFunction(value = "CLP_GET_STRING", deterministic = false)
     @Description("Retrieves a string value corresponding to the given JSON path.")
     @SqlType(StandardTypes.VARCHAR)
     public static Slice clpGetString(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_STRING is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_STRING");
     }
 
     @ScalarFunction(value = "CLP_GET_STRING_ARRAY", deterministic = false)
     @Description("Retrieves an array value corresponding to the given JSON path and converts each element into a string.")
-    @SqlType("ARRAY(VARCHAR)")
+    @SqlType("array(varchar)")
     public static Block clpGetStringArray(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_STRING_ARRAY is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_STRING_ARRAY");
     }
 }

Also applies to: 29-36, 37-44, 45-52, 53-60, 61-67

🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 23-28 (and similarly for 29-36, 37-44, 45-52, 53-60, 61-67),
replace ad-hoc placeholder throws with a single centralized helper that always
throws a clear, consistent exception message; add a private static method (e.g.,
failPlaceholderExecution()) that throws an UnsupportedOperationException with a
descriptive actionable message like "CLP function placeholder - should not be
executed at runtime; ensure function wiring or implementation is provided" and
call that helper from each placeholder location so all placeholders fail
uniformly and with an actionable message.

@ScalarFunction(value = "CLP_GET_INT", deterministic = false)
@Description("Retrieves an integer value corresponding to the given JSON path.")
@SqlType(StandardTypes.BIGINT)
public static long clpGetInt(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_INT is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_FLOAT", deterministic = false)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just find that in our RFC:

we propose adding a handful of CLP_GET_<TYPE>(<json-path>) UDFs, where <TYPE> refers to the Presto type of the column and <json_path> refers to the column's JSON path.

So the should be a Presto type. Shall we rename this UDF to CLP_GET_DOUBLE then?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe we should update the RFC

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a table in RFC

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we have the table. I mean the description in the RFC that I quoted is not precise what it means.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. If that's the case, we also need to rename CLP_GET_INT to CLP_GET_BIGINT

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we going to rename this CLP_GET_INT to CLP_GET_BIGINT?

@Description("Retrieves a floating point value corresponding to the given JSON path.")
@SqlType(StandardTypes.DOUBLE)
public static double clpGetFloat(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_FLOAT is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_BOOL", deterministic = false)
@Description("Retrieves a boolean value corresponding to the given JSON path.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean clpGetBool(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_BOOL is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_STRING", deterministic = false)
@Description("Retrieves a string value corresponding to the given JSON path.")
@SqlType(StandardTypes.VARCHAR)
public static Slice clpGetString(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_STRING is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_STRING_ARRAY", deterministic = false)
@Description("Retrieves an array value corresponding to the given JSON path and converts each element into a string.")
@SqlType("ARRAY(VARCHAR)")
public static Block clpGetStringArray(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
Comment on lines +61 to +64
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Normalise type literal casing in @SqlType

Presto annotations typically use lower-case type literals (array(varchar)). The parser is case-insensitive, but lower-case improves consistency.

🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 61 to 64, the @SqlType annotation uses an upper-case type literal
"ARRAY(VARCHAR)"; change it to the normalized lower-case form "array(varchar)"
to match Presto annotation style and improve consistency.

{
throw new UnsupportedOperationException("CLP_GET_STRING_ARRAY is a placeholder function without implementation.");
}
}
Loading
Loading