Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions presto-clp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,13 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-main-base</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.facebook.airlift.bootstrap.LifeCycleManager;
import com.facebook.airlift.log.Logger;
import com.facebook.presto.plugin.clp.optimization.ClpPlanOptimizerProvider;
import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
import com.facebook.presto.spi.connector.Connector;
import com.facebook.presto.spi.connector.ConnectorMetadata;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.plugin.clp;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;

Comment on lines +16 to +22
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use PrestoException with NOT_SUPPORTED for clearer user-facing errors

Throwing UnsupportedOperationException surfaces as an internal error to users. Prefer a PrestoException with NOT_SUPPORTED and a clear action message.

 import com.facebook.presto.common.block.Block;
 import com.facebook.presto.common.type.StandardTypes;
 import com.facebook.presto.spi.function.Description;
 import com.facebook.presto.spi.function.ScalarFunction;
 import com.facebook.presto.spi.function.SqlType;
 import io.airlift.slice.Slice;
+import com.facebook.presto.spi.PrestoException;
+import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import com.facebook.presto.spi.PrestoException;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 16 to 22, the code currently throws UnsupportedOperationException
which surfaces as an internal error; replace those throws with throwing a
PrestoException using StandardErrorCode.NOT_SUPPORTED (NOT_SUPPORTED) and a
clear, actionable message for the user. Add the necessary imports for
PrestoException and StandardErrorCode.NOT_SUPPORTED, and when a function is not
implemented return throw new PrestoException(NOT_SUPPORTED, "Feature X is not
supported; please use Y or file a ticket"), substituting a concise description
of the unsupported operation and suggested action.

public final class ClpFunctions
{
private ClpFunctions()
{
}

Comment on lines +23 to +28
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Centralise placeholder throwing and improve messages

Consolidate the placeholder exception and use consistent, actionable messages so accidental execution fails clearly.

 public final class ClpFunctions
 {
     private ClpFunctions()
     {
     }
 
+    private static RuntimeException placeholderInvocation(String name)
+    {
+        return new PrestoException(
+                NOT_SUPPORTED,
+                name + " must be rewritten by the CLP query rewriter and should not be executed at runtime.");
+    }
+
     @ScalarFunction(value = "CLP_GET_BIGINT", deterministic = false)
     @Description("Retrieves an integer value corresponding to the given JSON path.")
     @SqlType(StandardTypes.BIGINT)
     public static long clpGetBigint(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_BIGINT is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_BIGINT");
     }
 
     @ScalarFunction(value = "CLP_GET_DOUBLE", deterministic = false)
-    @Description("Retrieves a floating point value corresponding to the given JSON path.")
+    @Description("Retrieves a double value corresponding to the given JSON path.")
     @SqlType(StandardTypes.DOUBLE)
     public static double clpGetDouble(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_DOUBLE is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_DOUBLE");
     }
 
     @ScalarFunction(value = "CLP_GET_BOOL", deterministic = false)
     @Description("Retrieves a boolean value corresponding to the given JSON path.")
     @SqlType(StandardTypes.BOOLEAN)
     public static boolean clpGetBool(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_BOOL is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_BOOL");
     }
 
     @ScalarFunction(value = "CLP_GET_STRING", deterministic = false)
     @Description("Retrieves a string value corresponding to the given JSON path.")
     @SqlType(StandardTypes.VARCHAR)
     public static Slice clpGetString(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_STRING is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_STRING");
     }
 
     @ScalarFunction(value = "CLP_GET_STRING_ARRAY", deterministic = false)
     @Description("Retrieves an array value corresponding to the given JSON path and converts each element into a string.")
-    @SqlType("ARRAY(VARCHAR)")
+    @SqlType("array(varchar)")
     public static Block clpGetStringArray(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
     {
-        throw new UnsupportedOperationException("CLP_GET_STRING_ARRAY is a placeholder function without implementation.");
+        throw placeholderInvocation("CLP_GET_STRING_ARRAY");
     }
 }

Also applies to: 29-36, 37-44, 45-52, 53-60, 61-67

🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 23-28 (and similarly for 29-36, 37-44, 45-52, 53-60, 61-67),
replace ad-hoc placeholder throws with a single centralized helper that always
throws a clear, consistent exception message; add a private static method (e.g.,
failPlaceholderExecution()) that throws an UnsupportedOperationException with a
descriptive actionable message like "CLP function placeholder - should not be
executed at runtime; ensure function wiring or implementation is provided" and
call that helper from each placeholder location so all placeholders fail
uniformly and with an actionable message.

@ScalarFunction(value = "CLP_GET_INT", deterministic = false)
@Description("Retrieves an integer value corresponding to the given JSON path.")
@SqlType(StandardTypes.BIGINT)
public static long clpGetInt(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_INT is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_FLOAT", deterministic = false)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just find that in our RFC:

we propose adding a handful of CLP_GET_<TYPE>(<json-path>) UDFs, where <TYPE> refers to the Presto type of the column and <json_path> refers to the column's JSON path.

So the should be a Presto type. Shall we rename this UDF to CLP_GET_DOUBLE then?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe we should update the RFC

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a table in RFC

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we have the table. I mean the description in the RFC that I quoted is not precise what it means.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. If that's the case, we also need to rename CLP_GET_INT to CLP_GET_BIGINT

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we going to rename this CLP_GET_INT to CLP_GET_BIGINT?

@Description("Retrieves a floating point value corresponding to the given JSON path.")
@SqlType(StandardTypes.DOUBLE)
public static double clpGetFloat(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_FLOAT is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_BOOL", deterministic = false)
@Description("Retrieves a boolean value corresponding to the given JSON path.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean clpGetBool(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_BOOL is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_STRING", deterministic = false)
@Description("Retrieves a string value corresponding to the given JSON path.")
@SqlType(StandardTypes.VARCHAR)
public static Slice clpGetString(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
throw new UnsupportedOperationException("CLP_GET_STRING is a placeholder function without implementation.");
}

@ScalarFunction(value = "CLP_GET_STRING_ARRAY", deterministic = false)
@Description("Retrieves an array value corresponding to the given JSON path and converts each element into a string.")
@SqlType("ARRAY(VARCHAR)")
public static Block clpGetStringArray(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
Comment on lines +61 to +64
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Normalise type literal casing in @SqlType

Presto annotations typically use lower-case type literals (array(varchar)). The parser is case-insensitive, but lower-case improves consistency.

🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 61 to 64, the @SqlType annotation uses an upper-case type literal
"ARRAY(VARCHAR)"; change it to the normalized lower-case form "array(varchar)"
to match Presto annotation style and improve consistency.

{
throw new UnsupportedOperationException("CLP_GET_STRING_ARRAY is a placeholder function without implementation.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
import com.facebook.presto.spi.Plugin;
import com.facebook.presto.spi.connector.ConnectorFactory;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

import java.util.Set;

public class ClpPlugin
implements Plugin
Expand All @@ -25,4 +28,10 @@ public Iterable<ConnectorFactory> getConnectorFactories()
{
return ImmutableList.of(new ClpConnectorFactory());
}

@Override
public Set<Class<?>> getFunctions()
{
return ImmutableSet.of(ClpFunctions.class);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.plugin.clp;
package com.facebook.presto.plugin.clp.optimization;

import com.facebook.airlift.log.Logger;
import com.facebook.presto.plugin.clp.ClpExpression;
import com.facebook.presto.plugin.clp.ClpTableHandle;
import com.facebook.presto.plugin.clp.ClpTableLayoutHandle;
import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorPlanOptimizer;
Expand All @@ -31,7 +34,6 @@
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.google.common.collect.ImmutableSet;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
Expand All @@ -42,15 +44,15 @@
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class ClpPlanOptimizer
public class ClpComputePushDown
implements ConnectorPlanOptimizer
{
private static final Logger log = Logger.get(ClpPlanOptimizer.class);
private static final Logger log = Logger.get(ClpComputePushDown.class);
private final FunctionMetadataManager functionManager;
private final StandardFunctionResolution functionResolution;
private final ClpSplitFilterProvider splitFilterProvider;

public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider)
public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider)
{
this.functionManager = requireNonNull(functionManager, "functionManager is null");
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
Expand Down Expand Up @@ -99,59 +101,75 @@ public PlanNode visitFilter(FilterNode node, RewriteContext<Void> context)
if (!(node.getSource() instanceof TableScanNode)) {
return node;
}

return processFilter(node, (TableScanNode) node.getSource());
}

private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNode)
{
hasVisitedFilter = true;

TableScanNode tableScanNode = (TableScanNode) node.getSource();
Map<VariableReferenceExpression, ColumnHandle> assignments = new HashMap<>(tableScanNode.getAssignments());
TableHandle tableHandle = tableScanNode.getTable();
ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle();

String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
ClpExpression clpExpression = node.getPredicate().accept(
Map<VariableReferenceExpression, ColumnHandle> assignments = tableScanNode.getAssignments();

Comment on lines 110 to +117
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Remove boolean gate and mark the table as “handled” in processFilter

Drop hasVisitedFilter and remove the tableScope from tablesWithoutFilter.

Apply this diff:

-            hasVisitedFilter = true;
-
             TableHandle tableHandle = tableScanNode.getTable();
             ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
 
-            String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
+            String tableScope = format("%s.%s", CONNECTOR_NAME, clpTableHandle.getSchemaTableName());
+            tablesWithoutFilter.remove(tableScope);
             Map<VariableReferenceExpression, ColumnHandle> assignments = tableScanNode.getAssignments();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
hasVisitedFilter = true;
TableScanNode tableScanNode = (TableScanNode) node.getSource();
Map<VariableReferenceExpression, ColumnHandle> assignments = new HashMap<>(tableScanNode.getAssignments());
TableHandle tableHandle = tableScanNode.getTable();
ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
ClpExpression clpExpression = node.getPredicate().accept(
Map<VariableReferenceExpression, ColumnHandle> assignments = tableScanNode.getAssignments();
TableHandle tableHandle = tableScanNode.getTable();
ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
String tableScope = format("%s.%s", CONNECTOR_NAME, clpTableHandle.getSchemaTableName());
tablesWithoutFilter.remove(tableScope);
Map<VariableReferenceExpression, ColumnHandle> assignments = tableScanNode.getAssignments();
🤖 Prompt for AI Agents
In
presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
around lines 110-117, remove the boolean hasVisitedFilter and stop adding the
tableScope to tablesWithoutFilter in this block; instead ensure the table is
recorded as handled inside processFilter (e.g., add the table identifier to the
set that tracks tables with an applied filter or mark the
TableHandle/ClpTableHandle as handled there). Update the surrounding logic so
tablesWithoutFilter is no longer mutated here and processFilter is the sole
place that marks a table as handled.

ClpExpression clpExpression = filterNode.getPredicate().accept(
new ClpFilterToKqlConverter(
functionResolution,
functionManager,
assignments,
splitFilterProvider.getColumnNames(tableScope)),
assignments);
null);
Optional<String> kqlQuery = clpExpression.getPushDownExpression();
Optional<String> metadataSqlQuery = clpExpression.getMetadataSqlQuery();
Optional<RowExpression> remainingPredicate = clpExpression.getRemainingExpression();

// Perform required metadata filter checks before handling the KQL query (if kqlQuery
// isn't present, we'll return early, skipping subsequent checks).
splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse(""));
if (metadataSqlQuery.isPresent()) {
boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty();
if (hasMetadataFilter) {
metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get()));
log.debug("Metadata SQL query: %s", metadataSqlQuery);
}

if (!kqlQuery.isPresent()) {
return node;
}
log.debug("KQL query: %s", kqlQuery);

ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);
TableScanNode newTableScanNode = new TableScanNode(
tableScanNode.getSourceLocation(),
idAllocator.getNextId(),
new TableHandle(
tableHandle.getConnectorId(),
clpTableHandle,
tableHandle.getTransaction(),
Optional.of(clpTableLayoutHandle)),
tableScanNode.getOutputVariables(),
tableScanNode.getAssignments(),
tableScanNode.getTableConstraints(),
tableScanNode.getCurrentConstraint(),
tableScanNode.getEnforcedConstraint(),
tableScanNode.getCteMaterializationInfo());
if (!remainingPredicate.isPresent()) {
return newTableScanNode;
if (kqlQuery.isPresent() || hasMetadataFilter) {
if (kqlQuery.isPresent()) {
log.debug("KQL query: %s", kqlQuery);
}

ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);
TableHandle newTableHandle = new TableHandle(
tableHandle.getConnectorId(),
clpTableHandle,
tableHandle.getTransaction(),
Optional.of(layoutHandle));

tableScanNode = new TableScanNode(
tableScanNode.getSourceLocation(),
idAllocator.getNextId(),
newTableHandle,
tableScanNode.getOutputVariables(),
tableScanNode.getAssignments(),
tableScanNode.getTableConstraints(),
tableScanNode.getCurrentConstraint(),
tableScanNode.getEnforcedConstraint(),
tableScanNode.getCteMaterializationInfo());
}

return new FilterNode(node.getSourceLocation(),
idAllocator.getNextId(),
newTableScanNode,
remainingPredicate.get());
if (remainingPredicate.isPresent()) {
// Not all predicate pushed down, need new FilterNode
return new FilterNode(
filterNode.getSourceLocation(),
idAllocator.getNextId(),
tableScanNode,
remainingPredicate.get());
}
else {
return tableScanNode;
}
}
}
}
Loading
Loading