Skip to content

Commit ffb5724

Browse files
committed
feat(isthmus): configurable fallback to dynamic function mapping
1 parent acaecb5 commit ffb5724

File tree

10 files changed

+641
-41
lines changed

10 files changed

+641
-41
lines changed

isthmus/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ dependencies {
105105
testImplementation(platform(libs.junit.bom))
106106
testImplementation(libs.junit.jupiter)
107107
testRuntimeOnly(libs.junit.platform.launcher)
108+
testRuntimeOnly(libs.slf4j.jdk14)
108109
implementation(libs.guava)
109110
implementation(libs.protobuf.java.util) {
110111
exclude("com.google.guava", "guava")

isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,22 @@ public Casing unquotedCasing() {
3232
public boolean allowDynamicUdfs() {
3333
return false;
3434
}
35+
36+
/**
37+
* Controls whether to automatically create mappings for all unmapped functions using
38+
* SimpleExtensionToSqlOperator.
39+
*
40+
* <p>When enabled, functions from extension YAML files that are not explicitly mapped in
41+
* FunctionMappings will be automatically mapped to Calcite SqlOperators. This allows custom and
42+
* dynamic functions to be used in SQL queries without manual mapping configuration.
43+
*
44+
* <p>This feature is disabled by default for backward compatibility.
45+
*
46+
* @return true if automatic fallback to dynamic function mapping should be enabled; false
47+
* otherwise (default)
48+
*/
49+
@Value.Default
50+
public boolean autoFallbackToDynamicFunctionMapping() {
51+
return false;
52+
}
3553
}

isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,45 @@ public static List<SqlOperator> from(
4545
SimpleExtension.ExtensionCollection collection,
4646
RelDataTypeFactory typeFactory,
4747
TypeConverter typeConverter) {
48-
// TODO: add support for windows functions
4948
return Stream.concat(
50-
collection.scalarFunctions().stream(), collection.aggregateFunctions().stream())
49+
Stream.concat(
50+
collection.scalarFunctions().stream(), collection.aggregateFunctions().stream()),
51+
collection.windowFunctions().stream())
52+
.map(function -> toSqlFunction(function, typeFactory, typeConverter))
53+
.collect(Collectors.toList());
54+
}
55+
56+
/**
57+
* Converts a list of functions to SqlOperators. Handles scalar, aggregate, and window functions.
58+
*
59+
* @param functions list of functions to convert
60+
* @param typeFactory the Calcite type factory
61+
* @return list of SqlOperators
62+
*/
63+
public static List<SqlOperator> from(
64+
List<? extends SimpleExtension.Function> functions, RelDataTypeFactory typeFactory) {
65+
return from(functions, typeFactory, TypeConverter.DEFAULT);
66+
}
67+
68+
/**
69+
* Converts a list of functions to SqlOperators. Handles scalar, aggregate, and window functions.
70+
*
71+
* <p>Each function variant is converted to a separate SqlOperator. Functions with the same base
72+
* name but different type signatures (e.g., strftime:ts_str, strftime:ts_string) are ALL added to
73+
* the operator table. Calcite will try to match the function call arguments against all available
74+
* operators and select the one that matches. This allows functions with multiple signatures to be
75+
* used correctly without explicit deduplication.
76+
*
77+
* @param functions list of functions to convert
78+
* @param typeFactory the Calcite type factory
79+
* @param typeConverter the type converter
80+
* @return list of SqlOperators
81+
*/
82+
public static List<SqlOperator> from(
83+
List<? extends SimpleExtension.Function> functions,
84+
RelDataTypeFactory typeFactory,
85+
TypeConverter typeConverter) {
86+
return functions.stream()
5187
.map(function -> toSqlFunction(function, typeFactory, typeConverter))
5288
.collect(Collectors.toList());
5389
}

isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,52 @@ public SqlToSubstrait(FeatureBoard features) {
3232
public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) {
3333
super(features, extensions);
3434

35+
List<SqlOperator> dynamicOperators = new java.util.ArrayList<>();
36+
3537
if (featureBoard.allowDynamicUdfs()) {
3638
SimpleExtension.ExtensionCollection dynamicExtensionCollection =
3739
ExtensionUtils.getDynamicExtensions(extensions);
3840
if (!dynamicExtensionCollection.scalarFunctions().isEmpty()
3941
|| !dynamicExtensionCollection.aggregateFunctions().isEmpty()) {
4042
List<SqlOperator> generatedDynamicOperators =
4143
SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, this.factory);
42-
this.operatorTable =
43-
SqlOperatorTables.chain(
44-
SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(generatedDynamicOperators));
45-
return;
44+
dynamicOperators.addAll(generatedDynamicOperators);
45+
}
46+
}
47+
48+
if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
49+
List<SimpleExtension.ScalarFunctionVariant> unmappedScalars =
50+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
51+
extensions.scalarFunctions(),
52+
io.substrait.isthmus.expression.FunctionMappings.SCALAR_SIGS);
53+
List<SimpleExtension.AggregateFunctionVariant> unmappedAggregates =
54+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
55+
extensions.aggregateFunctions(),
56+
io.substrait.isthmus.expression.FunctionMappings.AGGREGATE_SIGS);
57+
List<SimpleExtension.WindowFunctionVariant> unmappedWindows =
58+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
59+
extensions.windowFunctions(),
60+
io.substrait.isthmus.expression.FunctionMappings.WINDOW_SIGS);
61+
62+
if (!unmappedScalars.isEmpty()) {
63+
dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedScalars, this.factory));
4664
}
65+
if (!unmappedAggregates.isEmpty()) {
66+
dynamicOperators.addAll(
67+
SimpleExtensionToSqlOperator.from(unmappedAggregates, this.factory));
68+
}
69+
if (!unmappedWindows.isEmpty()) {
70+
dynamicOperators.addAll(SimpleExtensionToSqlOperator.from(unmappedWindows, this.factory));
71+
}
72+
}
73+
74+
if (!dynamicOperators.isEmpty()) {
75+
this.operatorTable =
76+
SqlOperatorTables.chain(
77+
SubstraitOperatorTable.INSTANCE, SqlOperatorTables.of(dynamicOperators));
78+
} else {
79+
this.operatorTable = SubstraitOperatorTable.INSTANCE;
4780
}
48-
this.operatorTable = SubstraitOperatorTable.INSTANCE;
4981
}
5082

5183
/**

isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java

Lines changed: 113 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
import org.apache.calcite.sql.parser.SqlParser;
8888
import org.apache.calcite.tools.Frameworks;
8989
import org.apache.calcite.tools.RelBuilder;
90+
import org.slf4j.Logger;
91+
import org.slf4j.LoggerFactory;
9092

9193
/**
9294
* RelVisitor to convert Substrait Rel plan to Calcite RelNode plan. Unsupported Rel node will call
@@ -95,6 +97,8 @@
9597
public class SubstraitRelNodeConverter
9698
extends AbstractRelVisitor<RelNode, SubstraitRelNodeConverter.Context, RuntimeException> {
9799

100+
private static final Logger LOGGER = LoggerFactory.getLogger(SubstraitRelNodeConverter.class);
101+
98102
protected final RelDataTypeFactory typeFactory;
99103

100104
protected final ScalarFunctionConverter scalarFunctionConverter;
@@ -120,9 +124,9 @@ public SubstraitRelNodeConverter(
120124
this(
121125
typeFactory,
122126
relBuilder,
123-
createScalarFunctionConverter(extensions, typeFactory, featureBoard.allowDynamicUdfs()),
124-
new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory),
125-
new WindowFunctionConverter(extensions.windowFunctions(), typeFactory),
127+
createScalarFunctionConverter(extensions, typeFactory, featureBoard),
128+
createAggregateFunctionConverter(extensions, typeFactory, featureBoard),
129+
createWindowFunctionConverter(extensions, typeFactory, featureBoard),
126130
TypeConverter.DEFAULT);
127131
}
128132

@@ -165,11 +169,11 @@ public SubstraitRelNodeConverter(
165169
private static ScalarFunctionConverter createScalarFunctionConverter(
166170
SimpleExtension.ExtensionCollection extensions,
167171
RelDataTypeFactory typeFactory,
168-
boolean allowDynamicUdfs) {
172+
FeatureBoard featureBoard) {
169173

170-
List<FunctionMappings.Sig> additionalSignatures;
174+
List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();
171175

172-
if (allowDynamicUdfs) {
176+
if (featureBoard.allowDynamicUdfs()) {
173177
java.util.Set<String> knownFunctionNames =
174178
FunctionMappings.SCALAR_SIGS.stream()
175179
.map(FunctionMappings.Sig::name)
@@ -180,28 +184,124 @@ private static ScalarFunctionConverter createScalarFunctionConverter(
180184
.filter(f -> !knownFunctionNames.contains(f.name().toLowerCase()))
181185
.collect(Collectors.toList());
182186

183-
if (dynamicFunctions.isEmpty()) {
184-
additionalSignatures = Collections.emptyList();
185-
} else {
187+
if (!dynamicFunctions.isEmpty()) {
186188
SimpleExtension.ExtensionCollection dynamicExtensionCollection =
187189
SimpleExtension.ExtensionCollection.builder().scalarFunctions(dynamicFunctions).build();
188190

189191
List<SqlOperator> dynamicOperators =
190192
SimpleExtensionToSqlOperator.from(dynamicExtensionCollection, typeFactory);
191193

192-
additionalSignatures =
194+
additionalSignatures.addAll(
193195
dynamicOperators.stream()
194196
.map(op -> FunctionMappings.s(op, op.getName()))
195-
.collect(Collectors.toList());
197+
.collect(Collectors.toList()));
198+
}
199+
}
200+
201+
if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
202+
List<SimpleExtension.ScalarFunctionVariant> unmappedFunctions =
203+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
204+
extensions.scalarFunctions(), FunctionMappings.SCALAR_SIGS);
205+
206+
if (!unmappedFunctions.isEmpty()) {
207+
LOGGER.info(
208+
"Dynamically mapping {} unmapped scalar functions: {}",
209+
unmappedFunctions.size(),
210+
unmappedFunctions.stream().map(f -> f.name()).collect(Collectors.toList()));
211+
212+
List<SqlOperator> dynamicOperators =
213+
SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory);
214+
215+
// Note: We use last-wins deduplication here because:
216+
// 1. Multiple variants of the same function create separate SqlOperator instances
217+
// 2. Calcite's SqlOperator equality is based on name and kind, not identity
218+
// 3. RexCalls may use any one of these equivalent operators
219+
// 4. We only need ONE SqlOperator registered per function name as a key in signatures map
220+
// 5. The FunctionFinder will match all variants based on type signatures
221+
java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
222+
for (SqlOperator op : dynamicOperators) {
223+
operatorsByName.put(op.getName().toLowerCase(), op);
224+
}
225+
226+
additionalSignatures.addAll(
227+
operatorsByName.values().stream()
228+
.map(op -> FunctionMappings.s(op, op.getName().toLowerCase()))
229+
.collect(Collectors.toList()));
196230
}
197-
} else {
198-
additionalSignatures = Collections.emptyList();
199231
}
200232

201233
return new ScalarFunctionConverter(
202234
extensions.scalarFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
203235
}
204236

237+
private static AggregateFunctionConverter createAggregateFunctionConverter(
238+
SimpleExtension.ExtensionCollection extensions,
239+
RelDataTypeFactory typeFactory,
240+
FeatureBoard featureBoard) {
241+
242+
List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();
243+
244+
if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
245+
List<SimpleExtension.AggregateFunctionVariant> unmappedFunctions =
246+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
247+
extensions.aggregateFunctions(), FunctionMappings.AGGREGATE_SIGS);
248+
249+
if (!unmappedFunctions.isEmpty()) {
250+
List<SqlOperator> dynamicOperators =
251+
SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory);
252+
253+
// Deduplicate operators by name (last-wins precedence) since multiple variants
254+
// of the same function create multiple SqlOperator objects
255+
java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
256+
for (SqlOperator op : dynamicOperators) {
257+
operatorsByName.put(op.getName().toLowerCase(), op);
258+
}
259+
260+
additionalSignatures.addAll(
261+
operatorsByName.values().stream()
262+
.map(op -> FunctionMappings.s(op, op.getName().toLowerCase()))
263+
.collect(Collectors.toList()));
264+
}
265+
}
266+
267+
return new AggregateFunctionConverter(
268+
extensions.aggregateFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
269+
}
270+
271+
private static WindowFunctionConverter createWindowFunctionConverter(
272+
SimpleExtension.ExtensionCollection extensions,
273+
RelDataTypeFactory typeFactory,
274+
FeatureBoard featureBoard) {
275+
276+
List<FunctionMappings.Sig> additionalSignatures = new ArrayList<>();
277+
278+
if (featureBoard.autoFallbackToDynamicFunctionMapping()) {
279+
List<SimpleExtension.WindowFunctionVariant> unmappedFunctions =
280+
io.substrait.isthmus.expression.FunctionConverter.getUnmappedFunctions(
281+
extensions.windowFunctions(), FunctionMappings.WINDOW_SIGS);
282+
283+
if (!unmappedFunctions.isEmpty()) {
284+
List<SqlOperator> dynamicOperators =
285+
SimpleExtensionToSqlOperator.from(unmappedFunctions, typeFactory);
286+
287+
// Deduplicate operators by name (last-wins precedence) since multiple variants
288+
// of the same function create multiple SqlOperator objects
289+
java.util.Map<String, SqlOperator> operatorsByName = new java.util.LinkedHashMap<>();
290+
for (SqlOperator op : dynamicOperators) {
291+
operatorsByName.put(op.getName().toLowerCase(), op);
292+
}
293+
294+
additionalSignatures.addAll(
295+
operatorsByName.values().stream()
296+
.map(op -> FunctionMappings.s(op, op.getName().toLowerCase()))
297+
.collect(Collectors.toList()));
298+
}
299+
}
300+
301+
return new WindowFunctionConverter(
302+
extensions.windowFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT);
303+
}
304+
205305
public static RelNode convert(
206306
Rel relRoot,
207307
RelOptCluster relOptCluster,

0 commit comments

Comments
 (0)