Skip to content

Commit a20cb19

Browse files
authored
feat!: Add multi-column support to AggFormula (#6206)
### Python examples: ``` from deephaven import empty_table from deephaven import agg source = empty_table(20).update( ["X = i", "Y = 2 * i", "Z = 3 * i", "Letter = (X % 2 == 0) ? `A` : `B`"] ) result = source.agg_by([ agg.formula(formula="out_a=sqrt(5.0)"), agg.formula(formula="out_b=min(X)"), agg.formula(formula="out_c=min(X) + max(Y)"), agg.formula(formula="out_d=sum(X + Y + Z)"), ], by=["Letter"]) ``` ### Groovy examples: ``` source = emptyTable(20).update("X = i", "Y = 2 * i", "Z = 3 * i", "Letter = (X % 2 == 0) ? `A` : `B`") result = source.aggBy([ AggFormula("out_a=sqrt(5.0)"), AggFormula("out_b=min(X)"), AggFormula("out_c=min(X) + max(Y)"), AggFormula("out_d=sum(X + Y + Z)"), ], "Letter") ```
1 parent 8bb9afa commit a20cb19

File tree

25 files changed

+4540
-3391
lines changed

25 files changed

+4540
-3391
lines changed

engine/table/src/main/java/io/deephaven/engine/table/impl/by/AggregationProcessor.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
//
44
package io.deephaven.engine.table.impl.by;
55

6-
import io.deephaven.api.ColumnName;
7-
import io.deephaven.api.Pair;
8-
import io.deephaven.api.SortColumn;
6+
import io.deephaven.api.*;
97
import io.deephaven.api.agg.*;
108
import io.deephaven.api.agg.spec.AggSpec;
119
import io.deephaven.api.agg.spec.AggSpecAbsSum;
@@ -93,13 +91,15 @@
9391
import io.deephaven.engine.table.impl.by.ssmcountdistinct.unique.ShortRollupUniqueOperator;
9492
import io.deephaven.engine.table.impl.by.ssmminmax.SsmChunkedMinMaxOperator;
9593
import io.deephaven.engine.table.impl.by.ssmpercentile.SsmChunkedPercentileOperator;
94+
import io.deephaven.engine.table.impl.select.SelectColumn;
9695
import io.deephaven.engine.table.impl.sources.ReinterpretUtils;
9796
import io.deephaven.engine.table.impl.ssms.SegmentedSortedMultiSet;
9897
import io.deephaven.engine.table.impl.util.freezeby.FreezeByCountOperator;
9998
import io.deephaven.engine.table.impl.util.freezeby.FreezeByOperator;
10099
import io.deephaven.time.DateTimeUtils;
101100
import io.deephaven.util.annotations.FinalDefault;
102101
import io.deephaven.util.type.ArrayTypeUtils;
102+
import io.deephaven.vector.VectorFactory;
103103
import org.apache.commons.lang3.mutable.MutableBoolean;
104104
import org.jetbrains.annotations.NotNull;
105105
import org.jetbrains.annotations.Nullable;
@@ -113,6 +113,7 @@
113113
import java.util.Collections;
114114
import java.util.List;
115115
import java.util.Map;
116+
import java.util.Set;
116117
import java.util.function.BiFunction;
117118
import java.util.function.Function;
118119
import java.util.function.Supplier;
@@ -153,6 +154,13 @@ private enum Type {
153154
private final Collection<? extends Aggregation> aggregations;
154155
private final Type type;
155156

157+
/**
158+
* For {@link Formula formula} aggregations we need a representation of the table definition with the column data
159+
* types converted to {@link io.deephaven.vector.Vector vectors}. This can be computed once and re-used across all
160+
* formula aggregations.
161+
*/
162+
private Map<String, ColumnDefinition<?>> vectorColumnDefinitions;
163+
156164
/**
157165
* Convert a collection of {@link Aggregation aggregations} to an {@link AggregationContextFactory}.
158166
*
@@ -707,6 +715,50 @@ public void visit(@NotNull final Partition partition) {
707715
groupByColumnNames));
708716
}
709717

718+
@Override
719+
public void visit(@NotNull final Formula formula) {
720+
final SelectColumn selectColumn = SelectColumn.of(formula.selectable());
721+
722+
// Get or create a column definition map composed of vectors of the original column types (or scalars when
723+
// part of the key columns).
724+
final Set<String> groupByColumnSet = Set.of(groupByColumnNames);
725+
if (vectorColumnDefinitions == null) {
726+
vectorColumnDefinitions = table.getDefinition().getColumnStream().collect(Collectors.toMap(
727+
ColumnDefinition::getName,
728+
(final ColumnDefinition<?> cd) -> groupByColumnSet.contains(cd.getName())
729+
? cd
730+
: ColumnDefinition.fromGenericType(
731+
cd.getName(),
732+
VectorFactory.forElementType(cd.getDataType()).vectorType(),
733+
cd.getDataType())));
734+
}
735+
736+
// Get the input column names from the formula and provide them to the groupBy operator
737+
final String[] allInputColumns =
738+
selectColumn.initDef(vectorColumnDefinitions, compilationProcessor).toArray(String[]::new);
739+
740+
final Map<Boolean, List<String>> partitioned = Arrays.stream(allInputColumns)
741+
.collect(Collectors.partitioningBy(groupByColumnSet::contains));
742+
final String[] inputKeyColumns = partitioned.get(true).toArray(String[]::new);
743+
final String[] inputNonKeyColumns = partitioned.get(false).toArray(String[]::new);
744+
745+
if (!selectColumn.getColumnArrays().isEmpty()) {
746+
throw new IllegalArgumentException("AggFormula does not support column arrays ("
747+
+ selectColumn.getColumnArrays() + ")");
748+
}
749+
if (selectColumn.hasVirtualRowVariables()) {
750+
throw new IllegalArgumentException("AggFormula does not support virtual row variables");
751+
}
752+
// TODO: re-use shared groupBy operators (https://github.com/deephaven/deephaven-core/issues/6363)
753+
final GroupByChunkedOperator groupByChunkedOperator = new GroupByChunkedOperator(table, false, null,
754+
Arrays.stream(inputNonKeyColumns).map(col -> MatchPair.of(Pair.parse(col)))
755+
.toArray(MatchPair[]::new));
756+
757+
final FormulaMultiColumnChunkedOperator op = new FormulaMultiColumnChunkedOperator(table,
758+
groupByChunkedOperator, true, selectColumn, inputKeyColumns);
759+
addNoInputOperator(op);
760+
}
761+
710762
// -------------------------------------------------------------------------------------------------------------
711763
// AggSpec.Visitor
712764
// -------------------------------------------------------------------------------------------------------------
@@ -745,6 +797,7 @@ public void visit(@NotNull final AggSpecFirst first) {
745797
@Override
746798
public void visit(@NotNull final AggSpecFormula formula) {
747799
unsupportedForBlinkTables("Formula");
800+
// TODO: re-use shared groupBy operators (https://github.com/deephaven/deephaven-core/issues/6363)
748801
final GroupByChunkedOperator groupByChunkedOperator = new GroupByChunkedOperator(table, false, null,
749802
resultPairs.stream().map(pair -> MatchPair.of((Pair) pair.input())).toArray(MatchPair[]::new));
750803
final FormulaChunkedOperator formulaChunkedOperator = new FormulaChunkedOperator(groupByChunkedOperator,
@@ -860,6 +913,12 @@ default void visit(@NotNull final LastRowKey lastRowKey) {
860913
rollupUnsupported("LastRowKey");
861914
}
862915

916+
@Override
917+
@FinalDefault
918+
default void visit(@NotNull final Formula formula) {
919+
rollupUnsupported("Formula");
920+
}
921+
863922
// -------------------------------------------------------------------------------------------------------------
864923
// AggSpec.Visitor for unsupported column aggregation specs
865924
// -------------------------------------------------------------------------------------------------------------

engine/table/src/main/java/io/deephaven/engine/table/impl/by/FormulaChunkedOperator.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,9 +291,8 @@ public UnaryOperator<ModifiedColumnSet> initializeRefreshing(@NotNull final Quer
291291
resultColumnModifiedColumnSets[ci] = resultTable.newModifiedColumnSet(resultColumnNames[ci]);
292292
}
293293
if (delegateToBy) {
294-
// We cannot use the groupBy's result MCS factory, because the result column names are not guaranteed to be
295-
// the
296-
// same.
294+
// We cannot use the groupBy's result MCS factory, because the result column names are not guaranteed
295+
// to be the same.
297296
groupBy.initializeRefreshing(resultTable, aggregationUpdateListener);
298297
}
299298
// Note that we also use the factory in propagateUpdates to identify the set of modified columns to handle.
@@ -379,7 +378,7 @@ private class DataFillerContext implements SafeCloseable {
379378
private final boolean[] columnsToFillMask;
380379
final FillFromContext[] fillFromContexts;
381380

382-
private DataFillerContext(@NotNull final boolean[] columnsToFillMask) {
381+
private DataFillerContext(final boolean @NotNull [] columnsToFillMask) {
383382
this.columnsToFillMask = columnsToFillMask;
384383
fillFromContexts = new FillFromContext[resultColumnNames.length];
385384
for (int ci = 0; ci < resultColumnNames.length; ++ci) {
@@ -448,6 +447,7 @@ private void copyData(@NotNull final RowSequence rowSequence, @NotNull final boo
448447
rowSequenceSlice);
449448
}
450449
}
450+
sharedContext.reset();
451451
}
452452
}
453453
}

0 commit comments

Comments
 (0)