Skip to content

Commit 005c059

Browse files
authored
fix: DH-21400 - Implement findPositionForKey for StaticSymbolTableChunkedOperatorAggregationStateManager (#7572)
This pull request implements the findPositionForKey method for StaticSymbolTableChunkedOperatorAggregationStateManager, replacing a previous UnsupportedOperationException with a working implementation. The implementation uses a lazily-initialized Trove map to map keys to their positions in the state manager. Additionally, two new test methods verify that symbol table-backed DataIndex lookups and filtering work correctly.
1 parent 63371c2 commit 005c059

File tree

2 files changed

+91
-3
lines changed

2 files changed

+91
-3
lines changed

engine/table/src/main/java/io/deephaven/engine/table/impl/by/StaticSymbolTableChunkedOperatorAggregationStateManager.java

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,23 @@
33
//
44
package io.deephaven.engine.table.impl.by;
55

6+
import gnu.trove.map.hash.TObjectIntHashMap;
7+
import io.deephaven.base.verify.Assert;
68
import io.deephaven.chunk.LongChunk;
79
import io.deephaven.chunk.WritableIntChunk;
810
import io.deephaven.chunk.WritableLongChunk;
911
import io.deephaven.chunk.attributes.Values;
12+
import io.deephaven.engine.primitive.iterator.CloseableIterator;
1013
import io.deephaven.engine.rowset.RowSequence;
14+
import io.deephaven.engine.rowset.RowSequenceFactory;
1115
import io.deephaven.engine.rowset.chunkattributes.RowKeys;
1216
import io.deephaven.engine.table.ChunkSource;
1317
import io.deephaven.engine.table.ColumnSource;
1418
import io.deephaven.engine.table.Table;
1519
import io.deephaven.engine.table.impl.SymbolTableToUniqueIdSource;
1620
import io.deephaven.engine.table.impl.sources.ObjectArraySource;
1721
import io.deephaven.engine.table.impl.sources.regioned.SymbolTableSource;
22+
import io.deephaven.engine.table.iterators.ChunkedObjectColumnIterator;
1823
import io.deephaven.util.QueryConstants;
1924
import io.deephaven.util.SafeCloseable;
2025
import io.deephaven.util.mutable.MutableInt;
@@ -34,6 +39,8 @@ public class StaticSymbolTableChunkedOperatorAggregationStateManager implements
3439
private final int[] keyPositions;
3540
private int nextPosition = 0;
3641

42+
private volatile TObjectIntHashMap<String> keyToPosition;
43+
3744
StaticSymbolTableChunkedOperatorAggregationStateManager(final ColumnSource<?> keySource, final Table symbolTable) {
3845
this.symbolTable = symbolTable;
3946
tableSize = symbolTable.intSize();
@@ -110,6 +117,7 @@ public void add(final SafeCloseable bc, final RowSequence rowSequence, final Col
110117
private void updateKeyHashTableSources(final WritableLongChunk<RowKeys> symbolTableValues,
111118
final int firstNewPosition) {
112119
keyColumn.ensureCapacity(nextPosition);
120+
Assert.eqNull(keyToPosition, "keyToPosition");
113121

114122
final ColumnSource<?> symbolColumnSource = symbolTable.getColumnSource(SymbolTableSource.SYMBOL_COLUMN_NAME);
115123

@@ -135,8 +143,23 @@ public ColumnSource<?>[] getKeyHashTableSources() {
135143

136144
@Override
137145
public int findPositionForKey(final Object key) {
138-
// shouldn't be able to get here; rollup/treeview will call this when we're 2+ levels deep in out view. since
139-
// we're limited to a single keySource, we cannot be more than 1 level deep
140-
throw new UnsupportedOperationException("StaticSymbolTable StateManager must be used with a single keySource");
146+
// Build the map if it doesn't exist
147+
TObjectIntHashMap<String> localKeyToPosition;
148+
if ((localKeyToPosition = keyToPosition) == null) {
149+
synchronized (this) {
150+
if ((localKeyToPosition = keyToPosition) == null) {
151+
final int length = nextPosition;
152+
localKeyToPosition = new TObjectIntHashMap<>(length, 0.75f, UNKNOWN_ROW);
153+
try (final CloseableIterator<String> keyIterator = new ChunkedObjectColumnIterator<>(
154+
keyColumn, RowSequenceFactory.forRange(0, length - 1))) {
155+
for (int ii = 0; ii < length; ii++) {
156+
localKeyToPosition.put(keyIterator.next(), ii);
157+
}
158+
}
159+
keyToPosition = localKeyToPosition;
160+
}
161+
}
162+
}
163+
return localKeyToPosition.get(key);
141164
}
142165
}

extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestSymbolTableSource.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
package io.deephaven.parquet.table;
55

66
import io.deephaven.base.FileUtils;
7+
import io.deephaven.engine.table.DataIndex;
78
import io.deephaven.engine.table.Table;
9+
import io.deephaven.engine.table.impl.dataindex.TableBackedDataIndex;
10+
import io.deephaven.engine.table.impl.indexer.DataIndexer;
811
import io.deephaven.engine.table.impl.sources.regioned.SymbolTableSource;
912
import io.deephaven.engine.testutil.junit4.EngineCleanup;
1013
import io.deephaven.engine.util.TableTools;
@@ -61,4 +64,66 @@ public void testWriteAndReadSymbols() {
6164

6265
assertTableEquals(expected, syms);
6366
}
67+
68+
@Test
69+
public void testSymbolTableDataIndexLookup() {
70+
final Table t = TableTools.emptyTable(100)
71+
.update("TheBestColumn=i==9?(String)null:`S`+(i%10)", "Sentinel=i");
72+
final File toWrite = new File(dataDirectory, "table.parquet");
73+
ParquetTools.writeTable(t, toWrite.getPath());
74+
75+
// Make sure we have the expected symbol table (or not)
76+
final Table readBack = ParquetTools.readTable(toWrite.getPath());
77+
final SymbolTableSource<String> source =
78+
(SymbolTableSource<String>) readBack.getColumnSource("TheBestColumn", String.class);
79+
Assert.assertTrue(source.hasSymbolTable(readBack.getRowSet()));
80+
81+
final DataIndex index = DataIndexer.getOrCreateDataIndex(readBack, "TheBestColumn");
82+
Assert.assertTrue("index instanceof TableBackedDataIndex", index instanceof TableBackedDataIndex);
83+
final DataIndex.RowKeyLookup rkl = index.rowKeyLookup();
84+
85+
for (int i = 0; i < 9; i++) {
86+
final String key = "S" + i;
87+
final long rowKey = rkl.apply(key, false);
88+
Assert.assertEquals(i, rowKey);
89+
}
90+
// Assert null lookup is correct.
91+
final long rowKey = rkl.apply(null, false);
92+
Assert.assertEquals(9, rowKey);
93+
}
94+
95+
/**
96+
* This won't fail after 41.0 due to the table-level filtering replacing AbstractColumnSource#match. Used to test
97+
* bugfix against 0.40.x.
98+
*/
99+
@Test
100+
public void testFilterIndexedSymbolTable() {
101+
final Table t = TableTools.emptyTable(100)
102+
.update("TheBestColumn=i==9?(String)null:`S`+(i%10)", "Sentinel=i");
103+
final File toWrite = new File(dataDirectory, "table.parquet");
104+
ParquetTools.writeTable(t, toWrite.getPath());
105+
106+
// Make sure we have the expected symbol table (or not)
107+
final Table readBack = ParquetTools.readTable(toWrite.getPath());
108+
final SymbolTableSource<String> source =
109+
(SymbolTableSource<String>) readBack.getColumnSource("TheBestColumn", String.class);
110+
Assert.assertTrue(source.hasSymbolTable(readBack.getRowSet()));
111+
112+
final DataIndex index = DataIndexer.getOrCreateDataIndex(readBack, "TheBestColumn");
113+
Assert.assertTrue("index instanceof TableBackedDataIndex", index instanceof TableBackedDataIndex);
114+
// materialize the index table
115+
final Table indexTable = index.table();
116+
117+
Table filtered;
118+
Table expected;
119+
120+
filtered = readBack.where("TheBestColumn in `S0`");
121+
expected = TableTools.emptyTable(10).update("TheBestColumn=`S0`", "Sentinel=i*10");
122+
assertTableEquals(expected, filtered);
123+
124+
// Assert null filtering is correct.
125+
filtered = readBack.where("TheBestColumn in null");
126+
expected = TableTools.emptyTable(1).update("TheBestColumn=(String)null", "Sentinel=9");
127+
assertTableEquals(expected, filtered);
128+
}
64129
}

0 commit comments

Comments
 (0)