Skip to content

Commit 5ecc9f9

Browse files
committed
Refactor/unify recategorize
1 parent 5a4c8bb commit 5ecc9f9

File tree

2 files changed

+20
-25
lines changed

2 files changed

+20
-25
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -154,24 +154,16 @@ private IntBlock addIntermediate(Page page) {
154154
seenNull = true;
155155
return blockFactory.newConstantIntBlockWith(NULL_ORD, 1);
156156
}
157-
158-
Map<Integer, Integer> idMap = readIntermediate(categorizerState.getBytesRef(0, new BytesRef()));
159-
try (IntBlock.Builder newIdsBuilder = blockFactory.newIntBlockBuilder(idMap.size())) {
160-
int fromId = idMap.containsKey(0) ? 0 : 1;
161-
int toId = fromId + idMap.size();
162-
for (int i = fromId; i < toId; i++) {
163-
newIdsBuilder.appendInt(idMap.get(i));
164-
}
165-
return newIdsBuilder.build();
166-
}
157+
int[] ids = recategorize(categorizerState.getBytesRef(0, new BytesRef()), null);
158+
return blockFactory.newIntArrayVector(ids, ids.length).asBlock();
167159
}
168160

169161
/**
170-
* Read intermediate state from a block.
171-
*
172-
* @return a map from the old category id to the new one. The old ids go from 0 to {@code size - 1}.
162+
* Reads the intermediate state from a block and recategorizes the provided IDs.
163+
* If no IDs are provided, the IDs are the IDs in the categorizer's state in order.
164+
* (So 0...N-1 or 1...N, depending on whether null is present.)
173165
*/
174-
Map<Integer, Integer> readIntermediate(BytesRef bytes) {
166+
int[] recategorize(BytesRef bytes, int[] ids) {
175167
Map<Integer, Integer> idMap = new HashMap<>();
176168
try (StreamInput in = new BytesArray(bytes).streamInput()) {
177169
if (in.readBoolean()) {
@@ -184,10 +176,20 @@ Map<Integer, Integer> readIntermediate(BytesRef bytes) {
184176
// +1 because the 0 ordinal is reserved for null
185177
idMap.put(oldCategoryId + 1, newCategoryId + 1);
186178
}
187-
return idMap;
188179
} catch (IOException e) {
189180
throw new RuntimeException(e);
190181
}
182+
if (ids == null) {
183+
ids = new int[idMap.size()];
184+
int idOffset = idMap.containsKey(0) ? 0 : 1;
185+
for (int i = 0; i < idMap.size(); i++) {
186+
ids[i] = i + idOffset;
187+
}
188+
}
189+
for (int i = 0; i < ids.length; i++) {
190+
ids[i] = idMap.get(ids[i]);
191+
}
192+
return ids;
191193
}
192194

193195
/**

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import java.io.IOException;
3131
import java.util.ArrayList;
3232
import java.util.List;
33-
import java.util.Map;
3433

3534
/**
3635
* BlockHash implementation for {@code Categorize} grouping function as first
@@ -96,17 +95,11 @@ private IntBlock getCategories(Page page) {
9695
} else {
9796
BytesRefBlock stateBlock = page.getBlock(0);
9897
BytesRef stateBytes = stateBlock.getBytesRef(0, new BytesRef());
99-
10098
try (StreamInput in = new BytesArray(stateBytes).streamInput()) {
10199
BytesRef categorizerState = in.readBytesRef();
102-
Map<Integer, Integer> idMap = categorizeBlockHash.readIntermediate(categorizerState);
103-
int[] oldIds = in.readIntArray();
104-
try (IntBlock.Builder newIds = blockFactory.newIntBlockBuilder(page.getPositionCount())) {
105-
for (int oldId : oldIds) {
106-
newIds.appendInt(idMap.get(oldId));
107-
}
108-
return newIds.build();
109-
}
100+
int[] ids = in.readIntArray();
101+
ids = categorizeBlockHash.recategorize(categorizerState, ids);
102+
return blockFactory.newIntArrayVector(ids, ids.length).asBlock();
110103
} catch (IOException e) {
111104
throw new RuntimeException(e);
112105
}

0 commit comments

Comments
 (0)