|
13 | 13 | import org.elasticsearch.compute.data.BlockUtils; |
14 | 14 | import org.elasticsearch.compute.data.BooleanBlock; |
15 | 15 | import org.elasticsearch.compute.data.BytesRefBlock; |
| 16 | +import org.elasticsearch.compute.data.BytesRefVector; |
16 | 17 | import org.elasticsearch.compute.data.DocBlock; |
17 | 18 | import org.elasticsearch.compute.data.DoubleBlock; |
18 | 19 | import org.elasticsearch.compute.data.ElementType; |
19 | 20 | import org.elasticsearch.compute.data.FloatBlock; |
20 | 21 | import org.elasticsearch.compute.data.IntBlock; |
21 | 22 | import org.elasticsearch.compute.data.LongBlock; |
| 23 | +import org.elasticsearch.compute.data.OrdinalBytesRefBlock; |
22 | 24 | import org.elasticsearch.compute.data.Page; |
| 25 | +import org.elasticsearch.core.Releasables; |
23 | 26 | import org.hamcrest.Matcher; |
24 | 27 |
|
25 | 28 | import java.util.ArrayList; |
| 29 | +import java.util.Arrays; |
| 30 | +import java.util.HashMap; |
26 | 31 | import java.util.List; |
| 32 | +import java.util.Map; |
27 | 33 |
|
28 | 34 | import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; |
29 | 35 | import static org.elasticsearch.test.ESTestCase.between; |
@@ -267,4 +273,67 @@ public static List<List<Object>> valuesAtPositions(Block block, int from, int to |
267 | 273 | } |
268 | 274 | return result; |
269 | 275 | } |
| 276 | + |
| 277 | + /** |
| 278 | + * Convert all of the {@link Block}s in a page that contain {@link BytesRef}s into |
| 279 | + * {@link OrdinalBytesRefBlock}s. |
| 280 | + */ |
| 281 | + public static Page convertBytesRefsToOrdinals(Page page) { |
| 282 | + Block[] blocks = new Block[page.getBlockCount()]; |
| 283 | + try { |
| 284 | + for (int b = 0; b < page.getBlockCount(); b++) { |
| 285 | + Block block = page.getBlock(b); |
| 286 | + if (block.elementType() != ElementType.BYTES_REF) { |
| 287 | + blocks[b] = block; |
| 288 | + continue; |
| 289 | + } |
| 290 | + Map<BytesRef, Integer> dedupe = new HashMap<>(); |
| 291 | + BytesRefBlock bytesRefBlock = (BytesRefBlock) block; |
| 292 | + try ( |
| 293 | + IntBlock.Builder ordinals = block.blockFactory().newIntBlockBuilder(block.getPositionCount()); |
| 294 | + BytesRefVector.Builder bytes = block.blockFactory().newBytesRefVectorBuilder(block.getPositionCount()) |
| 295 | + ) { |
| 296 | + BytesRef scratch = new BytesRef(); |
| 297 | + for (int p = 0; p < block.getPositionCount(); p++) { |
| 298 | + int first = block.getFirstValueIndex(p); |
| 299 | + int count = block.getValueCount(p); |
| 300 | + if (count == 0) { |
| 301 | + ordinals.appendNull(); |
| 302 | + continue; |
| 303 | + } |
| 304 | + if (count == 1) { |
| 305 | + BytesRef v = bytesRefBlock.getBytesRef(first, scratch); |
| 306 | + ordinals.appendInt(dedupe(dedupe, bytes, v)); |
| 307 | + continue; |
| 308 | + } |
| 309 | + int end = first + count; |
| 310 | + ordinals.beginPositionEntry(); |
| 311 | + for (int i = first; i < end; i++) { |
| 312 | + BytesRef v = bytesRefBlock.getBytesRef(i, scratch); |
| 313 | + ordinals.appendInt(dedupe(dedupe, bytes, v)); |
| 314 | + } |
| 315 | + ordinals.endPositionEntry(); |
| 316 | + } |
| 317 | + blocks[b] = new OrdinalBytesRefBlock(ordinals.build(), bytes.build()); |
| 318 | + bytesRefBlock.decRef(); |
| 319 | + } |
| 320 | + } |
| 321 | + Page p = new Page(blocks); |
| 322 | + Arrays.fill(blocks, null); |
| 323 | + return p; |
| 324 | + } finally { |
| 325 | + Releasables.close(blocks); |
| 326 | + } |
| 327 | + } |
| 328 | + |
| 329 | + private static int dedupe(Map<BytesRef, Integer> dedupe, BytesRefVector.Builder bytes, BytesRef v) { |
| 330 | + Integer current = dedupe.get(v); |
| 331 | + if (current != null) { |
| 332 | + return current; |
| 333 | + } |
| 334 | + bytes.appendBytesRef(v); |
| 335 | + int o = dedupe.size(); |
| 336 | + dedupe.put(v, o); |
| 337 | + return o; |
| 338 | + } |
270 | 339 | } |
0 commit comments