
Commit dd48d97

Remove unused code from TopNMultivalueDedupeLong
1 parent 76291a6 commit dd48d97

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/mvdedupe/TopNMultivalueDedupeLong.java

Lines changed: 9 additions & 265 deletions
@@ -20,7 +20,10 @@
 import java.util.function.Predicate;
 
 /**
- * Removes duplicate values from multivalued positions, and keeps only the top N.
+ * Removes duplicate values from multivalued positions, and keeps only the ones that pass the filters.
+ * <p>
+ * Clone of {@link MultivalueDedupeLong}, but it accepts a predicate and a nulls flag to filter the values.
+ * </p>
  */
 public class TopNMultivalueDedupeLong {
     /**
@@ -37,7 +40,7 @@ public class TopNMultivalueDedupeLong {
     /**
      * Whether the hash expects nulls or not.
      */
-    final boolean hasNull;
+    final boolean acceptNulls;
     /**
      * A predicate to test if a value is part of the top N or not.
      */
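The rename from hasNull to acceptNulls and the isAcceptable predicate are the whole filtering surface of the class. A minimal usage sketch, not part of this commit: it assumes the caller already has a BlockFactory, a multivalued LongBlock, and a LongHash from the surrounding operator code, and the helper name dedupeAndHash and the cutoff parameter are invented for illustration. Only the constructor and hashAdd signatures visible further down in this diff are used.

// Hypothetical helper, not from the codebase: wires the constructor and hashAdd
// (shown further down in this diff) together.
static MultivalueDedupe.HashResult dedupeAndHash(BlockFactory blockFactory, LongBlock block, LongHash hash, long cutoff) {
    // Keep null positions, and keep only values at or below the caller-supplied cutoff.
    TopNMultivalueDedupeLong dedupe = new TopNMultivalueDedupeLong(block, true, v -> v <= cutoff);
    return dedupe.hashAdd(blockFactory, hash);
}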
@@ -54,139 +57,12 @@ public class TopNMultivalueDedupeLong {
      */
     int w;
 
-    public TopNMultivalueDedupeLong(LongBlock block, boolean hasNull, Predicate<Long> isAcceptable) {
+    public TopNMultivalueDedupeLong(LongBlock block, boolean acceptNulls, Predicate<Long> isAcceptable) {
         this.block = block;
-        this.hasNull = hasNull;
+        this.acceptNulls = acceptNulls;
         this.isAcceptable = isAcceptable;
     }
 
-    /**
-     * Remove duplicate values from each position and write the results to a
-     * {@link Block} using an adaptive algorithm based on the size of the input list.
-     */
-    public LongBlock dedupeToBlockAdaptive(BlockFactory blockFactory) {
-        if (block.mvDeduplicated()) {
-            block.incRef();
-            return block;
-        }
-        try (LongBlock.Builder builder = blockFactory.newLongBlockBuilder(block.getPositionCount())) {
-            for (int p = 0; p < block.getPositionCount(); p++) {
-                int count = block.getValueCount(p);
-                int first = block.getFirstValueIndex(p);
-                switch (count) {
-                    case 0 -> builder.appendNull();
-                    case 1 -> builder.appendLong(block.getLong(first));
-                    default -> {
-                        /*
-                         * It's better to copyMissing when there are few unique values
-                         * and better to copy and sort when there are many unique values.
-                         * The more duplicate values there are the more comparatively worse
-                         * copyAndSort is. But we don't know how many unique values there
-                         * because our job is to find them. So we use the count of values
-                         * as a proxy that is fast to test. It's not always going to be
-                         * optimal but it has the nice property of being quite quick on
-                         * short lists and not n^2 levels of terrible on long ones.
-                         *
-                         * It'd also be possible to make a truly hybrid mechanism that
-                         * switches from copyMissing to copyUnique once it collects enough
-                         * unique values. The trouble is that the switch is expensive and
-                         * makes kind of a "hole" in the performance of that mechanism where
-                         * you may as well have just gone with either of the two other
-                         * strategies. So we just don't try it for now.
-                         */
-                        if (count < ALWAYS_COPY_MISSING) {
-                            copyMissing(first, count);
-                            writeUniquedWork(builder);
-                        } else {
-                            copyAndSort(first, count);
-                            deduplicatedSortedWork(builder);
-                        }
-                    }
-                }
-            }
-            return builder.build();
-        }
-    }
-
-    /**
-     * Remove duplicate values from each position and write the results to a
-     * {@link Block} using an algorithm with very low overhead but {@code n^2}
-     * case complexity for larger. Prefer {@link #dedupeToBlockAdaptive}
-     * which picks based on the number of elements at each position.
-     */
-    public LongBlock dedupeToBlockUsingCopyAndSort(BlockFactory blockFactory) {
-        if (block.mvDeduplicated()) {
-            block.incRef();
-            return block;
-        }
-        try (LongBlock.Builder builder = blockFactory.newLongBlockBuilder(block.getPositionCount())) {
-            for (int p = 0; p < block.getPositionCount(); p++) {
-                int count = block.getValueCount(p);
-                int first = block.getFirstValueIndex(p);
-                switch (count) {
-                    case 0 -> builder.appendNull();
-                    case 1 -> builder.appendLong(block.getLong(first));
-                    default -> {
-                        copyAndSort(first, count);
-                        deduplicatedSortedWork(builder);
-                    }
-                }
-            }
-            return builder.build();
-        }
-    }
-
-    /**
-     * Remove duplicate values from each position and write the results to a
-     * {@link Block} using an algorithm that sorts all values. It has a higher
-     * overhead for small numbers of values at each position than
-     * {@link #dedupeToBlockUsingCopyMissing} for large numbers of values the
-     * performance is dominated by the {@code n*log n} sort. Prefer
-     * {@link #dedupeToBlockAdaptive} unless you need the results sorted.
-     */
-    public LongBlock dedupeToBlockUsingCopyMissing(BlockFactory blockFactory) {
-        if (block.mvDeduplicated()) {
-            block.incRef();
-            return block;
-        }
-        try (LongBlock.Builder builder = blockFactory.newLongBlockBuilder(block.getPositionCount())) {
-            for (int p = 0; p < block.getPositionCount(); p++) {
-                int count = block.getValueCount(p);
-                int first = block.getFirstValueIndex(p);
-                switch (count) {
-                    case 0 -> builder.appendNull();
-                    case 1 -> builder.appendLong(block.getLong(first));
-                    default -> {
-                        copyMissing(first, count);
-                        writeUniquedWork(builder);
-                    }
-                }
-            }
-            return builder.build();
-        }
-    }
-
-    /**
-     * Sort values from each position and write the results to a {@link Block}.
-     */
-    public LongBlock sortToBlock(BlockFactory blockFactory, boolean ascending) {
-        try (LongBlock.Builder builder = blockFactory.newLongBlockBuilder(block.getPositionCount())) {
-            for (int p = 0; p < block.getPositionCount(); p++) {
-                int count = block.getValueCount(p);
-                int first = block.getFirstValueIndex(p);
-                switch (count) {
-                    case 0 -> builder.appendNull();
-                    case 1 -> builder.appendLong(block.getLong(first));
-                    default -> {
-                        copyAndSort(first, count);
-                        writeSortedWork(builder, ascending);
-                    }
-                }
-            }
-            return builder.build();
-        }
-    }
-
     /**
      * Dedupe values, add them to the hash, and build an {@link IntBlock} of
      * their hashes. This block is suitable for passing as the grouping block
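The long comment inside the removed dedupeToBlockAdaptive above explains why the class picked copyMissing for positions with few values and copyAndSort for positions with many. A standalone sketch of that tradeoff on a plain long[], independent of the Block and builder APIs; the method name and the threshold parameter (the analogue of ALWAYS_COPY_MISSING) are illustrative only.

// Illustrative only: the two dedupe strategies the removed comment compares.
// Below the threshold a per-value linear scan is cheap; above it the n*log(n)
// sort wins because the scan degrades toward O(n^2).
static long[] dedupe(long[] values, int threshold) {
    if (values.length == 0) {
        return values;
    }
    if (values.length < threshold) {
        // "copyMissing" flavor: keep a value only if it is not already in the output
        long[] unique = new long[values.length];
        int n = 0;
        for (long v : values) {
            boolean seen = false;
            for (int i = 0; i < n; i++) {
                if (unique[i] == v) {
                    seen = true;
                    break;
                }
            }
            if (seen == false) {
                unique[n++] = v;
            }
        }
        return java.util.Arrays.copyOf(unique, n);
    }
    // "copyAndSort" flavor: sort a copy, then skip adjacent duplicates
    long[] sorted = values.clone();
    java.util.Arrays.sort(sorted);
    long[] unique = new long[sorted.length];
    int n = 0;
    unique[n++] = sorted[0];
    for (int i = 1; i < sorted.length; i++) {
        if (sorted[i] != sorted[i - 1]) {
            unique[n++] = sorted[i];
        }
    }
    return java.util.Arrays.copyOf(unique, n);
}

As in the removed methods, the two branches produce different orders: the linear-scan branch keeps first-appearance order (what writeUniquedWork emitted), while the sort branch yields sorted output (what deduplicatedSortedWork emitted).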
@@ -200,7 +76,7 @@ public MultivalueDedupe.HashResult hashAdd(BlockFactory blockFactory, LongHash h
                 int first = block.getFirstValueIndex(p);
                 switch (count) {
                     case 0 -> {
-                        if (hasNull) {
+                        if (acceptNulls) {
                             sawNull = true;
                             builder.appendInt(0);
                         } else {
@@ -237,7 +113,7 @@ public IntBlock hashLookup(BlockFactory blockFactory, LongHash hash) {
                 int first = block.getFirstValueIndex(p);
                 switch (count) {
                     case 0 -> {
-                        if (hasNull) {
+                        if (acceptNulls) {
                             builder.appendInt(0);
                         } else {
                             builder.appendNull();
@@ -262,68 +138,6 @@ public IntBlock hashLookup(BlockFactory blockFactory, LongHash hash) {
         }
     }
 
-    /**
-     * Build a {@link BatchEncoder} which deduplicates values at each position
-     * and then encodes the results into a {@link byte[]} which can be used for
-     * things like hashing many fields together.
-     */
-    public BatchEncoder batchEncoder(int batchSize) {
-        block.incRef();
-        return new BatchEncoder.Longs(batchSize) {
-            @Override
-            protected void readNextBatch() {
-                int position = firstPosition();
-                if (w > 0) {
-                    // The last block didn't fit so we have to *make* it fit
-                    ensureCapacity(w);
-                    startPosition();
-                    encodeUniquedWork(this);
-                    endPosition();
-                    position++;
-                }
-                for (; position < block.getPositionCount(); position++) {
-                    int count = block.getValueCount(position);
-                    int first = block.getFirstValueIndex(position);
-                    switch (count) {
-                        case 0 -> encodeNull();
-                        case 1 -> {
-                            long v = block.getLong(first);
-                            if (hasCapacity(1)) {
-                                startPosition();
-                                encode(v);
-                                endPosition();
-                            } else {
-                                work[0] = v;
-                                w = 1;
-                                return;
-                            }
-                        }
-                        default -> {
-                            if (count < ALWAYS_COPY_MISSING) {
-                                copyMissing(first, count);
-                            } else {
-                                copyAndSort(first, count);
-                                convertSortedWorkToUnique();
-                            }
-                            if (hasCapacity(w)) {
-                                startPosition();
-                                encodeUniquedWork(this);
-                                endPosition();
-                            } else {
-                                return;
-                            }
-                        }
-                    }
-                }
-            }
-
-            @Override
-            public void close() {
-                block.decRef();
-            }
-        };
-    }
-
     /**
      * Copy all value from the position into {@link #work} and then
      * sorts it {@code n * log(n)}.
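The removed batchEncoder relies on a carry-over trick that its inline comment hints at: when the deduplicated values of a position do not fit into the remaining batch capacity they stay in the work array, and the next call to readNextBatch emits them first. A simplified standalone sketch of that pattern; every name here is invented for illustration, none of it is the real BatchEncoder API, and it assumes a deferred position always fits into an empty batch (the real code grows its buffer with ensureCapacity instead).

// Illustrative carry-over batching: a position whose values do not fit is deferred
// and written at the start of the next batch.
final class CarryOverBatcher {
    private final long[] batch;
    private int used;
    private long[] carried = new long[0]; // values of the position that did not fit
    private int carriedCount;

    CarryOverBatcher(int batchSize) {
        this.batch = new long[batchSize];
    }

    /** Fills one batch from the iterator and returns it; unfinished work is carried over. */
    long[] nextBatch(java.util.Iterator<long[]> positions) {
        used = 0;
        if (carriedCount > 0) {
            append(carried, carriedCount); // the last position didn't fit, so emit it first
            carriedCount = 0;
        }
        while (positions.hasNext()) {
            long[] values = positions.next();
            if (values.length > batch.length - used) {
                carried = values.clone(); // defer this position to the next batch
                carriedCount = values.length;
                break;
            }
            append(values, values.length);
        }
        return java.util.Arrays.copyOf(batch, used);
    }

    private void append(long[] values, int count) {
        System.arraycopy(values, 0, batch, used, count);
        used += count;
    }
}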
@@ -378,52 +192,6 @@ void copyMissing(int first, int count) {
         }
     }
 
-    /**
-     * Writes an already deduplicated {@link #work} to a {@link LongBlock.Builder}.
-     */
-    private void writeUniquedWork(LongBlock.Builder builder) {
-        if (w == 1) {
-            builder.appendLong(work[0]);
-            return;
-        }
-        builder.beginPositionEntry();
-        for (int i = 0; i < w; i++) {
-            builder.appendLong(work[i]);
-        }
-        builder.endPositionEntry();
-    }
-
-    /**
-     * Writes a sorted {@link #work} to a {@link LongBlock.Builder}, skipping duplicates.
-     */
-    private void deduplicatedSortedWork(LongBlock.Builder builder) {
-        builder.beginPositionEntry();
-        long prev = work[0];
-        builder.appendLong(prev);
-        for (int i = 1; i < w; i++) {
-            if (prev != work[i]) {
-                prev = work[i];
-                builder.appendLong(prev);
-            }
-        }
-        builder.endPositionEntry();
-    }
-
-    /**
-     * Writes a {@link #work} to a {@link LongBlock.Builder}.
-     */
-    private void writeSortedWork(LongBlock.Builder builder, boolean ascending) {
-        builder.beginPositionEntry();
-        for (int i = 0; i < w; i++) {
-            if (ascending) {
-                builder.appendLong(work[i]);
-            } else {
-                builder.appendLong(work[w - i - 1]);
-            }
-        }
-        builder.endPositionEntry();
-    }
-
     /**
      * Writes an already deduplicated {@link #work} to a hash.
      */
@@ -607,30 +375,6 @@ private void hashLookupSortedWork(LongHash hash, IntBlock.Builder builder) {
         builder.endPositionEntry();
     }
 
-    /**
-     * Writes a deduplicated {@link #work} to a {@link BatchEncoder.Longs}.
-     */
-    private void encodeUniquedWork(BatchEncoder.Longs encoder) {
-        for (int i = 0; i < w; i++) {
-            encoder.encode(work[i]);
-        }
-    }
-
-    /**
-     * Converts {@link #work} from sorted array to a deduplicated array.
-     */
-    private void convertSortedWorkToUnique() {
-        long prev = work[0];
-        int end = w;
-        w = 1;
-        for (int i = 1; i < end; i++) {
-            if (false == valuesEqual(prev, work[i])) {
-                prev = work[i];
-                work[w++] = prev;
-            }
-        }
-    }
-
     private void grow(int size) {
         work = ArrayUtil.grow(work, size);
     }
