Skip to content

Commit 36acb35

Browse files
committed
Add a random sample commadn
This adds RANDOM_SAMPLE <probability> <seed>? command.
1 parent 24c5e5c commit 36acb35

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+3886
-2534
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplingQuery.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,22 @@ public final class RandomSamplingQuery extends Query {
4444
* can be generated
4545
*/
4646
public RandomSamplingQuery(double p, int seed, int hash) {
47-
if (p <= 0.0 || p >= 1.0) {
48-
throw new IllegalArgumentException("RandomSampling probability must be between 0.0 and 1.0, was [" + p + "]");
49-
}
47+
checkProbabilityRange(p);
5048
this.p = p;
5149
this.seed = seed;
5250
this.hash = hash;
5351
}
5452

53+
/**
54+
* Verifies that the probability is within the (0.0, 1.0) range.
55+
* @throws IllegalArgumentException in case of an invalid probability.
56+
*/
57+
public static void checkProbabilityRange(double p) throws IllegalArgumentException {
58+
if (p <= 0.0 || p >= 1.0) {
59+
throw new IllegalArgumentException("RandomSampling probability must be between 0.0 and 1.0, was [" + p + "]");
60+
}
61+
}
62+
5563
@Override
5664
public String toString(String field) {
5765
return "RandomSamplingQuery{" + "p=" + p + ", seed=" + seed + ", hash=" + hash + '}';
@@ -98,13 +106,13 @@ public void visit(QueryVisitor visitor) {
98106
/**
99107
* A DocIDSetIter that skips a geometrically random number of documents
100108
*/
101-
static class RandomSamplingIterator extends DocIdSetIterator {
109+
public static class RandomSamplingIterator extends DocIdSetIterator {
102110
private final int maxDoc;
103111
private final double p;
104112
private final FastGeometric distribution;
105113
private int doc = -1;
106114

107-
RandomSamplingIterator(int maxDoc, double p, IntSupplier rng) {
115+
public RandomSamplingIterator(int maxDoc, double p, IntSupplier rng) {
108116
this.maxDoc = maxDoc;
109117
this.p = p;
110118
this.distribution = new FastGeometric(rng, p);
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.search.aggregations.bucket.sampler.random;
11+
12+
import org.apache.lucene.search.Query;
13+
import org.elasticsearch.TransportVersion;
14+
import org.elasticsearch.TransportVersions;
15+
import org.elasticsearch.common.Randomness;
16+
import org.elasticsearch.common.io.stream.StreamOutput;
17+
import org.elasticsearch.index.query.AbstractQueryBuilder;
18+
import org.elasticsearch.index.query.SearchExecutionContext;
19+
import org.elasticsearch.xcontent.XContentBuilder;
20+
21+
import java.io.IOException;
22+
import java.util.Objects;
23+
24+
import static org.elasticsearch.search.aggregations.bucket.sampler.random.RandomSamplingQuery.checkProbabilityRange;
25+
26+
public class RandomSamplingQueryBuilder extends AbstractQueryBuilder<RandomSamplingQueryBuilder> {
27+
28+
public static final String NAME = "random_sampling";
29+
30+
private final double probability;
31+
private int seed = Randomness.get().nextInt();
32+
private int hash = 0;
33+
34+
public RandomSamplingQueryBuilder(double probability) {
35+
checkProbabilityRange(probability);
36+
this.probability = probability;
37+
}
38+
39+
public RandomSamplingQueryBuilder seed(int seed) {
40+
checkProbabilityRange(probability);
41+
this.seed = seed;
42+
return this;
43+
}
44+
45+
public RandomSamplingQueryBuilder hash(Integer hash) {
46+
this.hash = hash;
47+
return this;
48+
}
49+
50+
@Override
51+
protected void doWriteTo(StreamOutput out) throws IOException {
52+
out.writeDouble(probability);
53+
out.writeInt(seed);
54+
out.writeInt(hash);
55+
}
56+
57+
@Override
58+
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
59+
builder.startObject(NAME);
60+
builder.field("probability", probability);
61+
builder.field("seed", seed);
62+
builder.field("hash", hash);
63+
builder.endObject();
64+
65+
}
66+
67+
@Override
68+
protected Query doToQuery(SearchExecutionContext context) throws IOException {
69+
return new RandomSamplingQuery(probability, seed, hash);
70+
}
71+
72+
@Override
73+
protected boolean doEquals(RandomSamplingQueryBuilder other) {
74+
return probability == other.probability && seed == other.seed && hash == other.hash;
75+
}
76+
77+
@Override
78+
protected int doHashCode() {
79+
return Objects.hash(probability, seed, hash);
80+
}
81+
82+
/**
83+
* Returns the name of the writeable object
84+
*/
85+
@Override
86+
public String getWriteableName() {
87+
return NAME;
88+
}
89+
90+
/**
91+
* The minimal version of the recipient this object can be sent to
92+
*/
93+
@Override
94+
public TransportVersion getMinimalSupportedVersion() {
95+
return TransportVersions.ZERO;
96+
}
97+
}

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ public static boolean isSupported(String name) {
172172
return ATTRIBUTES_MAP.containsKey(name);
173173
}
174174

175+
public static boolean isScoreAttribute(Expression a) {
176+
return a instanceof MetadataAttribute ma && ma.name().equals(SCORE);
177+
}
178+
175179
@Override
176180
public boolean equals(Object obj) {
177181
if (false == super.equals(obj)) {

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,4 +293,21 @@ public Page projectBlocks(int[] blockMapping) {
293293
}
294294
}
295295
}
296+
297+
public Page filter(int... positions) {
298+
Block[] filteredBlocks = new Block[blocks.length];
299+
boolean success = false;
300+
try {
301+
for (int i = 0; i < blocks.length; i++) {
302+
filteredBlocks[i] = getBlock(i).filter(positions);
303+
}
304+
success = true;
305+
} finally {
306+
releaseBlocks();
307+
if (success == false) {
308+
Releasables.closeExpectNoException(filteredBlocks);
309+
}
310+
}
311+
return new Page(filteredBlocks);
312+
}
296313
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FilterOperator.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,7 @@ protected Page process(Page page) {
6969
}
7070
positions = Arrays.copyOf(positions, rowCount);
7171

72-
Block[] filteredBlocks = new Block[page.getBlockCount()];
73-
boolean success = false;
74-
try {
75-
for (int i = 0; i < page.getBlockCount(); i++) {
76-
filteredBlocks[i] = page.getBlock(i).filter(positions);
77-
}
78-
success = true;
79-
} finally {
80-
page.releaseBlocks();
81-
if (success == false) {
82-
Releasables.closeExpectNoException(filteredBlocks);
83-
}
84-
}
85-
return new Page(filteredBlocks);
72+
return page.filter(positions);
8673
}
8774
}
8875

0 commit comments

Comments
 (0)