Skip to content

Commit 78e4baa

Browse files
authored
Add classes to represent raw docs sampling configs (#134585)
* Add class to represent raw docs sampling configs * add xcontent serialization * Add custom project metadata * Added new transport version for serialization * Update docs/changelog/134585.yaml * Exclude SamplingMetadata from Snapshots --------- Co-authored-by: elasticsearchmachine <[email protected]> & Github Copilot
1 parent a720e2f commit 78e4baa

File tree

8 files changed

+776
-1
lines changed

8 files changed

+776
-1
lines changed

docs/changelog/134585.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134585
2+
summary: Add classes to represent raw docs sampling configs
3+
area: Ingest Node
4+
type: feature
5+
issues: []
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.action.admin.indices.sampling;
11+
12+
import org.elasticsearch.cluster.Diff;
13+
import org.elasticsearch.cluster.SimpleDiffable;
14+
import org.elasticsearch.common.io.stream.StreamInput;
15+
import org.elasticsearch.common.io.stream.StreamOutput;
16+
import org.elasticsearch.common.unit.ByteSizeValue;
17+
import org.elasticsearch.core.TimeValue;
18+
import org.elasticsearch.xcontent.ConstructingObjectParser;
19+
import org.elasticsearch.xcontent.ObjectParser;
20+
import org.elasticsearch.xcontent.ParseField;
21+
import org.elasticsearch.xcontent.ToXContentObject;
22+
import org.elasticsearch.xcontent.XContentBuilder;
23+
import org.elasticsearch.xcontent.XContentParser;
24+
25+
import java.io.IOException;
26+
27+
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
28+
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
29+
30+
/**
31+
* Configuration for sampling raw documents in an index.
32+
*/
33+
public record SamplingConfiguration(double rate, Integer maxSamples, ByteSizeValue maxSize, TimeValue timeToLive, String condition)
34+
implements
35+
ToXContentObject,
36+
SimpleDiffable<SamplingConfiguration> {
37+
38+
public static final String TYPE = "sampling_configuration";
39+
private static final String RATE_FIELD_NAME = "rate";
40+
private static final String MAX_SAMPLES_FIELD_NAME = "max_samples";
41+
private static final String MAX_SIZE_IN_BYTES_FIELD_NAME = "max_size_in_bytes";
42+
private static final String MAX_SIZE_FIELD_NAME = "max_size";
43+
private static final String TIME_TO_LIVE_IN_MILLIS_FIELD_NAME = "time_to_live_in_millis";
44+
private static final String TIME_TO_LIVE_FIELD_NAME = "time_to_live";
45+
private static final String CONDITION_FIELD_NAME = "if";
46+
47+
// Constants for validation and defaults
48+
public static final int MAX_SAMPLES_LIMIT = 10_000;
49+
public static final long MAX_SIZE_LIMIT_GIGABYTES = 5;
50+
public static final long MAX_TIME_TO_LIVE_DAYS = 30;
51+
public static final int DEFAULT_MAX_SAMPLES = 100;
52+
public static final long DEFAULT_MAX_SIZE_GIGABYTES = 1;
53+
public static final long DEFAULT_TIME_TO_LIVE_DAYS = 10;
54+
55+
// Error messages
56+
public static final String INVALID_RATE_MESSAGE = "rate must be greater than 0 and less than or equal to 1";
57+
public static final String INVALID_MAX_SAMPLES_MIN_MESSAGE = "maxSamples must be greater than 0";
58+
public static final String INVALID_MAX_SAMPLES_MAX_MESSAGE = "maxSamples must be less than or equal to " + MAX_SAMPLES_LIMIT;
59+
public static final String INVALID_MAX_SIZE_MIN_MESSAGE = "maxSize must be greater than 0";
60+
public static final String INVALID_MAX_SIZE_MAX_MESSAGE = "maxSize must be less than or equal to " + MAX_SIZE_LIMIT_GIGABYTES + "GB";
61+
public static final String INVALID_TIME_TO_LIVE_MIN_MESSAGE = "timeToLive must be greater than 0";
62+
public static final String INVALID_TIME_TO_LIVE_MAX_MESSAGE = "timeToLive must be less than or equal to "
63+
+ MAX_TIME_TO_LIVE_DAYS
64+
+ " days";
65+
public static final String INVALID_CONDITION_MESSAGE = "condition script, if provided, must not be empty";
66+
67+
private static final ConstructingObjectParser<SamplingConfiguration, Void> PARSER = new ConstructingObjectParser<>(
68+
TYPE,
69+
false,
70+
args -> {
71+
Double rate = (Double) args[0];
72+
Integer maxSamples = (Integer) args[1];
73+
ByteSizeValue humanReadableMaxSize = (ByteSizeValue) args[2];
74+
ByteSizeValue rawMaxSize = (ByteSizeValue) args[3];
75+
TimeValue humanReadableTimeToLive = (TimeValue) args[4];
76+
TimeValue rawTimeToLive = (TimeValue) args[5];
77+
String condition = (String) args[6];
78+
79+
return new SamplingConfiguration(
80+
rate,
81+
maxSamples,
82+
determineValue(humanReadableMaxSize, rawMaxSize),
83+
determineValue(humanReadableTimeToLive, rawTimeToLive),
84+
condition
85+
);
86+
}
87+
);
88+
89+
static {
90+
PARSER.declareDouble(constructorArg(), new ParseField(RATE_FIELD_NAME));
91+
PARSER.declareInt(optionalConstructorArg(), new ParseField(MAX_SAMPLES_FIELD_NAME));
92+
// Handle both human-readable and machine-readable fields for maxSize.
93+
PARSER.declareField(
94+
optionalConstructorArg(),
95+
(p, c) -> { return ByteSizeValue.parseBytesSizeValue(p.text(), MAX_SIZE_FIELD_NAME); },
96+
new ParseField(MAX_SIZE_FIELD_NAME),
97+
ObjectParser.ValueType.STRING
98+
);
99+
PARSER.declareField(
100+
optionalConstructorArg(),
101+
(p, c) -> { return ByteSizeValue.ofBytes(p.longValue()); },
102+
new ParseField(MAX_SIZE_IN_BYTES_FIELD_NAME),
103+
ObjectParser.ValueType.LONG
104+
);
105+
// Handle both human-readable and machine-readable fields for timeToLive
106+
PARSER.declareField(
107+
optionalConstructorArg(),
108+
(p, c) -> { return TimeValue.parseTimeValue(p.text(), TIME_TO_LIVE_FIELD_NAME); },
109+
new ParseField(TIME_TO_LIVE_FIELD_NAME),
110+
ObjectParser.ValueType.STRING
111+
);
112+
PARSER.declareField(
113+
optionalConstructorArg(),
114+
(p, c) -> { return TimeValue.timeValueMillis(p.longValue()); },
115+
new ParseField(TIME_TO_LIVE_IN_MILLIS_FIELD_NAME),
116+
ObjectParser.ValueType.LONG
117+
);
118+
PARSER.declareString(optionalConstructorArg(), new ParseField(CONDITION_FIELD_NAME));
119+
}
120+
121+
/**
122+
* Constructor with validation and defaulting for optional fields.
123+
*
124+
* @param rate The fraction of documents to sample (must be between 0 and 1)
125+
* @param maxSamples The maximum number of documents to sample (optional, defaults to {@link #DEFAULT_MAX_SAMPLES})
126+
* @param maxSize The maximum total size of sampled documents (optional, defaults to {@link #DEFAULT_MAX_SIZE_GIGABYTES} GB)
127+
* @param timeToLive The duration for which the sampled documents
128+
* should be retained (optional, defaults to {@link #DEFAULT_TIME_TO_LIVE_DAYS} days)
129+
* @param condition An optional condition script that sampled documents must satisfy (optional, can be null)
130+
* @throws IllegalArgumentException If any of the parameters are invalid, according to the validation rules
131+
*/
132+
public SamplingConfiguration(double rate, Integer maxSamples, ByteSizeValue maxSize, TimeValue timeToLive, String condition) {
133+
validateInputs(rate, maxSamples, maxSize, timeToLive, condition);
134+
135+
// Initialize record fields
136+
this.rate = rate;
137+
this.maxSamples = maxSamples == null ? DEFAULT_MAX_SAMPLES : maxSamples;
138+
this.maxSize = maxSize == null ? ByteSizeValue.ofGb(DEFAULT_MAX_SIZE_GIGABYTES) : maxSize;
139+
this.timeToLive = timeToLive == null ? TimeValue.timeValueDays(DEFAULT_TIME_TO_LIVE_DAYS) : timeToLive;
140+
this.condition = condition;
141+
}
142+
143+
/**
144+
* Constructs a SamplingConfiguration from a StreamInput for wire protocol deserialization.
145+
*
146+
* @param in The StreamInput to read from
147+
* @throws IOException If an I/O error occurs during deserialization
148+
*/
149+
public SamplingConfiguration(StreamInput in) throws IOException {
150+
this(in.readDouble(), in.readInt(), ByteSizeValue.readFrom(in), in.readTimeValue(), in.readOptionalString());
151+
}
152+
153+
// Write to StreamOutput
154+
@Override
155+
public void writeTo(StreamOutput out) throws IOException {
156+
out.writeDouble(this.rate);
157+
out.writeInt(this.maxSamples);
158+
out.writeWriteable(this.maxSize);
159+
out.writeTimeValue(this.timeToLive);
160+
out.writeOptionalString(this.condition);
161+
}
162+
163+
// Serialize to XContent (JSON)
164+
@Override
165+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
166+
builder.startObject();
167+
builder.field(RATE_FIELD_NAME, rate);
168+
builder.field(MAX_SAMPLES_FIELD_NAME, maxSamples);
169+
builder.humanReadableField(MAX_SIZE_IN_BYTES_FIELD_NAME, MAX_SIZE_FIELD_NAME, maxSize);
170+
builder.humanReadableField(TIME_TO_LIVE_IN_MILLIS_FIELD_NAME, TIME_TO_LIVE_FIELD_NAME, timeToLive);
171+
if (condition != null) {
172+
builder.field(CONDITION_FIELD_NAME, condition);
173+
}
174+
builder.endObject();
175+
return builder;
176+
}
177+
178+
/**
179+
* Parses a SamplingConfiguration from XContent (JSON).
180+
*
181+
* @param parser The XContentParser to parse from
182+
* @return The parsed SamplingConfiguration object
183+
* @throws IOException If parsing fails due to invalid JSON or I/O errors
184+
*/
185+
public static SamplingConfiguration fromXContent(XContentParser parser) throws IOException {
186+
return PARSER.parse(parser, null);
187+
}
188+
189+
/**
190+
* Creates a diff reader for SamplingConfiguration objects that can deserialize diffs from wire protocol.
191+
*
192+
* @param in The StreamInput to read the diff from
193+
* @return A Diff that can be applied to produce the target SamplingConfiguration
194+
* @throws IOException If an I/O error occurs during deserialization
195+
*/
196+
public static Diff<SamplingConfiguration> readDiffFrom(StreamInput in) throws IOException {
197+
return SimpleDiffable.readDiffFrom(SamplingConfiguration::new, in);
198+
}
199+
200+
// Input validation method
201+
private static void validateInputs(double rate, Integer maxSamples, ByteSizeValue maxSize, TimeValue timeToLive, String condition) {
202+
// Validate rate
203+
if (rate <= 0 || rate > 1) {
204+
throw new IllegalArgumentException(INVALID_RATE_MESSAGE);
205+
}
206+
207+
// Validate maxSamples
208+
if (maxSamples != null) {
209+
if (maxSamples <= 0) {
210+
throw new IllegalArgumentException(INVALID_MAX_SAMPLES_MIN_MESSAGE);
211+
}
212+
if (maxSamples > MAX_SAMPLES_LIMIT) {
213+
throw new IllegalArgumentException(INVALID_MAX_SAMPLES_MAX_MESSAGE);
214+
}
215+
}
216+
217+
// Validate maxSize
218+
if (maxSize != null) {
219+
if (maxSize.compareTo(ByteSizeValue.ZERO) <= 0) {
220+
throw new IllegalArgumentException(INVALID_MAX_SIZE_MIN_MESSAGE);
221+
}
222+
ByteSizeValue maxLimit = ByteSizeValue.ofGb(MAX_SIZE_LIMIT_GIGABYTES);
223+
if (maxSize.compareTo(maxLimit) > 0) {
224+
throw new IllegalArgumentException(INVALID_MAX_SIZE_MAX_MESSAGE);
225+
}
226+
}
227+
228+
// Validate timeToLive
229+
if (timeToLive != null) {
230+
if (timeToLive.compareTo(TimeValue.ZERO) <= 0) {
231+
throw new IllegalArgumentException(INVALID_TIME_TO_LIVE_MIN_MESSAGE);
232+
}
233+
TimeValue maxLimit = TimeValue.timeValueDays(MAX_TIME_TO_LIVE_DAYS);
234+
if (timeToLive.compareTo(maxLimit) > 0) {
235+
throw new IllegalArgumentException(INVALID_TIME_TO_LIVE_MAX_MESSAGE);
236+
}
237+
}
238+
239+
if (condition != null && condition.isEmpty()) {
240+
throw new IllegalArgumentException(INVALID_CONDITION_MESSAGE);
241+
}
242+
}
243+
244+
private static <T> T determineValue(T humanReadableValue, T rawValue) {
245+
// If both human-readable and raw fields are present, the human-readable one takes precedence.
246+
if (humanReadableValue == null && rawValue == null) {
247+
return null;
248+
}
249+
return humanReadableValue != null ? humanReadableValue : rawValue;
250+
251+
}
252+
}

0 commit comments

Comments
 (0)