Skip to content

Commit 60faa3c

Browse files
committed
Improving random sampling performance by lazily calling getSamplingConfiguration()
1 parent 4245e18 commit 60faa3c

File tree

1 file changed

+28
-10
lines changed

1 file changed

+28
-10
lines changed

server/src/main/java/org/elasticsearch/ingest/SamplingService.java

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,24 @@ private void maybeSample(
210210
return;
211211
}
212212
long startTime = statsTimeSupplier.getAsLong();
213-
SamplingConfiguration samplingConfig = getSamplingConfiguration(projectMetadata, indexName);
214-
if (samplingConfig == null) {
215-
return;
216-
}
217-
SoftReference<SampleInfo> sampleInfoReference = samples.compute(
218-
new ProjectIndex(projectMetadata.id(), indexName),
219-
(k, v) -> v == null || v.get() == null ? new SoftReference<>(new SampleInfo(samplingConfig.maxSamples())) : v
220-
);
213+
SoftReference<SampleInfo> sampleInfoReference = samples.compute(new ProjectIndex(projectMetadata.id(), indexName), (k, v) -> {
214+
if (v == null || v.get() == null) {
215+
SamplingConfiguration samplingConfig = getSamplingConfiguration(projectMetadata, indexName);
216+
if (samplingConfig == null) {
217+
/*
218+
* Calls to getSamplingConfiguration() are relatively expensive, so we store the NONE object here to indicate that there
219+
* was no sampling configuration. This way we don't have to do the lookup every single time for every index that has no
220+
* sampling configuration. If a sampling configuration is added for this index, this NONE sample will be removed by
221+
* the cluster state change listener.
222+
*/
223+
return new SoftReference<>(SampleInfo.NONE);
224+
}
225+
return new SoftReference<>(new SampleInfo(samplingConfig.maxSamples()));
226+
}
227+
return v;
228+
});
221229
SampleInfo sampleInfo = sampleInfoReference.get();
222-
if (sampleInfo == null) {
230+
if (sampleInfo == null || sampleInfo == SampleInfo.NONE) {
223231
return;
224232
}
225233
SampleStats stats = sampleInfo.stats;
@@ -229,6 +237,10 @@ private void maybeSample(
229237
stats.samplesRejectedForMaxSamplesExceeded.increment();
230238
return;
231239
}
240+
SamplingConfiguration samplingConfig = getSamplingConfiguration(projectMetadata, indexName);
241+
if (samplingConfig == null) {
242+
return; // a config existed when this SampleInfo was created, but it is gone now (e.g. the index was deleted asynchronously)
243+
}
232244
if (sampleInfo.getSizeInBytes() + indexRequest.source().length() > samplingConfig.maxSize().getBytes()) {
233245
stats.samplesRejectedForSize.increment();
234246
return;
@@ -475,7 +487,12 @@ private void maybeRemoveStaleSamples(ClusterChangedEvent event, ProjectId projec
475487
if (oldSampleConfigsMap.containsKey(indexName) && entry.getValue().equals(oldSampleConfigsMap.get(indexName)) == false) {
476488
logger.debug("Removing sample info for {} because its configuration has changed", indexName);
477489
samples.remove(new ProjectIndex(projectId, indexName));
478-
}
490+
} else if (oldSampleConfigsMap.containsKey(indexName) == false
491+
&& samples.containsKey(new ProjectIndex(projectId, indexName))) {
492+
// There had previously been a NONE sample here. There is a real config now, so delete the NONE sample
493+
logger.debug("Removing sample info for {} because its configuration has been created", indexName);
494+
samples.remove(new ProjectIndex(projectId, indexName));
495+
}
479496
}
480497
}
481498
}
@@ -1003,6 +1020,7 @@ public SampleStats adjustForMaxSize(int maxSize) {
10031020
* This is used internally to store information about a sample in the samples Map.
10041021
*/
10051022
private static final class SampleInfo {
1023+
public static final SampleInfo NONE = new SampleInfo(0);
10061024
private final RawDocument[] rawDocuments;
10071025
/*
10081026
* This stores the maximum index in rawDocuments that has data currently. This is incremented speculatively before writing data to

0 commit comments

Comments
 (0)