Skip to content

Commit 1e532d5

Browse files
gmaroulicbuescher
authored andcommitted
Exclude internal data streams from global retention (elastic#112100)
With elastic#111972 we enable users to set up global retention for data streams that are managed by the data stream lifecycle. This will allow users of elasticsearch to have a more control over their data retention, and consequently better resource management of their clusters. However, there is a small number of data streams that are necessary for the good operation of elasticsearch and should not follow user defined retention to avoid surprises. For this reason, we put forth the following definition of internal data streams. A data stream is internal if it's either a system index (system flag is true) or if its name starts with a dot. This PR adds the `isInternalDataStream` param in the effective retention calculation making explicit that this is also used to determine the effective retention.
1 parent 0f0f7a4 commit 1e532d5

File tree

19 files changed

+195
-82
lines changed

19 files changed

+195
-82
lines changed

docs/changelog/112100.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 112100
2+
summary: Exclude internal data streams from global retention
3+
area: Data streams
4+
type: bug
5+
issues: []

docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ will exceed this time period. This can be set via the <<cluster-update-settings,
9696
Effective retention cannot be set, it is derived by taking into account all the configured retention listed above and is
9797
calculated as it is described <<effective-retention-calculation,here>>.
9898

99+
NOTE: Global default and max retention do not apply to data streams internal to elastic. Internal data streams are recognised
100+
either by having the `system` flag set to `true` or if their name is prefixed with a dot (`.`).
101+
99102
[discrete]
100103
[[retention-configuration]]
101104
==== How to configure retention?

modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ private Index maybeExecuteRollover(ClusterState state, DataStream dataStream, bo
819819
RolloverRequest rolloverRequest = getDefaultRolloverRequest(
820820
rolloverConfiguration,
821821
dataStream.getName(),
822-
dataStream.getLifecycle().getEffectiveDataRetention(dataStream.isSystem() ? null : globalRetentionSettings.get()),
822+
dataStream.getLifecycle().getEffectiveDataRetention(globalRetentionSettings.get(), dataStream.isInternal()),
823823
rolloverFailureStore
824824
);
825825
transportActionsDeduplicator.executeOnce(
@@ -879,7 +879,7 @@ Set<Index> maybeExecuteRetention(ClusterState state, DataStream dataStream, Set<
879879
Set<Index> indicesToBeRemoved = new HashSet<>();
880880
// We know that there is lifecycle and retention because there are indices to be deleted
881881
assert dataStream.getLifecycle() != null;
882-
TimeValue effectiveDataRetention = dataStream.getLifecycle().getEffectiveDataRetention(globalRetention);
882+
TimeValue effectiveDataRetention = dataStream.getLifecycle().getEffectiveDataRetention(globalRetention, dataStream.isInternal());
883883
for (Index index : backingIndicesOlderThanRetention) {
884884
if (indicesToExcludeForRemainingRun.contains(index) == false) {
885885
IndexMetadata backingIndex = metadata.index(index);

modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/action/TransportExplainDataStreamLifecycleAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ protected void masterOperation(
103103
ExplainIndexDataStreamLifecycle explainIndexDataStreamLifecycle = new ExplainIndexDataStreamLifecycle(
104104
index,
105105
true,
106-
parentDataStream.isSystem(),
106+
parentDataStream.isInternal(),
107107
idxMetadata.getCreationDate(),
108108
rolloverInfo == null ? null : rolloverInfo.getTime(),
109109
generationDate,

server/src/main/java/org/elasticsearch/action/datastreams/GetDataStreamAction.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ public XContentBuilder toXContent(
319319
}
320320
if (dataStream.getLifecycle() != null) {
321321
builder.field(LIFECYCLE_FIELD.getPreferredName());
322-
dataStream.getLifecycle()
323-
.toXContent(builder, params, rolloverConfiguration, dataStream.isSystem() ? null : globalRetention);
322+
dataStream.getLifecycle().toXContent(builder, params, rolloverConfiguration, globalRetention, dataStream.isInternal());
324323
}
325324
if (ilmPolicyName != null) {
326325
builder.field(ILM_POLICY_FIELD.getPreferredName(), ilmPolicyName);

server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainIndexDataStreamLifecycle.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class ExplainIndexDataStreamLifecycle implements Writeable, ToXContentObj
4545

4646
private final String index;
4747
private final boolean managedByLifecycle;
48-
private final boolean isSystemDataStream;
48+
private final boolean isInternalDataStream;
4949
@Nullable
5050
private final Long indexCreationDate;
5151
@Nullable
@@ -61,7 +61,7 @@ public class ExplainIndexDataStreamLifecycle implements Writeable, ToXContentObj
6161
public ExplainIndexDataStreamLifecycle(
6262
String index,
6363
boolean managedByLifecycle,
64-
boolean isSystemDataStream,
64+
boolean isInternalDataStream,
6565
@Nullable Long indexCreationDate,
6666
@Nullable Long rolloverDate,
6767
@Nullable TimeValue generationDate,
@@ -70,7 +70,7 @@ public ExplainIndexDataStreamLifecycle(
7070
) {
7171
this.index = index;
7272
this.managedByLifecycle = managedByLifecycle;
73-
this.isSystemDataStream = isSystemDataStream;
73+
this.isInternalDataStream = isInternalDataStream;
7474
this.indexCreationDate = indexCreationDate;
7575
this.rolloverDate = rolloverDate;
7676
this.generationDateMillis = generationDate == null ? null : generationDate.millis();
@@ -82,9 +82,9 @@ public ExplainIndexDataStreamLifecycle(StreamInput in) throws IOException {
8282
this.index = in.readString();
8383
this.managedByLifecycle = in.readBoolean();
8484
if (in.getTransportVersion().onOrAfter(TransportVersions.NO_GLOBAL_RETENTION_FOR_SYSTEM_DATA_STREAMS)) {
85-
this.isSystemDataStream = in.readBoolean();
85+
this.isInternalDataStream = in.readBoolean();
8686
} else {
87-
this.isSystemDataStream = false;
87+
this.isInternalDataStream = false;
8888
}
8989
if (managedByLifecycle) {
9090
this.indexCreationDate = in.readOptionalLong();
@@ -141,7 +141,7 @@ public XContentBuilder toXContent(
141141
}
142142
if (this.lifecycle != null) {
143143
builder.field(LIFECYCLE_FIELD.getPreferredName());
144-
lifecycle.toXContent(builder, params, rolloverConfiguration, isSystemDataStream ? null : globalRetention);
144+
lifecycle.toXContent(builder, params, rolloverConfiguration, globalRetention, isInternalDataStream);
145145
}
146146
if (this.error != null) {
147147
if (error.firstOccurrenceTimestamp() != -1L && error.recordedTimestamp() != -1L && error.retryCount() != -1) {
@@ -161,7 +161,7 @@ public void writeTo(StreamOutput out) throws IOException {
161161
out.writeString(index);
162162
out.writeBoolean(managedByLifecycle);
163163
if (out.getTransportVersion().onOrAfter(TransportVersions.NO_GLOBAL_RETENTION_FOR_SYSTEM_DATA_STREAMS)) {
164-
out.writeBoolean(isSystemDataStream);
164+
out.writeBoolean(isInternalDataStream);
165165
}
166166
if (managedByLifecycle) {
167167
out.writeOptionalLong(indexCreationDate);

server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ public static class Response extends ActionResponse implements ChunkedToXContent
143143
public record DataStreamLifecycle(
144144
String dataStreamName,
145145
@Nullable org.elasticsearch.cluster.metadata.DataStreamLifecycle lifecycle,
146-
boolean isSystemDataStream
146+
boolean isInternalDataStream
147147
) implements Writeable, ToXContentObject {
148148

149149
public static final ParseField NAME_FIELD = new ParseField("name");
@@ -162,7 +162,7 @@ public void writeTo(StreamOutput out) throws IOException {
162162
out.writeString(dataStreamName);
163163
out.writeOptionalWriteable(lifecycle);
164164
if (out.getTransportVersion().onOrAfter(TransportVersions.NO_GLOBAL_RETENTION_FOR_SYSTEM_DATA_STREAMS)) {
165-
out.writeBoolean(isSystemDataStream);
165+
out.writeBoolean(isInternalDataStream);
166166
}
167167
}
168168

@@ -189,7 +189,8 @@ public XContentBuilder toXContent(
189189
builder,
190190
org.elasticsearch.cluster.metadata.DataStreamLifecycle.addEffectiveRetentionParams(params),
191191
rolloverConfiguration,
192-
isSystemDataStream ? null : globalRetention
192+
globalRetention,
193+
isInternalDataStream
193194
);
194195
}
195196
builder.endObject();

server/src/main/java/org/elasticsearch/cluster/metadata/DataStream.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,18 @@ public boolean rolloverOnWrite() {
277277
return backingIndices.rolloverOnWrite;
278278
}
279279

280+
/**
281+
* We define that a data stream is considered internal either if it is a system index or if
282+
* its name starts with a dot.
283+
*
284+
* Note: Dot-prefixed internal data streams is a naming convention for internal data streams,
285+
* but it's not yet enforced.
286+
* @return true if it's a system index or has a dot-prefixed name.
287+
*/
288+
public boolean isInternal() {
289+
return isSystem() || name.charAt(0) == '.';
290+
}
291+
280292
/**
281293
* @param timestamp The timestamp used to select a backing index based on its start and end time.
282294
* @param metadata The metadata that is used to fetch the start and end times for backing indices of this data stream.
@@ -796,12 +808,12 @@ public List<Index> getIndicesPastRetention(
796808
) {
797809
if (lifecycle == null
798810
|| lifecycle.isEnabled() == false
799-
|| lifecycle.getEffectiveDataRetention(isSystem() ? null : globalRetention) == null) {
811+
|| lifecycle.getEffectiveDataRetention(globalRetention, isInternal()) == null) {
800812
return List.of();
801813
}
802814

803815
List<Index> indicesPastRetention = getNonWriteIndicesOlderThan(
804-
lifecycle.getEffectiveDataRetention(isSystem() ? null : globalRetention),
816+
lifecycle.getEffectiveDataRetention(globalRetention, isInternal()),
805817
indexMetadataSupplier,
806818
this::isIndexManagedByDataStreamLifecycle,
807819
nowSupplier
@@ -1202,7 +1214,7 @@ public XContentBuilder toXContent(
12021214
}
12031215
if (lifecycle != null) {
12041216
builder.field(LIFECYCLE.getPreferredName());
1205-
lifecycle.toXContent(builder, params, rolloverConfiguration, isSystem() ? null : globalRetention);
1217+
lifecycle.toXContent(builder, params, rolloverConfiguration, globalRetention, isInternal());
12061218
}
12071219
builder.field(ROLLOVER_ON_WRITE_FIELD.getPreferredName(), backingIndices.rolloverOnWrite);
12081220
if (backingIndices.autoShardingEvent != null) {

server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamLifecycle.java

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ public class DataStreamLifecycle implements SimpleDiffable<DataStreamLifecycle>,
6565
DataStreamLifecycle.INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME,
6666
"true"
6767
);
68+
public static final Tuple<TimeValue, RetentionSource> INFINITE_RETENTION = Tuple.tuple(null, RetentionSource.DATA_STREAM_CONFIGURATION);
6869

6970
/**
7071
* Check if {@link #DATA_STREAMS_LIFECYCLE_ONLY_SETTING_NAME} is present and set to {@code true}, indicating that
@@ -145,32 +146,37 @@ public boolean isEnabled() {
145146
}
146147

147148
/**
148-
* The least amount of time data should be kept by elasticsearch. If a caller does not want the global retention considered (for
149-
* example, when evaluating the effective retention for a system data stream or a template) then null should be given for
150-
* globalRetention.
151-
* @param globalRetention The global retention, or null if global retention does not exist or should not be applied
149+
* The least amount of time data should be kept by elasticsearch. The effective retention is a function with three parameters,
150+
* the {@link DataStreamLifecycle#dataRetention}, the global retention and whether this lifecycle is associated with an internal
151+
* data stream.
152+
* @param globalRetention The global retention, or null if global retention does not exist.
153+
* @param isInternalDataStream A flag denoting if this lifecycle is associated with an internal data stream or not
152154
* @return the time period or null, null represents that data should never be deleted.
153155
*/
154156
@Nullable
155-
public TimeValue getEffectiveDataRetention(@Nullable DataStreamGlobalRetention globalRetention) {
156-
return getEffectiveDataRetentionWithSource(globalRetention).v1();
157+
public TimeValue getEffectiveDataRetention(@Nullable DataStreamGlobalRetention globalRetention, boolean isInternalDataStream) {
158+
return getEffectiveDataRetentionWithSource(globalRetention, isInternalDataStream).v1();
157159
}
158160

159161
/**
160-
* The least amount of time data should be kept by elasticsearch. If a caller does not want the global retention considered (for
161-
* example, when evaluating the effective retention for a system data stream or a template) then null should be given for
162-
* globalRetention.
163-
* @param globalRetention The global retention, or null if global retention does not exist or should not be applied
162+
* The least amount of time data should be kept by elasticsearch.. The effective retention is a function with three parameters,
163+
* the {@link DataStreamLifecycle#dataRetention}, the global retention and whether this lifecycle is associated with an internal
164+
* data stream.
165+
* @param globalRetention The global retention, or null if global retention does not exist.
166+
* @param isInternalDataStream A flag denoting if this lifecycle is associated with an internal data stream or not
164167
* @return A tuple containing the time period or null as v1 (where null represents that data should never be deleted), and the non-null
165168
* retention source as v2.
166169
*/
167-
public Tuple<TimeValue, RetentionSource> getEffectiveDataRetentionWithSource(@Nullable DataStreamGlobalRetention globalRetention) {
170+
public Tuple<TimeValue, RetentionSource> getEffectiveDataRetentionWithSource(
171+
@Nullable DataStreamGlobalRetention globalRetention,
172+
boolean isInternalDataStream
173+
) {
168174
// If lifecycle is disabled there is no effective retention
169175
if (enabled == false) {
170-
return Tuple.tuple(null, RetentionSource.DATA_STREAM_CONFIGURATION);
176+
return INFINITE_RETENTION;
171177
}
172178
var dataStreamRetention = getDataStreamRetention();
173-
if (globalRetention == null) {
179+
if (globalRetention == null || isInternalDataStream) {
174180
return Tuple.tuple(dataStreamRetention, RetentionSource.DATA_STREAM_CONFIGURATION);
175181
}
176182
if (dataStreamRetention == null) {
@@ -187,7 +193,7 @@ public Tuple<TimeValue, RetentionSource> getEffectiveDataRetentionWithSource(@Nu
187193

188194
/**
189195
* The least amount of time data the data stream is requesting es to keep the data.
190-
* NOTE: this can be overridden by the {@link DataStreamLifecycle#getEffectiveDataRetention(DataStreamGlobalRetention)}.
196+
* NOTE: this can be overridden by the {@link DataStreamLifecycle#getEffectiveDataRetention(DataStreamGlobalRetention,boolean)}.
191197
* @return the time period or null, null represents that data should never be deleted.
192198
*/
193199
@Nullable
@@ -199,12 +205,16 @@ public TimeValue getDataStreamRetention() {
199205
* This method checks if the effective retention is matching what the user has configured; if the effective retention
200206
* does not match then it adds a warning informing the user about the effective retention and the source.
201207
*/
202-
public void addWarningHeaderIfDataRetentionNotEffective(@Nullable DataStreamGlobalRetention globalRetention) {
203-
if (globalRetention == null) {
208+
public void addWarningHeaderIfDataRetentionNotEffective(
209+
@Nullable DataStreamGlobalRetention globalRetention,
210+
boolean isInternalDataStream
211+
) {
212+
if (globalRetention == null || isInternalDataStream) {
204213
return;
205214
}
206215
Tuple<TimeValue, DataStreamLifecycle.RetentionSource> effectiveDataRetentionWithSource = getEffectiveDataRetentionWithSource(
207-
globalRetention
216+
globalRetention,
217+
isInternalDataStream
208218
);
209219
if (effectiveDataRetentionWithSource.v1() == null) {
210220
return;
@@ -318,20 +328,22 @@ public String toString() {
318328

319329
@Override
320330
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
321-
return toXContent(builder, params, null, null);
331+
return toXContent(builder, params, null, null, false);
322332
}
323333

324334
/**
325335
* Converts the data stream lifecycle to XContent, enriches it with effective retention information when requested
326336
* and injects the RolloverConditions if they exist.
327337
* In order to request the effective retention you need to set {@link #INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME} to true
328338
* in the XContent params.
339+
* NOTE: this is used for serialising user output and the result is never deserialised in elasticsearch.
329340
*/
330341
public XContentBuilder toXContent(
331342
XContentBuilder builder,
332343
Params params,
333344
@Nullable RolloverConfiguration rolloverConfiguration,
334-
@Nullable DataStreamGlobalRetention globalRetention
345+
@Nullable DataStreamGlobalRetention globalRetention,
346+
boolean isInternalDataStream
335347
) throws IOException {
336348
builder.startObject();
337349
builder.field(ENABLED_FIELD.getPreferredName(), enabled);
@@ -342,11 +354,14 @@ public XContentBuilder toXContent(
342354
builder.field(DATA_RETENTION_FIELD.getPreferredName(), dataRetention.value().getStringRep());
343355
}
344356
}
357+
Tuple<TimeValue, RetentionSource> effectiveDataRetentionWithSource = getEffectiveDataRetentionWithSource(
358+
globalRetention,
359+
isInternalDataStream
360+
);
345361
if (params.paramAsBoolean(INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, false)) {
346-
Tuple<TimeValue, RetentionSource> effectiveRetention = getEffectiveDataRetentionWithSource(globalRetention);
347-
if (effectiveRetention.v1() != null) {
348-
builder.field(EFFECTIVE_RETENTION_FIELD.getPreferredName(), effectiveRetention.v1().getStringRep());
349-
builder.field(RETENTION_SOURCE_FIELD.getPreferredName(), effectiveRetention.v2().displayName());
362+
if (effectiveDataRetentionWithSource.v1() != null) {
363+
builder.field(EFFECTIVE_RETENTION_FIELD.getPreferredName(), effectiveDataRetentionWithSource.v1().getStringRep());
364+
builder.field(RETENTION_SOURCE_FIELD.getPreferredName(), effectiveDataRetentionWithSource.v2().displayName());
350365
}
351366
}
352367

@@ -356,12 +371,18 @@ public XContentBuilder toXContent(
356371
}
357372
if (rolloverConfiguration != null) {
358373
builder.field(ROLLOVER_FIELD.getPreferredName());
359-
rolloverConfiguration.evaluateAndConvertToXContent(builder, params, getEffectiveDataRetention(globalRetention));
374+
rolloverConfiguration.evaluateAndConvertToXContent(builder, params, effectiveDataRetentionWithSource.v1());
360375
}
361376
builder.endObject();
362377
return builder;
363378
}
364379

380+
/**
381+
* This method deserialises XContent format as it was generated ONLY by {@link DataStreamLifecycle#toXContent(XContentBuilder, Params)}.
382+
* It does not support the output of
383+
* {@link DataStreamLifecycle#toXContent(XContentBuilder, Params, RolloverConfiguration, DataStreamGlobalRetention, boolean)} because
384+
* this output is enriched with derived fields we do not handle in this deserialisation.
385+
*/
365386
public static DataStreamLifecycle fromXContent(XContentParser parser) throws IOException {
366387
return PARSER.parse(parser, null);
367388
}

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDataStreamsService.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,15 @@ static ClusterState modifyDataStream(
214214
ClusterState updateDataLifecycle(ClusterState currentState, List<String> dataStreamNames, @Nullable DataStreamLifecycle lifecycle) {
215215
Metadata metadata = currentState.metadata();
216216
Metadata.Builder builder = Metadata.builder(metadata);
217-
boolean atLeastOneDataStreamIsNotSystem = false;
217+
boolean onlyInternalDataStreams = true;
218218
for (var dataStreamName : dataStreamNames) {
219219
var dataStream = validateDataStream(metadata, dataStreamName);
220220
builder.put(dataStream.copy().setLifecycle(lifecycle).build());
221-
atLeastOneDataStreamIsNotSystem = atLeastOneDataStreamIsNotSystem || dataStream.isSystem() == false;
221+
onlyInternalDataStreams = onlyInternalDataStreams && dataStream.isInternal();
222222
}
223223
if (lifecycle != null) {
224-
if (atLeastOneDataStreamIsNotSystem) {
225-
// We don't issue any warnings if all data streams are system data streams
226-
lifecycle.addWarningHeaderIfDataRetentionNotEffective(globalRetentionSettings.get());
227-
}
224+
// We don't issue any warnings if all data streams are internal data streams
225+
lifecycle.addWarningHeaderIfDataRetentionNotEffective(globalRetentionSettings.get(), onlyInternalDataStreams);
228226
}
229227
return ClusterState.builder(currentState).metadata(builder.build()).build();
230228
}

0 commit comments

Comments
 (0)