Skip to content

Commit c4c2ce8

Browse files
authored
Downsampling supports date_histogram with tz (#103511)
* Downsampling supports date_histogram with tz This comes with caveats, for downsampled indexes at intervals more than 15 minutes. For instance, - 1-hour downsampling will produce inaccurate results for 1-hour histograms on timezones shifted by XX:30 - 1-day downsampling will produce inaccurate daily histograms for non-UTC timezones as it tracks days at UTC. Related to #101309 * Update docs/changelog/103511.yaml * test daylight savings * update documentation * Offset time buckets over downsampled data with TZ * Update docs/changelog/103511.yaml * check for TSDS * fixme for transport version * add interval to index metadata * add transport version * bump up transport version * address feedback * spotless fix
1 parent a0cf690 commit c4c2ce8

File tree

18 files changed

+679
-56
lines changed

18 files changed

+679
-56
lines changed

docs/changelog/103511.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 103511
2+
summary: Downsampling supports `date_histogram` with tz
3+
area: Downsampling
4+
type: bug
5+
issues:
6+
- 101309

docs/reference/data-streams/downsampling.asciidoc

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,29 @@ downsampled.
135135
* For
136136
<<search-aggregations-bucket-datehistogram-aggregation,date histogram aggregations>>,
137137
only `fixed_intervals` (and not calendar-aware intervals) are supported.
138-
* Only Coordinated Universal Time (UTC) date-times are supported.
138+
* Timezone support comes with caveats:
139+
140+
** Date histograms at intervals that are multiples of an hour are based on
141+
values generated at UTC. This works well for timezones that are on the hour, e.g.
142+
+5:00 or -3:00, but requires offsetting the reported time buckets, e.g.
143+
`2020-01-01T10:30:00.000` instead of `2020-01-01T10:00:00.000` for
144+
timezone +5:30 (India), if downsampling aggregates values per hour. In this case,
145+
the results include the field `downsampled_results_offset: true`, to indicate that
146+
the time buckets are shifted. This can be avoided if a downsampling interval of 15
147+
minutes is used, as it allows properly calculating hourly values for the shifted
148+
buckets.
149+
150+
** Date histograms at intervals that are multiples of a day are similarly
151+
affected, in case downsampling aggregates values per day. In this case, the
152+
beginning of each day is always calculated at UTC when generating the downsampled
153+
values, so the time buckets need to be shifted, e.g. reported as
154+
`2020-03-07T19:00:00.000` instead of `2020-03-07T00:00:00.000` for timezone `America/New_York`.
155+
The field `downsampled_results_offset: true` is added in this case too.
156+
157+
** Daylight savings and similar peculiarities around timezones affect
158+
reported results, as <<datehistogram-aggregation-time-zone,documented>>
159+
for date histogram aggregation. Besides, downsampling at daily interval
160+
hinders tracking any information related to daylight savings changes.
139161

140162
[discrete]
141163
[[downsampling-restrictions]]

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ static TransportVersion def(int id) {
183183
public static final TransportVersion HOT_THREADS_AS_BYTES = def(8_571_00_0);
184184
public static final TransportVersion ML_INFERENCE_REQUEST_INPUT_TYPE_ADDED = def(8_572_00_0);
185185
public static final TransportVersion ESQL_ENRICH_POLICY_CCQ_MODE = def(8_573_00_0);
186+
public static final TransportVersion DATE_HISTOGRAM_SUPPORT_DOWNSAMPLED_TZ = def(8_574_00_0);
186187

187188
/*
188189
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater;
2727
import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;
2828
import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
29-
import org.elasticsearch.common.Strings;
3029
import org.elasticsearch.common.collect.ImmutableOpenMap;
3130
import org.elasticsearch.common.compress.CompressedXContent;
3231
import org.elasticsearch.common.io.stream.StreamInput;
@@ -138,14 +137,9 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
138137
EnumSet.of(ClusterBlockLevel.WRITE)
139138
);
140139

141-
// TODO: refactor this method after adding more downsampling metadata
142-
public boolean isDownsampledIndex() {
143-
final String sourceIndex = settings.get(IndexMetadata.INDEX_DOWNSAMPLE_SOURCE_NAME_KEY);
144-
final String indexDownsamplingStatus = settings.get(IndexMetadata.INDEX_DOWNSAMPLE_STATUS_KEY);
145-
final boolean downsamplingSuccess = DownsampleTaskStatus.SUCCESS.name()
146-
.toLowerCase(Locale.ROOT)
147-
.equals(indexDownsamplingStatus != null ? indexDownsamplingStatus.toLowerCase(Locale.ROOT) : DownsampleTaskStatus.UNKNOWN);
148-
return Strings.isNullOrEmpty(sourceIndex) == false && downsamplingSuccess;
140+
@Nullable
141+
public String getDownsamplingInterval() {
142+
return settings.get(IndexMetadata.INDEX_DOWNSAMPLE_INTERVAL_KEY);
149143
}
150144

151145
public enum State implements Writeable {
@@ -1235,6 +1229,7 @@ public Index getResizeSourceIndex() {
12351229
public static final String INDEX_DOWNSAMPLE_ORIGIN_UUID_KEY = "index.downsample.origin.uuid";
12361230

12371231
public static final String INDEX_DOWNSAMPLE_STATUS_KEY = "index.downsample.status";
1232+
public static final String INDEX_DOWNSAMPLE_INTERVAL_KEY = "index.downsample.interval";
12381233
public static final Setting<String> INDEX_DOWNSAMPLE_SOURCE_UUID = Setting.simpleString(
12391234
INDEX_DOWNSAMPLE_SOURCE_UUID_KEY,
12401235
Property.IndexScope,
@@ -1277,6 +1272,14 @@ public String toString() {
12771272
Property.InternalIndex
12781273
);
12791274

1275+
public static final Setting<String> INDEX_DOWNSAMPLE_INTERVAL = Setting.simpleString(
1276+
INDEX_DOWNSAMPLE_INTERVAL_KEY,
1277+
"",
1278+
Property.IndexScope,
1279+
Property.InternalIndex,
1280+
Property.PrivateIndex
1281+
);
1282+
12801283
// LIFECYCLE_NAME is here an as optimization, see LifecycleSettings.LIFECYCLE_NAME and
12811284
// LifecycleSettings.LIFECYCLE_NAME_SETTING for the 'real' version
12821285
public static final String LIFECYCLE_NAME = "index.lifecycle.name";

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
7676
IndexMetadata.INDEX_DOWNSAMPLE_ORIGIN_NAME,
7777
IndexMetadata.INDEX_DOWNSAMPLE_ORIGIN_UUID,
7878
IndexMetadata.INDEX_DOWNSAMPLE_STATUS,
79+
IndexMetadata.INDEX_DOWNSAMPLE_INTERVAL,
7980
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING,
8081
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_WARN_SETTING,
8182
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_INFO_SETTING,

server/src/main/java/org/elasticsearch/index/query/QueryRewriteContext.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,13 @@ public IndexSettings getIndexSettings() {
285285
return indexSettings;
286286
}
287287

288+
/**
289+
* Returns the MappingLookup for the queried index.
290+
*/
291+
public MappingLookup getMappingLookup() {
292+
return mappingLookup;
293+
}
294+
288295
/**
289296
* Given an index pattern, checks whether it matches against the current shard. The pattern
290297
* may represent a fully qualified index name if the search targets remote shards.

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.common.io.stream.StreamInput;
1515
import org.elasticsearch.common.io.stream.StreamOutput;
1616
import org.elasticsearch.core.TimeValue;
17+
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
1718
import org.elasticsearch.search.aggregations.AggregationBuilder;
1819
import org.elasticsearch.search.aggregations.AggregatorFactories;
1920
import org.elasticsearch.search.aggregations.AggregatorFactory;
@@ -36,6 +37,7 @@
3637
import java.util.List;
3738
import java.util.Map;
3839
import java.util.Objects;
40+
import java.util.SimpleTimeZone;
3941
import java.util.function.Consumer;
4042

4143
import static java.util.Map.entry;
@@ -406,23 +408,46 @@ protected ValuesSourceAggregatorFactory innerBuild(
406408
) throws IOException {
407409
final DateIntervalWrapper.IntervalTypeEnum dateHistogramIntervalType = dateHistogramInterval.getIntervalType();
408410

409-
if (context.getIndexSettings().getIndexMetadata().isDownsampledIndex()
410-
&& DateIntervalWrapper.IntervalTypeEnum.CALENDAR.equals(dateHistogramIntervalType)) {
411-
throw new IllegalArgumentException(
412-
config.getDescription()
413-
+ " is not supported for aggregation ["
414-
+ getName()
415-
+ "] with interval type ["
416-
+ dateHistogramIntervalType.getPreferredName()
417-
+ "]"
418-
);
419-
}
420-
411+
boolean downsampledResultsOffset = false;
421412
final ZoneId tz = timeZone();
422-
if (context.getIndexSettings().getIndexMetadata().isDownsampledIndex() && tz != null && ZoneId.of("UTC").equals(tz) == false) {
423-
throw new IllegalArgumentException(
424-
config.getDescription() + " is not supported for aggregation [" + getName() + "] with timezone [" + tz + "]"
425-
);
413+
414+
String downsamplingInterval = context.getIndexSettings().getIndexMetadata().getDownsamplingInterval();
415+
if (downsamplingInterval != null) {
416+
if (DateIntervalWrapper.IntervalTypeEnum.CALENDAR.equals(dateHistogramIntervalType)) {
417+
throw new IllegalArgumentException(
418+
config.getDescription()
419+
+ " is not supported for aggregation ["
420+
+ getName()
421+
+ "] with interval type ["
422+
+ dateHistogramIntervalType.getPreferredName()
423+
+ "]"
424+
);
425+
}
426+
427+
// Downsampled data in time-series indexes contain aggregated values that get calculated over UTC-based intervals.
428+
// When they get aggregated using a different timezone, the resulting buckets may need to be offset to account for
429+
// the difference between UTC (where stored data refers to) and the requested timezone. For instance:
430+
// a. A TZ shifted by -01:15 over hourly downsampled data will lead to buckets with times XX:45, instead of XX:00
431+
// b. A TZ shifted by +07:00 over daily downsampled data will lead to buckets with times 07:00, instead of 00:00
432+
// c. Intervals over DST are approximate, not including gaps in time buckets. This applies to date histogram aggregation in
433+
// general.
434+
if (tz != null && ZoneId.of("UTC").equals(tz) == false && field().equals(DataStreamTimestampFieldMapper.DEFAULT_PATH)) {
435+
436+
// Get the downsampling interval.
437+
DateHistogramInterval interval = new DateHistogramInterval(downsamplingInterval);
438+
long downsamplingResolution = interval.estimateMillis();
439+
long aggregationResolution = dateHistogramInterval.getAsFixedInterval().estimateMillis();
440+
441+
// If the aggregation resolution is not a multiple of the downsampling resolution, the reported time for each
442+
// bucket needs to be shifted by the mod - in addition to rounding that's applied as usual.
443+
// Note that the aggregation resolution gets shifted to match the specified timezone. Timezone.getOffset() normally expects
444+
// a date but it can also process an offset (interval) in milliseconds as it uses the Unix epoch for reference.
445+
long aggregationOffset = SimpleTimeZone.getTimeZone(tz).getOffset(aggregationResolution) % downsamplingResolution;
446+
if (aggregationOffset != 0) {
447+
downsampledResultsOffset = true;
448+
offset += aggregationOffset;
449+
}
450+
}
426451
}
427452

428453
DateHistogramAggregationSupplier aggregatorSupplier = context.getValuesSourceRegistry().getAggregator(REGISTRY_KEY, config);
@@ -473,6 +498,7 @@ protected ValuesSourceAggregatorFactory innerBuild(
473498
order,
474499
keyed,
475500
minDocCount,
501+
downsampledResultsOffset,
476502
rounding,
477503
roundedBounds,
478504
roundedHardBounds,

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationSupplier.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Aggregator build(
2929
BucketOrder order,
3030
boolean keyed,
3131
long minDocCount,
32+
boolean downsampledResultsOffset,
3233
@Nullable LongBounds extendedBounds,
3334
@Nullable LongBounds hardBounds,
3435
ValuesSourceConfig valuesSourceConfig,

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ public static Aggregator build(
7979
BucketOrder order,
8080
boolean keyed,
8181
long minDocCount,
82+
boolean downsampledResultsOffset,
8283
@Nullable LongBounds extendedBounds,
8384
@Nullable LongBounds hardBounds,
8485
ValuesSourceConfig valuesSourceConfig,
@@ -96,6 +97,7 @@ public static Aggregator build(
9697
order,
9798
keyed,
9899
minDocCount,
100+
downsampledResultsOffset,
99101
extendedBounds,
100102
hardBounds,
101103
valuesSourceConfig,
@@ -115,6 +117,7 @@ public static Aggregator build(
115117
order,
116118
keyed,
117119
minDocCount,
120+
downsampledResultsOffset,
118121
extendedBounds,
119122
hardBounds,
120123
valuesSourceConfig,
@@ -133,6 +136,7 @@ private static FromDateRange adaptIntoRangeOrNull(
133136
BucketOrder order,
134137
boolean keyed,
135138
long minDocCount,
139+
boolean downsampledResultsOffset,
136140
@Nullable LongBounds extendedBounds,
137141
@Nullable LongBounds hardBounds,
138142
ValuesSourceConfig valuesSourceConfig,
@@ -191,6 +195,7 @@ private static FromDateRange adaptIntoRangeOrNull(
191195
minDocCount,
192196
extendedBounds,
193197
keyed,
198+
downsampledResultsOffset,
194199
fixedRoundingPoints
195200
);
196201
}
@@ -227,6 +232,7 @@ private static RangeAggregator.Range[] ranges(LongBounds hardBounds, long[] fixe
227232
private final boolean keyed;
228233

229234
private final long minDocCount;
235+
private final boolean downsampledResultsOffset;
230236
private final LongBounds extendedBounds;
231237
private final LongBounds hardBounds;
232238

@@ -240,6 +246,7 @@ private static RangeAggregator.Range[] ranges(LongBounds hardBounds, long[] fixe
240246
BucketOrder order,
241247
boolean keyed,
242248
long minDocCount,
249+
boolean downsampledResultsOffset,
243250
@Nullable LongBounds extendedBounds,
244251
@Nullable LongBounds hardBounds,
245252
ValuesSourceConfig valuesSourceConfig,
@@ -255,6 +262,7 @@ private static RangeAggregator.Range[] ranges(LongBounds hardBounds, long[] fixe
255262
order.validate(this);
256263
this.keyed = keyed;
257264
this.minDocCount = minDocCount;
265+
this.downsampledResultsOffset = downsampledResultsOffset;
258266
this.extendedBounds = extendedBounds;
259267
this.hardBounds = hardBounds;
260268
// TODO: Stop using null here
@@ -328,6 +336,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
328336
emptyBucketInfo,
329337
formatter,
330338
keyed,
339+
downsampledResultsOffset,
331340
metadata()
332341
);
333342
});
@@ -347,6 +356,7 @@ public InternalAggregation buildEmptyAggregation() {
347356
emptyBucketInfo,
348357
formatter,
349358
keyed,
359+
downsampledResultsOffset,
350360
metadata()
351361
);
352362
}
@@ -392,6 +402,7 @@ static class FromDateRange extends AdaptingAggregator implements SizedBucketAggr
392402
private final long minDocCount;
393403
private final LongBounds extendedBounds;
394404
private final boolean keyed;
405+
private final boolean downsampledResultsOffset;
395406
private final long[] fixedRoundingPoints;
396407

397408
FromDateRange(
@@ -405,6 +416,7 @@ static class FromDateRange extends AdaptingAggregator implements SizedBucketAggr
405416
long minDocCount,
406417
LongBounds extendedBounds,
407418
boolean keyed,
419+
boolean downsampledResultsOffset,
408420
long[] fixedRoundingPoints
409421
) throws IOException {
410422
super(parent, subAggregators, delegate);
@@ -416,6 +428,7 @@ static class FromDateRange extends AdaptingAggregator implements SizedBucketAggr
416428
this.minDocCount = minDocCount;
417429
this.extendedBounds = extendedBounds;
418430
this.keyed = keyed;
431+
this.downsampledResultsOffset = downsampledResultsOffset;
419432
this.fixedRoundingPoints = fixedRoundingPoints;
420433
}
421434

@@ -454,6 +467,7 @@ protected InternalAggregation adapt(InternalAggregation delegateResult) {
454467
emptyBucketInfo,
455468
format,
456469
keyed,
470+
downsampledResultsOffset,
457471
range.getMetadata()
458472
);
459473
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
5252
order,
5353
keyed,
5454
minDocCount,
55+
downsampledResultsOffset,
5556
extendedBounds,
5657
hardBounds,
5758
valuesSourceConfig,
@@ -71,6 +72,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
7172
order,
7273
keyed,
7374
minDocCount,
75+
downsampledResultsOffset,
7476
extendedBounds,
7577
hardBounds,
7678
valuesSourceConfig,
@@ -88,6 +90,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
8890
private final BucketOrder order;
8991
private final boolean keyed;
9092
private final long minDocCount;
93+
private final boolean downsampledResultsOffset;
9194
private final LongBounds extendedBounds;
9295
private final LongBounds hardBounds;
9396
private final Rounding rounding;
@@ -98,6 +101,7 @@ public DateHistogramAggregatorFactory(
98101
BucketOrder order,
99102
boolean keyed,
100103
long minDocCount,
104+
boolean downsampledResultsOffset,
101105
Rounding rounding,
102106
LongBounds extendedBounds,
103107
LongBounds hardBounds,
@@ -111,6 +115,7 @@ public DateHistogramAggregatorFactory(
111115
this.aggregatorSupplier = aggregationSupplier;
112116
this.order = order;
113117
this.keyed = keyed;
118+
this.downsampledResultsOffset = downsampledResultsOffset;
114119
this.minDocCount = minDocCount;
115120
this.extendedBounds = extendedBounds;
116121
this.hardBounds = hardBounds;
@@ -139,6 +144,7 @@ protected Aggregator doCreateInternal(Aggregator parent, CardinalityUpperBound c
139144
order,
140145
keyed,
141146
minDocCount,
147+
downsampledResultsOffset,
142148
extendedBounds,
143149
hardBounds,
144150
config,
@@ -159,6 +165,7 @@ protected Aggregator createUnmapped(Aggregator parent, Map<String, Object> metad
159165
order,
160166
keyed,
161167
minDocCount,
168+
downsampledResultsOffset,
162169
extendedBounds,
163170
hardBounds,
164171
config,

0 commit comments

Comments
 (0)