|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License |
| 4 | + * 2.0; you may not use this file except in compliance with the Elastic License |
| 5 | + * 2.0. |
| 6 | + */ |
| 7 | + |
| 8 | +package org.elasticsearch.xpack.esql.optimizer.rules.logical.promql; |
| 9 | + |
| 10 | +import org.elasticsearch.xpack.esql.core.expression.Attribute; |
| 11 | +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; |
| 12 | +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; |
| 13 | + |
| 14 | +import java.util.ArrayList; |
| 15 | +import java.util.HashSet; |
| 16 | +import java.util.LinkedHashSet; |
| 17 | +import java.util.List; |
| 18 | +import java.util.Set; |
| 19 | + |
| 20 | +public final class LabelSetSpec { |
| 21 | + /** Labels known to be visible so far. */ |
| 22 | + private final List<Attribute> declaredLabels; |
| 23 | + |
| 24 | + /** |
| 25 | + * Labels removed by WITHOUT so far. |
| 26 | + */ |
| 27 | + private final List<Attribute> excludedLabels; |
| 28 | + |
| 29 | + /** |
| 30 | + * Labels requested by the current BY shape. |
| 31 | + * This can be wider than declaredLabels because BY labels missing from visible output |
| 32 | + * must still be preserved in the final output as null-filled attrs. |
| 33 | + */ |
| 34 | + private final List<Attribute> byDeclaredLabels; |
| 35 | + |
| 36 | + /** |
| 37 | + * Concrete labels to resolve against at apply() time. |
| 38 | + * Empty means "not bound yet"; apply() then falls back to declaredLabels. |
| 39 | + */ |
| 40 | + private final List<Attribute> availableLabels; |
| 41 | + |
| 42 | + /** |
| 43 | + * Exclusions inherited from parent demand. |
| 44 | + * Used only by apply(), mainly for the innermost aggregate, to build TimeSeriesWithout. |
| 45 | + */ |
| 46 | + private final List<Attribute> inheritedExcludedLabels; |
| 47 | + |
| 48 | + private LabelSetSpec( |
| 49 | + List<Attribute> declaredLabels, |
| 50 | + List<Attribute> excludedLabels, |
| 51 | + List<Attribute> byDeclaredLabels, |
| 52 | + List<Attribute> availableLabels, |
| 53 | + List<Attribute> inheritedExcludedLabels |
| 54 | + ) { |
| 55 | + this.declaredLabels = List.copyOf(declaredLabels); |
| 56 | + this.excludedLabels = List.copyOf(excludedLabels); |
| 57 | + this.byDeclaredLabels = List.copyOf(byDeclaredLabels); |
| 58 | + this.availableLabels = List.copyOf(availableLabels); |
| 59 | + this.inheritedExcludedLabels = List.copyOf(inheritedExcludedLabels); |
| 60 | + } |
| 61 | + |
| 62 | + /** |
| 63 | + * Build an exact spec from known labels. |
| 64 | + */ |
| 65 | + static LabelSetSpec of(List<Attribute> labels) { |
| 66 | + return of(labels, List.of()); |
| 67 | + } |
| 68 | + |
| 69 | + /** |
| 70 | + * Build an exact spec and carry inherited exclusions. |
| 71 | + * Used when BY must preserve a parent WITHOUT so the innermost aggregate can |
| 72 | + * emit TimeSeriesWithout with the full exclusion set. |
| 73 | + */ |
| 74 | + static LabelSetSpec of(List<Attribute> declared, List<Attribute> excluded) { |
| 75 | + return new LabelSetSpec(declared, unionByFieldName(List.of(), excluded), declared, List.of(), List.of()); |
| 76 | + } |
| 77 | + |
| 78 | + /** |
| 79 | + * Apply BY semantics. |
| 80 | + * Keep only labels visible from the input, but remember the full BY list so apply() |
| 81 | + * can report missing BY labels for later null synthesis. |
| 82 | + */ |
| 83 | + static LabelSetSpec by(LabelSetSpec input, List<Attribute> labels) { |
| 84 | + return new LabelSetSpec(intersection(input.declared(), labels), List.of(), labels, List.of(), List.of()); |
| 85 | + } |
| 86 | + |
| 87 | + /** |
| 88 | + * Apply WITHOUT semantics. |
| 89 | + * Drop visible labels now and accumulate exclusions for later TimeSeriesWithout synthesis. |
| 90 | + */ |
| 91 | + static LabelSetSpec without(LabelSetSpec input, List<Attribute> excluded) { |
| 92 | + List<Attribute> accumulated = unionByFieldName(input.excluded(), excluded); |
| 93 | + List<Attribute> visible = difference(input.declared(), excluded); |
| 94 | + return new LabelSetSpec(visible, accumulated, visible, List.of(), List.of()); |
| 95 | + } |
| 96 | + |
| 97 | + /** |
| 98 | + * Clamp declared labels to what is available. |
| 99 | + * Empty availability means `trust the input as-is`. |
| 100 | + */ |
| 101 | + static LabelSetSpec intersectWithLabels(LabelSetSpec input, List<Attribute> available) { |
| 102 | + if (available.isEmpty()) { |
| 103 | + return input; |
| 104 | + } |
| 105 | + List<Attribute> visible = intersection(input.declared(), available); |
| 106 | + return new LabelSetSpec(visible, List.of(), visible, List.of(), List.of()); |
| 107 | + } |
| 108 | + |
| 109 | + /** |
| 110 | + * Empty spec. |
| 111 | + */ |
| 112 | + static LabelSetSpec none() { |
| 113 | + return new LabelSetSpec(List.of(), List.of(), List.of(), List.of(), List.of()); |
| 114 | + } |
| 115 | + |
| 116 | + /** |
| 117 | + * Name-based set difference. |
| 118 | + */ |
| 119 | + static List<Attribute> difference(List<Attribute> from, List<Attribute> toRemove) { |
| 120 | + Set<String> removeNames = new HashSet<>(); |
| 121 | + for (Attribute attr : toRemove) { |
| 122 | + removeNames.add(fieldName(attr)); |
| 123 | + } |
| 124 | + List<Attribute> result = new ArrayList<>(); |
| 125 | + for (Attribute attr : from) { |
| 126 | + if (removeNames.contains(fieldName(attr)) == false) { |
| 127 | + result.add(attr); |
| 128 | + } |
| 129 | + } |
| 130 | + return result; |
| 131 | + } |
| 132 | + |
| 133 | + /** |
| 134 | + * Name-based set intersection. |
| 135 | + */ |
| 136 | + static List<Attribute> intersection(List<Attribute> requested, List<Attribute> available) { |
| 137 | + Set<String> availableNames = new HashSet<>(); |
| 138 | + for (Attribute attr : available) { |
| 139 | + availableNames.add(fieldName(attr)); |
| 140 | + } |
| 141 | + List<Attribute> result = new ArrayList<>(); |
| 142 | + for (Attribute attr : requested) { |
| 143 | + if (availableNames.contains(fieldName(attr))) { |
| 144 | + result.add(attr); |
| 145 | + } |
| 146 | + } |
| 147 | + return result; |
| 148 | + } |
| 149 | + |
| 150 | + /** |
| 151 | + * Resolve requested labels against visible output. |
| 152 | + * Match by identity first, then by field name. |
| 153 | + */ |
| 154 | + static List<Attribute> resolveLabels(List<Attribute> requested, List<Attribute> visibleOutput) { |
| 155 | + List<Attribute> resolved = new ArrayList<>(); |
| 156 | + for (Attribute attribute : requested) { |
| 157 | + if (visibleOutput.contains(attribute)) { |
| 158 | + resolved.add(attribute); |
| 159 | + continue; |
| 160 | + } |
| 161 | + Attribute byName = findAttributeByFieldName(visibleOutput, fieldName(attribute)); |
| 162 | + if (byName != null) { |
| 163 | + resolved.add(byName); |
| 164 | + } |
| 165 | + } |
| 166 | + return resolved; |
| 167 | + } |
| 168 | + |
| 169 | + static Attribute findAttributeByFieldName(List<Attribute> attributes, String fieldNameToFind) { |
| 170 | + for (Attribute attribute : attributes) { |
| 171 | + if (fieldName(attribute).equals(fieldNameToFind)) { |
| 172 | + return attribute; |
| 173 | + } |
| 174 | + } |
| 175 | + return null; |
| 176 | + } |
| 177 | + |
| 178 | + /** |
| 179 | + * Canonical name used by label algebra. |
| 180 | + * FieldAttribute uses fieldName(); everything else falls back to name(). |
| 181 | + */ |
| 182 | + static String fieldName(Attribute attr) { |
| 183 | + if (attr instanceof FieldAttribute fieldAttr) { |
| 184 | + return fieldAttr.fieldName().string(); |
| 185 | + } |
| 186 | + return attr.name(); |
| 187 | + } |
| 188 | + |
| 189 | + /** |
| 190 | + * Name-based union preserving first occurrence order. |
| 191 | + */ |
| 192 | + static List<Attribute> unionByFieldName(List<Attribute> first, List<Attribute> second) { |
| 193 | + List<Attribute> result = new ArrayList<>(); |
| 194 | + Set<String> seen = new LinkedHashSet<>(); |
| 195 | + addUniqueByFieldName(result, seen, first); |
| 196 | + addUniqueByFieldName(result, seen, second); |
| 197 | + return result; |
| 198 | + } |
| 199 | + |
| 200 | + private static void addUniqueByFieldName(List<Attribute> result, Set<String> seen, List<Attribute> attrs) { |
| 201 | + for (Attribute attr : attrs) { |
| 202 | + if (seen.add(fieldName(attr))) { |
| 203 | + result.add(attr); |
| 204 | + } |
| 205 | + } |
| 206 | + } |
| 207 | + |
| 208 | + /** |
| 209 | + * Keep only dimension attrs, deduped by field name. |
| 210 | + * Only these can feed TimeSeriesWithout. |
| 211 | + */ |
| 212 | + static List<Attribute> dimensionAttributes(List<Attribute> attrs) { |
| 213 | + List<Attribute> result = new ArrayList<>(); |
| 214 | + Set<String> seen = new LinkedHashSet<>(); |
| 215 | + for (Attribute attr : attrs) { |
| 216 | + if (attr instanceof FieldAttribute fa && fa.isDimension() && seen.add(fieldName(attr))) { |
| 217 | + result.add(attr); |
| 218 | + } |
| 219 | + } |
| 220 | + return result; |
| 221 | + } |
| 222 | + |
| 223 | + /** |
| 224 | + * Bind this spec to visible child output. |
| 225 | + * Used by outer aggregation before apply(). |
| 226 | + */ |
| 227 | + public LabelSetSpec withIncluded(List<Attribute> includedLabels) { |
| 228 | + return new LabelSetSpec(declaredLabels, excludedLabels, byDeclaredLabels, includedLabels, List.of()); |
| 229 | + } |
| 230 | + |
| 231 | + /** |
| 232 | + * Bind inherited exclusions and self-resolve against declaredLabels. |
| 233 | + * Used by the innermost aggregate, where no child aggregate output exists yet. |
| 234 | + */ |
| 235 | + public LabelSetSpec withExcluded(List<Attribute> excludedLabels) { |
| 236 | + return new LabelSetSpec(declaredLabels, this.excludedLabels, byDeclaredLabels, declaredLabels, excludedLabels); |
| 237 | + } |
| 238 | + |
| 239 | + /** |
| 240 | + * Finalize the deferred spec into concrete aggregate shape. |
| 241 | + * |
| 242 | + * Result contract: |
| 243 | + * - includedGroupings: actual grouping keys |
| 244 | + * - matchedAttributes: visible output attrs that must survive but are not keys |
| 245 | + * - missingAttributes: BY labels absent from visible output; caller null-fills them |
| 246 | + * - excludedGroupings: concrete dimension exclusions for TimeSeriesWithout |
| 247 | + */ |
| 248 | + public LabelSet apply() { |
| 249 | + List<Attribute> available = availableLabels.isEmpty() ? declaredLabels : availableLabels; |
| 250 | + |
| 251 | + Attribute ts = findAttributeByFieldName(available, MetadataAttribute.TIMESERIES); |
| 252 | + List<Attribute> resolved = resolveLabels(byDeclaredLabels, available); |
| 253 | + List<Attribute> missing = difference(byDeclaredLabels, resolved); |
| 254 | + List<Attribute> excludedDimensions = dimensionAttributes(unionByFieldName(excludedLabels, inheritedExcludedLabels)); |
| 255 | + |
| 256 | + if (ts != null) { |
| 257 | + List<Attribute> attrs = new ArrayList<>(); |
| 258 | + for (Attribute attr : resolved) { |
| 259 | + if (MetadataAttribute.isTimeSeriesAttributeName(attr.name()) == false) { |
| 260 | + attrs.add(attr); |
| 261 | + } |
| 262 | + } |
| 263 | + return new LabelSet(List.of(ts), attrs, missing, excludedDimensions); |
| 264 | + } |
| 265 | + |
| 266 | + return new LabelSet(resolved, List.of(), missing, excludedDimensions); |
| 267 | + } |
| 268 | + |
| 269 | + /** |
| 270 | + * Labels currently known to exist. |
| 271 | + * This is the visible label domain before final apply(). |
| 272 | + */ |
| 273 | + public List<Attribute> declared() { |
| 274 | + return declaredLabels; |
| 275 | + } |
| 276 | + |
| 277 | + /** |
| 278 | + * Labels known to be excluded by WITHOUT. |
| 279 | + * Empty means the spec is exact. |
| 280 | + */ |
| 281 | + public List<Attribute> excluded() { |
| 282 | + return excludedLabels; |
| 283 | + } |
| 284 | + |
| 285 | + /** |
| 286 | + * Concrete aggregate shape produced by apply(). |
| 287 | + * Translator code consumes this directly. |
| 288 | + */ |
| 289 | + public record LabelSet( |
| 290 | + /** |
| 291 | + * Concrete grouping keys for the aggregate. |
| 292 | + * |
| 293 | + * If _timeseries is present, it is the grouping key and concrete label attrs |
| 294 | + * move to matchedAttributes. Otherwise these are ordinary concrete label keys. |
| 295 | + */ |
| 296 | + List<Attribute> includedGroupings, |
| 297 | + |
| 298 | + /** |
| 299 | + * Visible label attrs that belong in the output shape but are not grouping keys. |
| 300 | + * |
| 301 | + * Outer aggregation carries these through pack/unpack so they survive grouping |
| 302 | + * without becoming split keys. |
| 303 | + */ |
| 304 | + List<Attribute> matchedAttributes, |
| 305 | + |
| 306 | + /** |
| 307 | + * BY labels requested by the spec but absent from visible output. |
| 308 | + * |
| 309 | + * Caller turns these into null aliases so PromQL BY preserves declared labels |
| 310 | + * in the final output even when they are not visible in the child. |
| 311 | + */ |
| 312 | + List<Attribute> missingAttributes, |
| 313 | + |
| 314 | + /** |
| 315 | + * Concrete dimension exclusions accumulated from WITHOUT. |
| 316 | + * |
| 317 | + * Innermost aggregation feeds these into TimeSeriesWithout when timeseries |
| 318 | + * grouping is needed. |
| 319 | + */ |
| 320 | + List<Attribute> excludedGroupings |
| 321 | + ) {} |
| 322 | +} |
0 commit comments