Skip to content

Commit b64f3ff

Browse files
committed
refactor promql translator path
1 parent 6bf5b67 commit b64f3ff

File tree

6 files changed

+1196
-874
lines changed

6 files changed

+1196
-874
lines changed
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical.promql;
9+
10+
import org.elasticsearch.xpack.esql.core.expression.Attribute;
11+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
12+
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
13+
14+
import java.util.ArrayList;
15+
import java.util.HashSet;
16+
import java.util.LinkedHashSet;
17+
import java.util.List;
18+
import java.util.Set;
19+
20+
public final class LabelSetSpec {
21+
/** Labels currently known to be visible. */
private final List<Attribute> declaredLabels;

/** Labels that WITHOUT has removed up to this point. */
private final List<Attribute> excludedLabels;

/**
 * Labels named by the current BY shape.
 * May be wider than declaredLabels: a BY label that is missing from the visible output
 * still has to be kept in the final output as a null-filled attr.
 */
private final List<Attribute> byDeclaredLabels;

/**
 * Concrete labels that apply() resolves against.
 * An empty list means "not bound yet"; apply() then uses declaredLabels instead.
 */
private final List<Attribute> availableLabels;

/**
 * Exclusions handed down from the parent demand.
 * Only apply() reads them (mainly for the innermost aggregate) when building TimeSeriesWithout.
 */
private final List<Attribute> inheritedExcludedLabels;

/**
 * Creates a spec that holds an unmodifiable snapshot of every supplied list.
 *
 * @param declared          labels known to be visible
 * @param excluded          labels removed by WITHOUT
 * @param byDeclared        labels requested by the BY shape
 * @param available         concrete labels to resolve against; empty when not bound yet
 * @param inheritedExcluded exclusions inherited from the parent demand
 */
private LabelSetSpec(
    List<Attribute> declared,
    List<Attribute> excluded,
    List<Attribute> byDeclared,
    List<Attribute> available,
    List<Attribute> inheritedExcluded
) {
    this.declaredLabels = List.copyOf(declared);
    this.excludedLabels = List.copyOf(excluded);
    this.byDeclaredLabels = List.copyOf(byDeclared);
    this.availableLabels = List.copyOf(available);
    this.inheritedExcludedLabels = List.copyOf(inheritedExcluded);
}
61+
62+
/**
63+
* Build an exact spec from known labels.
64+
*/
65+
static LabelSetSpec of(List<Attribute> labels) {
66+
return of(labels, List.of());
67+
}
68+
69+
/**
70+
* Build an exact spec and carry inherited exclusions.
71+
* Used when BY must preserve a parent WITHOUT so the innermost aggregate can
72+
* emit TimeSeriesWithout with the full exclusion set.
73+
*/
74+
static LabelSetSpec of(List<Attribute> declared, List<Attribute> excluded) {
75+
return new LabelSetSpec(declared, unionByFieldName(List.of(), excluded), declared, List.of(), List.of());
76+
}
77+
78+
/**
79+
* Apply BY semantics.
80+
* Keep only labels visible from the input, but remember the full BY list so apply()
81+
* can report missing BY labels for later null synthesis.
82+
*/
83+
static LabelSetSpec by(LabelSetSpec input, List<Attribute> labels) {
84+
return new LabelSetSpec(intersection(input.declared(), labels), List.of(), labels, List.of(), List.of());
85+
}
86+
87+
/**
88+
* Apply WITHOUT semantics.
89+
* Drop visible labels now and accumulate exclusions for later TimeSeriesWithout synthesis.
90+
*/
91+
static LabelSetSpec without(LabelSetSpec input, List<Attribute> excluded) {
92+
List<Attribute> accumulated = unionByFieldName(input.excluded(), excluded);
93+
List<Attribute> visible = difference(input.declared(), excluded);
94+
return new LabelSetSpec(visible, accumulated, visible, List.of(), List.of());
95+
}
96+
97+
/**
98+
* Clamp declared labels to what is available.
99+
* Empty availability means `trust the input as-is`.
100+
*/
101+
static LabelSetSpec intersectWithLabels(LabelSetSpec input, List<Attribute> available) {
102+
if (available.isEmpty()) {
103+
return input;
104+
}
105+
List<Attribute> visible = intersection(input.declared(), available);
106+
return new LabelSetSpec(visible, List.of(), visible, List.of(), List.of());
107+
}
108+
109+
/**
110+
* Empty spec.
111+
*/
112+
static LabelSetSpec none() {
113+
return new LabelSetSpec(List.of(), List.of(), List.of(), List.of(), List.of());
114+
}
115+
116+
/**
117+
* Name-based set difference.
118+
*/
119+
static List<Attribute> difference(List<Attribute> from, List<Attribute> toRemove) {
120+
Set<String> removeNames = new HashSet<>();
121+
for (Attribute attr : toRemove) {
122+
removeNames.add(fieldName(attr));
123+
}
124+
List<Attribute> result = new ArrayList<>();
125+
for (Attribute attr : from) {
126+
if (removeNames.contains(fieldName(attr)) == false) {
127+
result.add(attr);
128+
}
129+
}
130+
return result;
131+
}
132+
133+
/**
134+
* Name-based set intersection.
135+
*/
136+
static List<Attribute> intersection(List<Attribute> requested, List<Attribute> available) {
137+
Set<String> availableNames = new HashSet<>();
138+
for (Attribute attr : available) {
139+
availableNames.add(fieldName(attr));
140+
}
141+
List<Attribute> result = new ArrayList<>();
142+
for (Attribute attr : requested) {
143+
if (availableNames.contains(fieldName(attr))) {
144+
result.add(attr);
145+
}
146+
}
147+
return result;
148+
}
149+
150+
/**
151+
* Resolve requested labels against visible output.
152+
* Match by identity first, then by field name.
153+
*/
154+
static List<Attribute> resolveLabels(List<Attribute> requested, List<Attribute> visibleOutput) {
155+
List<Attribute> resolved = new ArrayList<>();
156+
for (Attribute attribute : requested) {
157+
if (visibleOutput.contains(attribute)) {
158+
resolved.add(attribute);
159+
continue;
160+
}
161+
Attribute byName = findAttributeByFieldName(visibleOutput, fieldName(attribute));
162+
if (byName != null) {
163+
resolved.add(byName);
164+
}
165+
}
166+
return resolved;
167+
}
168+
169+
static Attribute findAttributeByFieldName(List<Attribute> attributes, String fieldNameToFind) {
170+
for (Attribute attribute : attributes) {
171+
if (fieldName(attribute).equals(fieldNameToFind)) {
172+
return attribute;
173+
}
174+
}
175+
return null;
176+
}
177+
178+
/**
179+
* Canonical name used by label algebra.
180+
* FieldAttribute uses fieldName(); everything else falls back to name().
181+
*/
182+
static String fieldName(Attribute attr) {
183+
if (attr instanceof FieldAttribute fieldAttr) {
184+
return fieldAttr.fieldName().string();
185+
}
186+
return attr.name();
187+
}
188+
189+
/**
190+
* Name-based union preserving first occurrence order.
191+
*/
192+
static List<Attribute> unionByFieldName(List<Attribute> first, List<Attribute> second) {
193+
List<Attribute> result = new ArrayList<>();
194+
Set<String> seen = new LinkedHashSet<>();
195+
addUniqueByFieldName(result, seen, first);
196+
addUniqueByFieldName(result, seen, second);
197+
return result;
198+
}
199+
200+
private static void addUniqueByFieldName(List<Attribute> result, Set<String> seen, List<Attribute> attrs) {
201+
for (Attribute attr : attrs) {
202+
if (seen.add(fieldName(attr))) {
203+
result.add(attr);
204+
}
205+
}
206+
}
207+
208+
/**
209+
* Keep only dimension attrs, deduped by field name.
210+
* Only these can feed TimeSeriesWithout.
211+
*/
212+
static List<Attribute> dimensionAttributes(List<Attribute> attrs) {
213+
List<Attribute> result = new ArrayList<>();
214+
Set<String> seen = new LinkedHashSet<>();
215+
for (Attribute attr : attrs) {
216+
if (attr instanceof FieldAttribute fa && fa.isDimension() && seen.add(fieldName(attr))) {
217+
result.add(attr);
218+
}
219+
}
220+
return result;
221+
}
222+
223+
/**
224+
* Bind this spec to visible child output.
225+
* Used by outer aggregation before apply().
226+
*/
227+
public LabelSetSpec withIncluded(List<Attribute> includedLabels) {
228+
return new LabelSetSpec(declaredLabels, excludedLabels, byDeclaredLabels, includedLabels, List.of());
229+
}
230+
231+
/**
232+
* Bind inherited exclusions and self-resolve against declaredLabels.
233+
* Used by the innermost aggregate, where no child aggregate output exists yet.
234+
*/
235+
public LabelSetSpec withExcluded(List<Attribute> excludedLabels) {
236+
return new LabelSetSpec(declaredLabels, this.excludedLabels, byDeclaredLabels, declaredLabels, excludedLabels);
237+
}
238+
239+
/**
240+
* Finalize the deferred spec into concrete aggregate shape.
241+
*
242+
* Result contract:
243+
* - includedGroupings: actual grouping keys
244+
* - matchedAttributes: visible output attrs that must survive but are not keys
245+
* - missingAttributes: BY labels absent from visible output; caller null-fills them
246+
* - excludedGroupings: concrete dimension exclusions for TimeSeriesWithout
247+
*/
248+
public LabelSet apply() {
249+
List<Attribute> available = availableLabels.isEmpty() ? declaredLabels : availableLabels;
250+
251+
Attribute ts = findAttributeByFieldName(available, MetadataAttribute.TIMESERIES);
252+
List<Attribute> resolved = resolveLabels(byDeclaredLabels, available);
253+
List<Attribute> missing = difference(byDeclaredLabels, resolved);
254+
List<Attribute> excludedDimensions = dimensionAttributes(unionByFieldName(excludedLabels, inheritedExcludedLabels));
255+
256+
if (ts != null) {
257+
List<Attribute> attrs = new ArrayList<>();
258+
for (Attribute attr : resolved) {
259+
if (MetadataAttribute.isTimeSeriesAttributeName(attr.name()) == false) {
260+
attrs.add(attr);
261+
}
262+
}
263+
return new LabelSet(List.of(ts), attrs, missing, excludedDimensions);
264+
}
265+
266+
return new LabelSet(resolved, List.of(), missing, excludedDimensions);
267+
}
268+
269+
/**
270+
* Labels currently known to exist.
271+
* This is the visible label domain before final apply().
272+
*/
273+
public List<Attribute> declared() {
274+
return declaredLabels;
275+
}
276+
277+
/**
278+
* Labels known to be excluded by WITHOUT.
279+
* Empty means the spec is exact.
280+
*/
281+
public List<Attribute> excluded() {
282+
return excludedLabels;
283+
}
284+
285+
/**
286+
* Concrete aggregate shape produced by apply().
287+
* Translator code consumes this directly.
288+
*/
289+
public record LabelSet(
290+
/**
291+
* Concrete grouping keys for the aggregate.
292+
*
293+
* If _timeseries is present, it is the grouping key and concrete label attrs
294+
* move to matchedAttributes. Otherwise these are ordinary concrete label keys.
295+
*/
296+
List<Attribute> includedGroupings,
297+
298+
/**
299+
* Visible label attrs that belong in the output shape but are not grouping keys.
300+
*
301+
* Outer aggregation carries these through pack/unpack so they survive grouping
302+
* without becoming split keys.
303+
*/
304+
List<Attribute> matchedAttributes,
305+
306+
/**
307+
* BY labels requested by the spec but absent from visible output.
308+
*
309+
* Caller turns these into null aliases so PromQL BY preserves declared labels
310+
* in the final output even when they are not visible in the child.
311+
*/
312+
List<Attribute> missingAttributes,
313+
314+
/**
315+
* Concrete dimension exclusions accumulated from WITHOUT.
316+
*
317+
* Innermost aggregation feeds these into TimeSeriesWithout when timeseries
318+
* grouping is needed.
319+
*/
320+
List<Attribute> excludedGroupings
321+
) {}
322+
}

0 commit comments

Comments
 (0)