-
Notifications
You must be signed in to change notification settings - Fork 190
Allow renaming group-by fields to existing field names #4586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1e07c71
56d6033
9dcc2c5
ae40ac0
3cae4b8
053edf5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -997,13 +997,54 @@ private Pair<List<RexNode>, List<AggCall>> aggregateWithTrimming( | |
| // because that Mapping only works for RexNode, but we need both AggCall and RexNode list. | ||
| Pair<List<RexNode>, List<AggCall>> reResolved = | ||
| resolveAttributesForAggregation(groupExprList, aggExprList, context); | ||
|
|
||
| List<String> names = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); | ||
| context.relBuilder.aggregate( | ||
| context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); | ||
|
|
||
| // During aggregation, Calcite projects both input dependencies and output group-by fields. | ||
| // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). | ||
| // Apply explicit renaming to restore the intended aliases. | ||
| if (names.size() == reResolved.getLeft().size()) { | ||
|
||
| // Defense check: if any group-by key is not aliased, do not rename | ||
| context.relBuilder.rename(names); | ||
| } | ||
| return Pair.of(reResolved.getLeft(), reResolved.getRight()); | ||
| } | ||
|
|
||
| /** | ||
| * Imitates {@code Registrar.registerExpression} of {@link RelBuilder} to derive the output order | ||
| * of group-by keys after aggregation. | ||
| * | ||
| * <p>The projected input reference comes first, while any other computed expression follows. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In But since our PPL only allow
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found a bad case: Update: Fixed by checking duplication |
||
| */ | ||
| private List<String> getGroupKeyNamesAfterAggregation(List<RexNode> nodes) { | ||
| List<RexNode> reordered = new ArrayList<>(); | ||
| List<RexNode> left = new ArrayList<>(); | ||
| for (RexNode n : nodes) { | ||
| if (isInputRef(n)) { | ||
| reordered.add(n); | ||
| } else { | ||
| left.add(n); | ||
| } | ||
| } | ||
| reordered.addAll(left); | ||
| return reordered.stream() | ||
| .map(this::extractAliasLiteral) | ||
| .flatMap(Optional::stream) | ||
| .map(RexLiteral::stringValue) | ||
| .toList(); | ||
| } | ||
|
|
||
| /** Whether a rex node is an aliased input reference */ | ||
| private boolean isInputRef(RexNode node) { | ||
| return switch (node.getKind()) { | ||
| case AS, DESCENDING, NULLS_FIRST, NULLS_LAST -> { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any case that we have
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I didn't manage to create any. It seems there is always a projection after sorting and before aggregation. E.g. |
||
| final List<RexNode> operands = ((RexCall) node).operands; | ||
| yield isInputRef(operands.getFirst()); | ||
| } | ||
| default -> node instanceof RexInputRef; | ||
| }; | ||
| } | ||
|
Comment on lines
+1043
to
+1051
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think they serve different purposes. |
||
|
|
||
| /** | ||
| * Resolve attributes for aggregation. | ||
| * | ||
|
|
@@ -1102,7 +1143,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { | |
| aggregationAttributes.getLeft().stream() | ||
| .map(this::extractAliasLiteral) | ||
| .flatMap(Optional::stream) | ||
| .map(ref -> ((RexLiteral) ref).getValueAs(String.class)) | ||
| .map(ref -> ref.getValueAs(String.class)) | ||
| .map(context.relBuilder::field) | ||
| .map(f -> (RexNode) f) | ||
| .toList(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| setup: | ||
| - do: | ||
| indices.create: | ||
| index: time_test | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled : true | ||
|
|
||
| - do: | ||
| bulk: | ||
| refresh: true | ||
| body: | ||
| - '{"index": {"_index": "time_test"}}' | ||
| - '{"category":"A","value":1000,"@timestamp":"2024-01-01T00:00:00Z"}' | ||
| - '{"index": {"_index": "time_test"}}' | ||
| - '{"category":"B","value":2000,"@timestamp":"2024-01-01T00:05:00Z"}' | ||
| - '{"index": {"_index": "time_test"}}' | ||
| - '{"category":"A","value":1500,"@timestamp":"2024-01-01T00:10:00Z"}' | ||
| - '{"index": {"_index": "time_test"}}' | ||
| - '{"category":"C","value":3000,"@timestamp":"2024-01-01T00:20:00Z"}' | ||
|
|
||
| --- | ||
| teardown: | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled : false | ||
|
|
||
| --- | ||
| "Test span aggregation with field name collision - basic case": | ||
| - skip: | ||
| features: | ||
| - headers | ||
| - allowed_warnings | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=time_test | stats count() by span(value, 1000) as value | ||
|
|
||
| - match: { total: 3 } | ||
| - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "value", "type": "bigint"}] } | ||
| - match: { datarows: [[2, 1000], [1, 2000], [1, 3000]] } | ||
|
|
||
| --- | ||
| "Test span aggregation with field name collision - multiple aggregations": | ||
| - skip: | ||
| features: | ||
| - headers | ||
| - allowed_warnings | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=time_test | stats count(), avg(value) by span(value, 1000) as value | ||
|
|
||
| - match: { total: 3 } | ||
| - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "avg(value)", "type": "double"}, {"name": "value", "type": "bigint"}] } | ||
| - match: { datarows: [[2, 1250.0, 1000], [1, 2000.0, 2000], [1, 3000.0, 3000]] } | ||
|
|
||
| --- | ||
| "Test span aggregation without name collision - multiple group-by": | ||
| - skip: | ||
| features: | ||
| - headers | ||
| - allowed_warnings | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=time_test | stats count() by span(@timestamp, 10min) as @timestamp, category, value | ||
|
|
||
| - match: { total: 4 } | ||
| - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "@timestamp", "type": "timestamp"}, {"name": "category", "type": "string"}, {"name": "value", "type": "bigint"}] } | ||
| - match: { datarows: [[1, "2024-01-01 00:00:00", "A", 1000], [1, "2024-01-01 00:10:00", "A", 1500], [1, "2024-01-01 00:00:00", "B", 2000], [1, "2024-01-01 00:20:00", "C", 3000]] } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you rename the var
namesto make it more meaningfulThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed