Skip to content

Commit f267697

Browse files
authored
[query-engine] Validate unique identifiers in KQL summarize statements (open-telemetry#1267)
Relates to open-telemetry#1262 ## Changes * Validate unique identifiers in KQL summarize statements
1 parent 346319a commit f267697

File tree

1 file changed

+65
-4
lines changed

1 file changed

+65
-4
lines changed

rust/experimental/query_engine/kql-parser/src/tabular_expressions.rs

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,12 +356,33 @@ pub(crate) fn parse_summarize_expression(
356356
let mut aggregation_expressions: HashMap<Box<str>, AggregationExpression> = HashMap::new();
357357
let mut group_by_expressions: HashMap<Box<str>, ScalarExpression> = HashMap::new();
358358
let mut post_expressions = Vec::new();
359+
let mut identifiers = HashSet::new();
360+
361+
fn validate_unique_identifier(
362+
location: QueryLocation,
363+
identifiers: &mut HashSet<Box<str>>,
364+
identifier: &str,
365+
) -> Result<(), ParserError> {
366+
if !identifiers.insert(identifier.into()) {
367+
return Err(ParserError::QueryLanguageDiagnostic {
368+
location,
369+
diagnostic_id: "KS171",
370+
message: format!("A column with the name '{identifier}' is already declared"),
371+
});
372+
}
373+
374+
Ok(())
375+
}
359376

360377
for summarize_rule in summarize_expression_rule.into_inner() {
361378
match summarize_rule.as_rule() {
362379
Rule::aggregate_expression => {
380+
let location = to_query_location(&summarize_rule);
381+
363382
let (key, aggregate) = parse_aggregate_expression(summarize_rule, scope)?;
364383

384+
validate_unique_identifier(location, &mut identifiers, key.as_ref())?;
385+
365386
aggregation_expressions.insert(key, aggregate);
366387
}
367388
Rule::group_by_expression => {
@@ -374,10 +395,16 @@ pub(crate) fn parse_summarize_expression(
374395
Rule::identifier_literal => {
375396
let identifier = validate_summary_identifier(
376397
scope,
377-
group_by_first_rule_location,
398+
group_by_first_rule_location.clone(),
378399
&[group_by_first_rule.as_str().trim().into()],
379400
)?;
380401

402+
validate_unique_identifier(
403+
group_by_first_rule_location,
404+
&mut identifiers,
405+
identifier.as_ref(),
406+
)?;
407+
381408
let scalar = parse_scalar_expression(group_by.next().unwrap(), scope)?;
382409

383410
group_by_expressions.insert(identifier.into(), scalar);
@@ -389,7 +416,7 @@ pub(crate) fn parse_summarize_expression(
389416
Some(identifier) => {
390417
let full_identifier = validate_summary_identifier(
391418
scope,
392-
group_by_first_rule_location,
419+
group_by_first_rule_location.clone(),
393420
&identifier,
394421
)?;
395422

@@ -402,6 +429,12 @@ pub(crate) fn parse_summarize_expression(
402429
));
403430
}
404431

432+
validate_unique_identifier(
433+
group_by_first_rule_location,
434+
&mut identifiers,
435+
full_identifier.as_ref(),
436+
)?;
437+
405438
group_by_expressions.insert(full_identifier.into(), scalar);
406439
}
407440
_ => {
@@ -2879,7 +2912,7 @@ mod tests {
28792912
assert_eq!(DataExpression::Summary(expected), expression);
28802913
};
28812914

2882-
let run_test_failure = |input: &str, expected: &str| {
2915+
let run_test_failure = |input: &str, expected_id: Option<&str>, expected_message: &str| {
28832916
let mut state = ParserState::new_with_options(
28842917
input,
28852918
ParserOptions::new()
@@ -2905,7 +2938,13 @@ mod tests {
29052938

29062939
let e = parse_summarize_expression(result.next().unwrap(), &state).unwrap_err();
29072940

2908-
assert!(matches!(e, ParserError::SyntaxError(_, msg) if msg == expected))
2941+
if let Some(expected_id) = expected_id {
2942+
assert!(
2943+
matches!(e, ParserError::QueryLanguageDiagnostic{location: _, diagnostic_id: id, message: msg} if id == expected_id && msg == expected_message)
2944+
)
2945+
} else {
2946+
assert!(matches!(e, ParserError::SyntaxError(_, msg) if msg == expected_message))
2947+
}
29092948
};
29102949

29112950
run_test_success(
@@ -3319,23 +3358,45 @@ mod tests {
33193358

33203359
run_test_failure(
33213360
"summarize | extend v = 1",
3361+
None,
33223362
"Invalid summarize operator: missing both aggregates and group-by expressions",
33233363
);
33243364

33253365
run_test_failure(
33263366
"summarize by source",
3367+
None,
33273368
"Cannot refer to a root map directly in a group-by expression",
33283369
);
33293370

33303371
run_test_failure(
33313372
"summarize by resource",
3373+
None,
33323374
"Cannot refer to a root map directly in a group-by expression",
33333375
);
33343376

33353377
run_test_failure(
33363378
"summarize by Attributes[tostring(now())]",
3379+
None,
33373380
"Could not determine the identifier for summary group-by expression. Try using assignment syntax instead (identifier = [expression]).",
33383381
);
3382+
3383+
run_test_failure(
3384+
"summarize Count = count(), Count = count()",
3385+
Some("KS171"),
3386+
"A column with the name 'Count' is already declared",
3387+
);
3388+
3389+
run_test_failure(
3390+
"summarize Count = count() by Count = 1",
3391+
Some("KS171"),
3392+
"A column with the name 'Count' is already declared",
3393+
);
3394+
3395+
run_test_failure(
3396+
"summarize by Count = 1, Count = 2",
3397+
Some("KS171"),
3398+
"A column with the name 'Count' is already declared",
3399+
);
33393400
}
33403401

33413402
#[test]

0 commit comments

Comments
 (0)