Skip to content

Commit b2f68af

Browse files
authored
[query-engine] KQL extract_json expression (open-telemetry#1366)
Relates to open-telemetry#1362. Changes: implement support for `extract_json` in the KQL parser.
1 parent 142f76d commit b2f68af

File tree

3 files changed

+209
-14
lines changed

3 files changed

+209
-14
lines changed

rust/experimental/query_engine/kql-parser/src/kql.pest

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,21 @@ string_literal = @{
7575
("\"" ~ double_quote_string_char* ~ "\"")
7676
| ("'" ~ single_quote_string_char* ~ "'")
7777
}
78-
null_literal = {
79-
("bool(" ~ "null" ~ ")") |
80-
("datetime(" ~ "null" ~ ")") |
81-
("dynamic(" ~ "null" ~ ")") |
82-
("guid(" ~ "null" ~ ")") |
83-
("int(" ~ "null" ~ ")") |
84-
("long(" ~ "null" ~ ")") |
85-
("real(" ~ "null" ~ ")") |
86-
("double(" ~ "null" ~ ")") |
87-
("timespan(" ~ "null" ~ ")") |
88-
("regex(" ~ "null" ~ ")")
89-
}
78+
type_literal = {
79+
"bool"
80+
| "datetime"
81+
| "decimal"
82+
| "double"
83+
| "dynamic"
84+
| "guid"
85+
| "int"
86+
| "long"
87+
| "real"
88+
| "regex"
89+
| "string"
90+
| "timespan"
91+
}
92+
null_literal = { type_literal ~ "(" ~ "null" ~ ")" }
9093
identifier_literal = @{ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* }
9194
identifier_or_pattern_literal = ${
9295
(("_" | ASCII_ALPHA | "*") ~ ("_" | ASCII_ALPHANUMERIC | "*")* ~ !("["|"."))
@@ -144,6 +147,7 @@ type_unary_expressions = {
144147
}
145148

146149
get_type_expression = { "gettype" ~ "(" ~ scalar_expression ~ ")" }
150+
typeof_expression = { "typeof" ~ "(" ~ type_literal ~ ")" }
147151

148152
conditional_expression = { ("iff"|"iif") ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" }
149153
case_expression = { "case" ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ ("," ~ logical_expression ~ "," ~ scalar_expression)* ~ "," ~ scalar_expression ~ ")" }
@@ -219,6 +223,8 @@ logical_unary_expressions = {
219223
not_expression
220224
}
221225

226+
extract_json_expression = { "extract_json" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ typeof_expression)? ~ ")" }
227+
222228
/* Note: Order is important here. Once Pest has matched something it won't go
223229
backwards. For example if integer_literal is defined before time_expression "1h"
224230
would be parsed as integer_literal(1) and the remaining "h" would be fed into
@@ -234,6 +240,7 @@ scalar_unary_expression = {
234240
| temporal_unary_expressions
235241
| logical_unary_expressions
236242
| parse_unary_expressions
243+
| extract_json_expression
237244
| accessor_expression
238245
| "(" ~ scalar_expression ~ ")"
239246
}

rust/experimental/query_engine/kql-parser/src/scalar_expression.rs

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,59 @@ pub(crate) fn parse_scalar_unary_expression(
347347
Rule::math_unary_expressions => parse_math_unary_expressions(rule, scope)?,
348348
Rule::temporal_unary_expressions => parse_temporal_unary_expressions(rule, scope)?,
349349
Rule::logical_unary_expressions => parse_logical_unary_expressions(rule, scope)?,
350+
Rule::extract_json_expression => {
351+
let location = to_query_location(&rule);
352+
353+
let mut extract_json_rules = rule.into_inner();
354+
355+
let path = parse_scalar_expression(extract_json_rules.next().unwrap(), scope)?;
356+
357+
let value = parse_scalar_expression(extract_json_rules.next().unwrap(), scope)?;
358+
359+
let inner_expression = ScalarExpression::Select(SelectScalarExpression::new(
360+
location.clone(),
361+
ScalarExpression::Parse(ParseScalarExpression::Json(
362+
ParseJsonScalarExpression::new(value.get_query_location().clone(), value),
363+
)),
364+
ScalarExpression::Parse(ParseScalarExpression::JsonPath(
365+
ParseJsonPathScalarExpression::new(path.get_query_location().clone(), path),
366+
)),
367+
));
368+
369+
if let Some(typeof_rule) = extract_json_rules.next() {
370+
let typeof_location = to_query_location(&typeof_rule);
371+
match crate::shared_expressions::parse_typeof_expression(typeof_rule)? {
372+
Some(t) => {
373+
let c = ConversionScalarExpression::new(
374+
typeof_location.clone(),
375+
inner_expression,
376+
);
377+
378+
ScalarExpression::Convert(match t {
379+
ValueType::Boolean => ConvertScalarExpression::Boolean(c),
380+
ValueType::DateTime => ConvertScalarExpression::DateTime(c),
381+
ValueType::Double => ConvertScalarExpression::Double(c),
382+
ValueType::Integer => ConvertScalarExpression::Integer(c),
383+
ValueType::String => ConvertScalarExpression::String(c),
384+
ValueType::TimeSpan => ConvertScalarExpression::TimeSpan(c),
385+
v => {
386+
return Err(ParserError::SyntaxNotSupported(
387+
typeof_location,
388+
format!(
389+
"Type '{v}' specified in typeof expression is not supported"
390+
),
391+
));
392+
}
393+
})
394+
}
395+
None => inner_expression,
396+
}
397+
} else {
398+
ScalarExpression::Convert(ConvertScalarExpression::String(
399+
ConversionScalarExpression::new(location, inner_expression),
400+
))
401+
}
402+
}
350403
Rule::accessor_expression => {
351404
// Note: When used as a scalar expression it is valid for an
352405
// accessor to fold into a static at the root so
@@ -446,9 +499,13 @@ pub(crate) fn try_resolve_identifier(
446499
}
447500
ScalarExpression::Select(s) => {
448501
if let Some(mut value) = try_resolve_identifier(s.get_value(), scope)?
449-
&& let Some(mut selector) = try_resolve_identifier(s.get_selectors(), scope)?
502+
&& let ScalarExpression::Static(StaticScalarExpression::Array(selectors)) =
503+
s.get_selectors()
450504
{
451-
value.append(&mut selector);
505+
for v in selectors.get_values() {
506+
value.push(v.to_value().to_string().into());
507+
}
508+
452509
return Ok(Some(value));
453510
}
454511

@@ -1171,4 +1228,113 @@ mod tests {
11711228

11721229
run_test_success("gettype(1m)", "timespan");
11731230
}
1231+
1232+
#[test]
1233+
fn test_parse_extract_json_scalar_expression() {
1234+
let run_test_success = |input: &str, expected: ScalarExpression| {
1235+
println!("Testing: {input}");
1236+
1237+
let state = ParserState::new(input);
1238+
1239+
let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap();
1240+
1241+
let actual = parse_scalar_expression(result.next().unwrap(), &state).unwrap();
1242+
1243+
assert_eq!(expected, actual);
1244+
};
1245+
1246+
run_test_success(
1247+
"extract_json('$.key1', '{\"key1\":1}')",
1248+
ScalarExpression::Convert(ConvertScalarExpression::String(
1249+
ConversionScalarExpression::new(
1250+
QueryLocation::new_fake(),
1251+
ScalarExpression::Select(SelectScalarExpression::new(
1252+
QueryLocation::new_fake(),
1253+
ScalarExpression::Parse(ParseScalarExpression::Json(
1254+
ParseJsonScalarExpression::new(
1255+
QueryLocation::new_fake(),
1256+
ScalarExpression::Static(StaticScalarExpression::String(
1257+
StringScalarExpression::new(
1258+
QueryLocation::new_fake(),
1259+
"{\"key1\":1}",
1260+
),
1261+
)),
1262+
),
1263+
)),
1264+
ScalarExpression::Parse(ParseScalarExpression::JsonPath(
1265+
ParseJsonPathScalarExpression::new(
1266+
QueryLocation::new_fake(),
1267+
ScalarExpression::Static(StaticScalarExpression::String(
1268+
StringScalarExpression::new(
1269+
QueryLocation::new_fake(),
1270+
"$.key1",
1271+
),
1272+
)),
1273+
),
1274+
)),
1275+
)),
1276+
),
1277+
)),
1278+
);
1279+
1280+
run_test_success(
1281+
"extract_json('$.key1', '{\"key1\":1}', typeof(int))",
1282+
ScalarExpression::Convert(ConvertScalarExpression::Integer(
1283+
ConversionScalarExpression::new(
1284+
QueryLocation::new_fake(),
1285+
ScalarExpression::Select(SelectScalarExpression::new(
1286+
QueryLocation::new_fake(),
1287+
ScalarExpression::Parse(ParseScalarExpression::Json(
1288+
ParseJsonScalarExpression::new(
1289+
QueryLocation::new_fake(),
1290+
ScalarExpression::Static(StaticScalarExpression::String(
1291+
StringScalarExpression::new(
1292+
QueryLocation::new_fake(),
1293+
"{\"key1\":1}",
1294+
),
1295+
)),
1296+
),
1297+
)),
1298+
ScalarExpression::Parse(ParseScalarExpression::JsonPath(
1299+
ParseJsonPathScalarExpression::new(
1300+
QueryLocation::new_fake(),
1301+
ScalarExpression::Static(StaticScalarExpression::String(
1302+
StringScalarExpression::new(
1303+
QueryLocation::new_fake(),
1304+
"$.key1",
1305+
),
1306+
)),
1307+
),
1308+
)),
1309+
)),
1310+
),
1311+
)),
1312+
);
1313+
1314+
run_test_success(
1315+
"extract_json('$.key1', '{\"key1\":true}', typeof(dynamic))",
1316+
ScalarExpression::Select(SelectScalarExpression::new(
1317+
QueryLocation::new_fake(),
1318+
ScalarExpression::Parse(ParseScalarExpression::Json(
1319+
ParseJsonScalarExpression::new(
1320+
QueryLocation::new_fake(),
1321+
ScalarExpression::Static(StaticScalarExpression::String(
1322+
StringScalarExpression::new(
1323+
QueryLocation::new_fake(),
1324+
"{\"key1\":true}",
1325+
),
1326+
)),
1327+
),
1328+
)),
1329+
ScalarExpression::Parse(ParseScalarExpression::JsonPath(
1330+
ParseJsonPathScalarExpression::new(
1331+
QueryLocation::new_fake(),
1332+
ScalarExpression::Static(StaticScalarExpression::String(
1333+
StringScalarExpression::new(QueryLocation::new_fake(), "$.key1"),
1334+
)),
1335+
),
1336+
)),
1337+
)),
1338+
);
1339+
}
11741340
}

rust/experimental/query_engine/kql-parser/src/shared_expressions.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,28 @@ use crate::{
1010
scalar_primitive_expressions::parse_accessor_expression,
1111
};
1212

13+
pub(crate) fn parse_typeof_expression(
14+
typeof_expression_rule: Pair<Rule>,
15+
) -> Result<Option<ValueType>, ParserError> {
16+
let typeof_rules = typeof_expression_rule.into_inner();
17+
18+
Ok(match typeof_rules.as_str() {
19+
"bool" => Some(ValueType::Boolean),
20+
"datetime" => Some(ValueType::DateTime),
21+
"decimal" => Some(ValueType::Double),
22+
"double" => Some(ValueType::Double),
23+
"dynamic" => None,
24+
"guid" => Some(ValueType::String), // todo: Possibly support GUIDs natively
25+
"int" => Some(ValueType::Integer),
26+
"long" => Some(ValueType::Integer),
27+
"real" => Some(ValueType::Double),
28+
"regex" => Some(ValueType::Regex),
29+
"string" => Some(ValueType::String),
30+
"timespan" => Some(ValueType::TimeSpan),
31+
_ => panic!("Unexpected rule in typeof_expression_rule: {typeof_rules}"),
32+
})
33+
}
34+
1335
pub(crate) fn parse_source_assignment_expression(
1436
assignment_expression_rule: Pair<Rule>,
1537
scope: &dyn ParserScope,

0 commit comments

Comments
 (0)