Skip to content

Commit ac1e6e0

Browse files
normalise field name: change prefix from @ to _ to allow proper querying (#1514)
All fields prefixed with `@` will be renamed to have a `_` prefix; this is to make those fields queryable.
1 parent 8229632 commit ac1e6e0

File tree

3 files changed

+65
-2
lines changed

3 files changed

+65
-2
lines changed

src/event/format/json.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
2727
use datafusion::arrow::util::bit_util::round_upto_multiple_of_64;
2828
use itertools::Itertools;
2929
use serde_json::Value;
30-
use std::{collections::HashMap, sync::Arc};
30+
use std::{
31+
collections::{HashMap, HashSet},
32+
sync::Arc,
33+
};
3134
use tracing::error;
3235

3336
use super::EventFormat;
@@ -74,6 +77,10 @@ impl EventFormat for Event {
7477
_ => unreachable!("flatten would have failed beforehand"),
7578
};
7679

80+
// Rename JSON keys starting with '@' to '_' to match the schema
81+
// Reject event if renaming would cause a key collision
82+
let value_arr = rename_json_keys(value_arr)?;
83+
7784
// collect all the keys from all the json objects in the request body
7885
let fields =
7986
collect_keys(value_arr.iter()).expect("fields can be collected from array of objects");
@@ -257,6 +264,49 @@ fn collect_keys<'a>(values: impl Iterator<Item = &'a Value>) -> Result<Vec<&'a s
257264
Ok(keys)
258265
}
259266

267+
/// Renames JSON keys to match the schema transformation using normalize_field_name.
268+
/// Returns an error if renaming would cause a key collision.
269+
fn rename_json_keys(values: Vec<Value>) -> Result<Vec<Value>, anyhow::Error> {
270+
values
271+
.into_iter()
272+
.map(|value| {
273+
if let Value::Object(map) = value {
274+
// Collect original keys to check for collisions
275+
let original_keys: HashSet<String> = map.keys().cloned().collect();
276+
277+
// Check for collisions before renaming
278+
for key in map.keys() {
279+
if key.starts_with('@') {
280+
let mut normalized_key = key.clone();
281+
super::normalize_field_name(&mut normalized_key);
282+
if original_keys.contains(&normalized_key) {
283+
return Err(anyhow!(
284+
"Key collision detected: '{}' and '{}' would both map to '{}'",
285+
key,
286+
normalized_key,
287+
normalized_key
288+
));
289+
}
290+
}
291+
}
292+
293+
let new_map: serde_json::Map<String, Value> = map
294+
.into_iter()
295+
.map(|(mut key, val)| {
296+
if key.starts_with('@') {
297+
super::normalize_field_name(&mut key);
298+
}
299+
(key, val)
300+
})
301+
.collect();
302+
Ok(Value::Object(new_map))
303+
} else {
304+
Ok(value)
305+
}
306+
})
307+
.collect()
308+
}
309+
260310
fn fields_mismatch(
261311
schema: &[Arc<Field>],
262312
body: &Value,

src/event/format/mod.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@ static TIME_FIELD_NAME_PARTS: [&str; 11] = [
5757
];
5858
type EventSchema = Vec<Arc<Field>>;
5959

60+
/// Normalizes a field name in place by replacing a leading `@` with `_`.
///
/// Only the first character is considered: `@timestamp` becomes `_timestamp`,
/// while `a@b` and names without a leading `@` are left unchanged. Any `@`
/// after the first character is kept as is (`@@x` becomes `_@x`).
#[inline]
pub fn normalize_field_name(name: &mut String) {
    if name.starts_with('@') {
        // '@' and '_' are both single-byte ASCII, so the first byte can be
        // swapped in place without allocating a new String.
        name.replace_range(..1, "_");
    }
}
68+
6069
/// Source of the logs, used to perform special processing for certain sources
6170
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
6271
pub enum LogSource {
@@ -335,7 +344,9 @@ pub fn override_data_type(
335344
.fields()
336345
.iter()
337346
.map(|field| {
338-
let field_name = field.name().as_str();
347+
// Normalize field names - replace '@' prefix with '_'
348+
let mut field_name = field.name().to_string();
349+
normalize_field_name(&mut field_name);
339350
match (schema_version, map.get(field.name())) {
340351
// in V1 for new fields in json named "time"/"date" or such and having inferred
341352
// type string, that can be parsed as timestamp, use the timestamp type.

src/handlers/http/ingest.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use actix_web::{HttpRequest, HttpResponse, http::header::ContentType};
2424
use arrow_array::RecordBatch;
2525
use bytes::Bytes;
2626
use chrono::Utc;
27+
use tracing::error;
2728

2829
use crate::event::error::EventError;
2930
use crate::event::format::known_schema::{self, KNOWN_SCHEMA_LIST};
@@ -543,6 +544,7 @@ impl actix_web::ResponseError for PostError {
543544
}
544545

545546
fn error_response(&self) -> actix_web::HttpResponse<actix_web::body::BoxBody> {
547+
error!("{self}");
546548
match self {
547549
PostError::MetastoreError(metastore_error) => {
548550
actix_web::HttpResponse::build(metastore_error.status_code())

0 commit comments

Comments
 (0)