Skip to content

Commit b65f381

Browse files
committed
Limit field names to 255 characters
1 parent 54b590c commit b65f381

File tree

3 files changed

+83
-40
lines changed

3 files changed

+83
-40
lines changed

quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,10 @@ impl DefaultDocMapperBuilder {
162162
for (field_path, field_type) in field_mapping.field_entries() {
163163
let field_name = field_path.field_name();
164164
if field_name == SOURCE_FIELD_NAME {
165-
bail!("`_source` is a reserved name, change your field name.");
165+
bail!(
166+
"`_source` is a reserved field name, please, use a different name for \
167+
this field."
168+
);
166169
}
167170
if self.tag_fields.contains(&field_name) {
168171
match &field_type {
@@ -713,8 +716,10 @@ mod tests {
713716
}"#;
714717

715718
let builder = serde_json::from_str::<DefaultDocMapperBuilder>(doc_mapper)?;
716-
let expected_msg = "`_source` is a reserved name, change your field name.".to_string();
717-
assert_eq!(builder.build().unwrap_err().to_string(), expected_msg);
719+
assert_eq!(
720+
builder.build().unwrap_err().to_string(),
721+
"`_source` is a reserved field name, please, use a different name for this field."
722+
);
718723
Ok(())
719724
}
720725

quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use tantivy::schema::{
3131
use thiserror::Error;
3232

3333
use super::{default_as_true, FieldMappingType};
34-
use crate::default_doc_mapper::is_valid_field_mapping_name;
34+
use crate::default_doc_mapper::validate_field_mapping_name;
3535

3636
/// A `FieldMappingEntry` defines how a field is indexed, stored,
3737
/// and mapped from a JSON document to the related index fields.
@@ -50,7 +50,7 @@ pub struct FieldMappingEntry {
5050
impl FieldMappingEntry {
5151
/// Creates a new [`FieldMappingEntry`].
5252
pub fn new(name: String, mapping_type: FieldMappingType) -> Self {
53-
assert!(is_valid_field_mapping_name(&name));
53+
assert!(validate_field_mapping_name(&name).is_ok());
5454
FieldMappingEntry { name, mapping_type }
5555
}
5656

@@ -512,9 +512,7 @@ impl TryFrom<FieldMappingEntryForSerialization> for FieldMappingEntry {
512512
type_str
513513
),
514514
};
515-
if !is_valid_field_mapping_name(&value.name) {
516-
bail!("Invalid field name: `{}`.", value.name)
517-
}
515+
validate_field_mapping_name(&value.name)?;
518516
Ok(FieldMappingEntry::new(value.name, field_type))
519517
}
520518
}
@@ -894,17 +892,17 @@ mod tests {
894892

895893
#[test]
896894
fn test_deserialize_i64_field_with_invalid_name() {
897-
let result = serde_json::from_str::<FieldMappingEntry>(
895+
assert!(serde_json::from_str::<FieldMappingEntry>(
898896
r#"
899897
{
900898
"name": "this is not ok",
901899
"type": "i64"
902900
}
903901
"#,
904-
);
905-
assert!(result.is_err());
906-
let error = result.unwrap_err();
907-
assert_eq!(error.to_string(), "Invalid field name: `this is not ok`.");
902+
)
903+
.unwrap_err()
904+
.to_string()
905+
.contains("illegal characters"));
908906
}
909907

910908
#[test]
@@ -1064,35 +1062,20 @@ mod tests {
10641062

10651063
#[test]
10661064
fn test_deserialize_u64_field_with_wrong_options() {
1067-
let cases = vec![
1068-
(
1065+
assert_eq!(
1066+
serde_json::from_str::<FieldMappingEntry>(
10691067
r#"
10701068
{
10711069
"name": "my_field_name",
10721070
"type": "u64",
10731071
"tokenizer": "basic"
1074-
}
1075-
"#,
1076-
"Error when parsing `my_field_name`: `record` and `tokenizer` parameters are for \
1077-
text field only.",
1078-
),
1079-
(
1080-
r#"
1081-
{
1082-
"name": "this is not ok",
1083-
"type": "i64"
1084-
}
1085-
"#,
1086-
"Invalid field name: `this is not ok`.",
1087-
),
1088-
];
1089-
1090-
for (json_str, err_str) in cases {
1091-
let result = serde_json::from_str::<FieldMappingEntry>(json_str);
1092-
assert!(result.is_err());
1093-
let error = result.unwrap_err();
1094-
assert_eq!(error.to_string(), err_str,)
1095-
}
1072+
}"#
1073+
)
1074+
.unwrap_err()
1075+
.to_string(),
1076+
"Error when parsing `my_field_name`: `record` and `tokenizer` parameters are for text \
1077+
field only."
1078+
);
10961079
}
10971080

10981081
#[test]

quickwit-doc-mapper/src/default_doc_mapper/mod.rs

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ mod default_mapper;
2121
mod field_mapping_entry;
2222
mod field_mapping_type;
2323

24+
use anyhow::bail;
2425
use once_cell::sync::Lazy;
2526
use regex::Regex;
2627

@@ -29,20 +30,74 @@ pub use self::field_mapping_entry::{DocParsingError, FieldMappingEntry};
2930
pub use self::field_mapping_type::FieldMappingType;
3031

3132
/// Regular expression representing the restriction on a valid field name.
32-
pub const FIELD_MAPPING_NAME_PATTERN: &str = r#"^[_a-zA-Z][_\.\-a-zA-Z0-9]*$"#;
33+
pub const FIELD_MAPPING_NAME_PATTERN: &str = r#"^[_a-zA-Z][_\.\-a-zA-Z0-9]{0,254}$"#;
3334

3435
/// Validator for a potential `field_mapping_name`.
3536
/// Returns true if the name can be use for a field mapping name.
3637
///
3738
/// A field mapping name must start by a letter `[a-zA-Z]`.
3839
/// The other characters can be any alphanumic character `[a-ZA-Z0-9]` or `_`, `.`, `-`.
39-
pub fn is_valid_field_mapping_name(field_mapping_name: &str) -> bool {
40+
pub fn validate_field_mapping_name(field_mapping_name: &str) -> anyhow::Result<()> {
4041
static FIELD_MAPPING_NAME_PTN: Lazy<Regex> =
4142
Lazy::new(|| Regex::new(FIELD_MAPPING_NAME_PATTERN).unwrap());
42-
FIELD_MAPPING_NAME_PTN.is_match(field_mapping_name)
43+
44+
if FIELD_MAPPING_NAME_PTN.is_match(field_mapping_name) {
45+
return Ok(());
46+
}
47+
if field_mapping_name.is_empty() {
48+
bail!("Field name is empty.");
49+
}
50+
if field_mapping_name.len() > 255 {
51+
bail!(
52+
"Field name `{}` is too long. Field names must not be longer than 255 characters.",
53+
field_mapping_name
54+
)
55+
}
56+
let first_char = field_mapping_name.chars().next().unwrap();
57+
if !first_char.is_ascii_alphabetic() && first_char != '_' {
58+
bail!(
59+
"Field name `{}` is invalid. Field names must start with an uppercase or lowercase \
60+
ASCII letter or an underscore `_`.",
61+
field_mapping_name
62+
)
63+
}
64+
bail!(
65+
"Field name `{}` contains illegal characters. Field names must only contain uppercase and \
66+
lowercase ASCII letters, digits, hyphens `-`, periods `.`, and underscores `_`.",
67+
field_mapping_name
68+
);
4369
}
4470

4571
/// Function used with serde to initialize boolean value at true if there is no value in json.
4672
fn default_as_true() -> bool {
4773
true
4874
}
75+
76+
#[cfg(test)]
77+
mod tests {
78+
use super::*;
79+
80+
#[test]
81+
fn test_validate_field_mapping_name() {
82+
assert!(validate_field_mapping_name("")
83+
.unwrap_err()
84+
.to_string()
85+
.contains("is empty"));
86+
assert!(validate_field_mapping_name(&"a".repeat(256))
87+
.unwrap_err()
88+
.to_string()
89+
.contains("is too long"));
90+
assert!(validate_field_mapping_name("0")
91+
.unwrap_err()
92+
.to_string()
93+
.contains("must start with"));
94+
assert!(validate_field_mapping_name("_my-field!")
95+
.unwrap_err()
96+
.to_string()
97+
.contains("illegal characters"));
98+
assert!(validate_field_mapping_name("my-field").is_ok());
99+
assert!(validate_field_mapping_name("my.field").is_ok());
100+
assert!(validate_field_mapping_name("my_field").is_ok());
101+
assert!(validate_field_mapping_name(&"a".repeat(255)).is_ok());
102+
}
103+
}

0 commit comments

Comments
 (0)