Skip to content

Commit 7aafbd8

Browse files
fmassot and guilload
authored
Timestamp field can now be defined at any field mapping level (#3537)
* Fix #3531 - Timestamp field can now be defined at any field mapping level. * Update index config doc. * Apply suggestions from code review Co-authored-by: Adrien Guillo <[email protected]> * Add new tests to cover edge cases and building from a yaml doc mapper. --------- Co-authored-by: Adrien Guillo <[email protected]>
1 parent da1268e commit 7aafbd8

File tree

5 files changed

+265
-32
lines changed

5 files changed

+265
-32
lines changed

docs/configuration/index-config.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,17 @@ The doc mapping defines how a document and the fields it contains are stored and
8888
| `field_mappings` | Collection of field mappings, each having its own data type (text, binary, datetime, bool, i64, u64, f64). | `[]` |
8989
| `mode` | Defines how quickwit should handle document fields that are not present in the `field_mappings`. In particular, the "dynamic" mode makes it possible to use quickwit in a schemaless manner. (See [mode](#mode)) | `lenient` |
9090
| `dynamic_mapping` | This parameter is only allowed when `mode` is set to `dynamic`. It then defines whether dynamically mapped fields should be indexed, stored, etc. | (See [mode](#mode)) |
91-
| `tag_fields` | Collection of fields already defined in `field_mappings` whose values will be stored as part of the `tags` metadata. [Learn more about tags](../overview/concepts/querying.md#tag-pruning). | `[]` |
91+
| `tag_fields` | Collection of fields* already defined in `field_mappings` whose values will be stored as part of the `tags` metadata. [Learn more about tags](../overview/concepts/querying.md#tag-pruning). | `[]` |
9292
| `store_source` | Whether or not the original JSON document is stored in the index. | `false` |
93-
| `timestamp_field` | Timestamp field used for sharding documents in splits. The field has to be of type `datetime`. [Learn more about time sharding](./../overview/architecture.md). | `None` |
93+
| `timestamp_field` | Timestamp field* used for sharding documents in splits. The field has to be of type `datetime`. [Learn more about time sharding](./../overview/architecture.md). | `None` |
9494
| `partition_key` | If set, quickwit will route documents into different splits depending on the field name declared as the `partition_key`. | `null` |
9595
| `max_num_partitions` | Limits the number of splits created through partitioning. (See [Partitioning](../overview/concepts/querying.md#partitioning)) | `200` |
9696

97+
*: tag fields and the timestamp field are expressed as a path from the root of the JSON object to the given field. If a field name contains a `.` character, it needs to be escaped with a `\` character.
98+
9799
### Field types
98100

99-
Each field has a type that indicates the kind of data it contains, such as integer on 64 bits or text.
101+
Each field[^1] has a type that indicates the kind of data it contains, such as integer on 64 bits or text.
100102
Quickwit supports the following raw types [`text`](#text-type), [`i64`](#numeric-types-i64-u64-and-f64-type), [`u64`](#numeric-types-i64-u64-and-f64-type), [`f64`](#numeric-types-i64-u64-and-f64-type), [`datetime`](#datetime-type), [`bool`](#bool-type), [`ip`](#ip-type), and [`bytes`](#bytes-type), and also supports composite types such as array and object. Behind the scenes, Quickwit is using tantivy field types, don't hesitate to look at [tantivy documentation](https://github.com/tantivy-search/tantivy) if you want to go into the details.
101103

102104
### Raw types

quickwit/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/quickwit-doc-mapper/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,11 @@ typetag = { workspace = true }
3333
utoipa = { workspace = true }
3434

3535
[dev-dependencies]
36-
proptest = { workspace = true }
3736
criterion = { workspace = true }
3837
matches = { workspace = true }
38+
proptest = { workspace = true }
3939
quickwit-proto = { workspace = true }
40+
serde_yaml = { workspace = true }
4041
time = { workspace = true }
4142

4243
[features]

quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs

Lines changed: 182 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,31 +117,22 @@ impl DefaultDocMapper {
117117
}
118118
}
119119

120-
fn validate_timestamp_field_if_any(builder: &DefaultDocMapperBuilder) -> anyhow::Result<()> {
121-
let Some(timestamp_field_name) = builder.timestamp_field.as_ref() else {
122-
return Ok(());
120+
fn validate_timestamp_field(
121+
timestamp_field_path: &str,
122+
mapping_root_node: &MappingNode,
123+
) -> anyhow::Result<()> {
124+
let Some(timestamp_field_type) = mapping_root_node.find_field_mapping_type(timestamp_field_path) else {
125+
bail!("Could not find timestamp field `{timestamp_field_path}` in field mappings.");
123126
};
124-
let Some(timestamp_field_entry) = builder.field_mappings.iter().find(|mapping| {
125-
&mapping.name == timestamp_field_name
126-
}) else {
127-
bail!("Missing timestamp field in field mappings: `{}`", timestamp_field_name);
128-
};
129-
if let FieldMappingType::DateTime(date_time_option, cardinality) =
130-
&timestamp_field_entry.mapping_type
131-
{
127+
if let FieldMappingType::DateTime(date_time_option, cardinality) = &timestamp_field_type {
132128
if cardinality != &Cardinality::SingleValue {
133-
bail!(
134-
"Multiple values are forbidden for the timestamp field \
135-
(`{timestamp_field_name}`)."
136-
);
129+
bail!("Timestamp field `{timestamp_field_path}` should be single-valued.");
137130
}
138131
if !date_time_option.fast {
139-
bail!("The timestamp field `{timestamp_field_name}`is required to be a fast field.");
132+
bail!("Timestamp field `{timestamp_field_path}` should be a fast field.");
140133
}
141134
} else {
142-
bail!(
143-
"The timestamp field `{timestamp_field_name}` is required to have the datetime type."
144-
);
135+
bail!("Timestamp field `{timestamp_field_path}` should be a datetime field.");
145136
}
146137
Ok(())
147138
}
@@ -159,7 +150,9 @@ impl TryFrom<DefaultDocMapperBuilder> for DefaultDocMapper {
159150
None
160151
};
161152

162-
validate_timestamp_field_if_any(&builder)?;
153+
if let Some(timestamp_field_path) = builder.timestamp_field.as_ref() {
154+
validate_timestamp_field(timestamp_field_path, &field_mappings)?;
155+
};
163156

164157
let dynamic_field = if let Mode::Dynamic(json_options) = &mode {
165158
Some(schema_builder.add_json_field(DYNAMIC_FIELD_NAME, json_options.clone()))
@@ -584,20 +577,84 @@ mod tests {
584577
}
585578

586579
#[test]
587-
fn test_fail_to_build_doc_mapper_with_non_datetime_timestamp_field() {
580+
fn test_timestamp_field_in_object_is_valid() {
581+
serde_json::from_str::<DefaultDocMapper>(
582+
r#"{
583+
"field_mappings": [
584+
{
585+
"name": "some_obj",
586+
"type": "object",
587+
"field_mappings": [
588+
{
589+
"name": "timestamp",
590+
"type": "datetime",
591+
"fast": true
592+
}
593+
]
594+
}
595+
],
596+
"timestamp_field": "some_obj.timestamp"
597+
}"#,
598+
)
599+
.unwrap();
600+
601+
serde_yaml::from_str::<DefaultDocMapper>(
602+
r#"
603+
field_mappings:
604+
- name: some_obj
605+
type: object
606+
field_mappings:
607+
- name: timestamp
608+
type: datetime
609+
fast: true
610+
timestamp_field: some_obj.timestamp
611+
"#,
612+
)
613+
.unwrap();
614+
}
615+
616+
#[test]
617+
fn test_timestamp_field_with_dots_in_its_name_is_valid() {
618+
serde_json::from_str::<DefaultDocMapper>(
619+
r#"{
620+
"field_mappings": [
621+
{
622+
"name": "my.timestamp",
623+
"type": "datetime",
624+
"fast": true
625+
}
626+
],
627+
"timestamp_field": "my\\.timestamp"
628+
}"#,
629+
)
630+
.unwrap();
631+
632+
serde_yaml::from_str::<DefaultDocMapper>(
633+
r#"
634+
field_mappings:
635+
- name: my.timestamp
636+
type: datetime
637+
fast: true
638+
timestamp_field: "my\\.timestamp"
639+
"#,
640+
)
641+
.unwrap();
642+
}
643+
644+
#[test]
645+
fn test_fail_to_build_doc_mapper_with_timestamp_field_with_multivalues_cardinality() {
588646
let doc_mapper = r#"{
589-
"default_search_fields": [],
590647
"timestamp_field": "timestamp",
591648
"tag_fields": [],
592649
"field_mappings": [
593650
{
594651
"name": "timestamp",
595-
"type": "text"
652+
"type": "array<i64>"
596653
}
597654
]
598655
}"#;
599656
let builder = serde_json::from_str::<DefaultDocMapperBuilder>(doc_mapper).unwrap();
600-
let expected_msg = "The timestamp field `timestamp` is required to have the datetime type.";
657+
let expected_msg = "Timestamp field `timestamp` should be a datetime field.";
601658
assert_eq!(&builder.try_build().unwrap_err().to_string(), &expected_msg);
602659
}
603660

@@ -616,7 +673,7 @@ mod tests {
616673
]
617674
}"#;
618675
let builder = serde_json::from_str::<DefaultDocMapperBuilder>(doc_mapper).unwrap();
619-
let expected_msg = "The timestamp field `timestamp`is required to be a fast field.";
676+
let expected_msg = "Timestamp field `timestamp` should be a fast field.";
620677
assert_eq!(&builder.try_build().unwrap_err().to_string(), &expected_msg);
621678
}
622679

@@ -689,7 +746,7 @@ mod tests {
689746
}"#;
690747

691748
let builder = serde_json::from_str::<DefaultDocMapperBuilder>(doc_mapper).unwrap();
692-
let expected_msg = "Multiple values are forbidden for the timestamp field (`timestamp`).";
749+
let expected_msg = "Timestamp field `timestamp` should be single-valued.";
693750
assert_eq!(&builder.try_build().unwrap_err().to_string(), expected_msg);
694751
}
695752

@@ -802,7 +859,7 @@ mod tests {
802859
}
803860

804861
#[test]
805-
fn test_partion_key_in_tags() {
862+
fn test_partition_key_in_tags() {
806863
let doc_mapper = r#"{
807864
"default_search_fields": [],
808865
"timestamp_field": null,
@@ -838,7 +895,7 @@ mod tests {
838895
}
839896

840897
#[test]
841-
fn test_partion_key_in_tags_without_explicit_tags() {
898+
fn test_partition_key_in_tags_without_explicit_tags() {
842899
let doc_mapper = r#"{
843900
"default_search_fields": [],
844901
"timestamp_field": null,
@@ -872,6 +929,42 @@ mod tests {
872929
assert_eq!(tag_fields, vec!["city", "division", "service",]);
873930
}
874931

932+
#[test]
933+
fn test_build_doc_mapper_with_tag_field_with_dots_in_its_name() {
934+
let doc_mapper = r#"{
935+
"default_search_fields": [],
936+
"tag_fields": ["my\\.city\\.id"],
937+
"field_mappings": [
938+
{
939+
"name": "my.city.id",
940+
"type": "u64"
941+
}
942+
]
943+
}"#;
944+
serde_json::from_str::<DefaultDocMapper>(doc_mapper).unwrap();
945+
}
946+
947+
#[test]
948+
fn test_build_doc_mapper_with_tag_field_in_object() {
949+
let doc_mapper = r#"{
950+
"default_search_fields": [],
951+
"tag_fields": ["location.city"],
952+
"field_mappings": [
953+
{
954+
"name": "location",
955+
"type": "object",
956+
"field_mappings": [
957+
{
958+
"name": "city",
959+
"type": "u64"
960+
}
961+
]
962+
}
963+
]
964+
}"#;
965+
serde_json::from_str::<DefaultDocMapper>(doc_mapper).unwrap();
966+
}
967+
875968
#[test]
876969
fn test_fail_to_build_doc_mapper_with_wrong_tag_fields_types() -> anyhow::Result<()> {
877970
let doc_mapper_one = r#"{
@@ -1280,4 +1373,65 @@ mod tests {
12801373
);
12811374
}
12821375
}
1376+
1377+
#[test]
1378+
fn test_find_field_mapping_type() {
1379+
let mapper = serde_json::from_str::<DefaultDocMapper>(
1380+
r#"{
1381+
"field_mappings": [
1382+
{
1383+
"name": "some_obj",
1384+
"type": "object",
1385+
"field_mappings": [
1386+
{
1387+
"name": "timestamp",
1388+
"type": "datetime",
1389+
"fast": true
1390+
},
1391+
{
1392+
"name": "object2",
1393+
"type": "object",
1394+
"field_mappings": [
1395+
{
1396+
"name": "id",
1397+
"type": "u64"
1398+
},
1399+
{
1400+
"name": "my.id",
1401+
"type": "u64"
1402+
}
1403+
]
1404+
}
1405+
]
1406+
},
1407+
{
1408+
"name": "my.timestamp",
1409+
"type": "datetime",
1410+
"fast": true
1411+
}
1412+
]
1413+
}"#,
1414+
)
1415+
.unwrap();
1416+
mapper
1417+
.field_mappings
1418+
.find_field_mapping_type("some_obj.timestamp")
1419+
.unwrap();
1420+
mapper
1421+
.field_mappings
1422+
.find_field_mapping_type("some_obj.object2.id")
1423+
.unwrap();
1424+
mapper
1425+
.field_mappings
1426+
.find_field_mapping_type("some_obj.object2")
1427+
.unwrap();
1428+
mapper
1429+
.field_mappings
1430+
.find_field_mapping_type("some_obj.object2.my\\.id")
1431+
.unwrap();
1432+
mapper
1433+
.field_mappings
1434+
.find_field_mapping_type("my\\.timestamp")
1435+
.unwrap();
1436+
}
12831437
}

0 commit comments

Comments
 (0)