-
Notifications
You must be signed in to change notification settings - Fork 554
Expand file tree
/
Copy pathschema.rs
More file actions
101 lines (90 loc) · 3.04 KB
/
schema.rs
File metadata and controls
101 lines (90 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use tantivy::TantivyDocument;
use tantivy::schema::{
FAST, FacetOptions, Field, STORED, STRING, Schema, TextFieldIndexing, TextOptions, Value,
};
use crate::SearchDocument;
/// Resolved [`Field`] handles for every field declared by [`build_schema`].
///
/// Looking the handles up once (see [`get_fields`]) avoids repeating
/// name-based lookups whenever a document is written or read.
#[derive(Debug, Clone)]
pub struct SchemaFields {
    /// Handle for the `"id"` text field (`STRING | STORED`).
    pub id: Field,
    /// Handle for the `"doc_type"` text field (`STRING | STORED`).
    pub doc_type: Field,
    /// Handle for the `"language"` text field (`STRING | STORED`).
    pub language: Field,
    /// Handle for the stored `"title"` text field indexed with the `"multilang"` tokenizer.
    pub title: Field,
    /// Handle for the stored `"content"` text field indexed with the `"multilang"` tokenizer.
    pub content: Field,
    /// Handle for the `"created_at"` i64 field (`FAST | STORED`).
    pub created_at: Field,
    /// Handle for the `"facets"` facet field.
    pub facets: Field,
}
/// Assembles the index [`Schema`].
///
/// Fields are registered in this order: `id`, `doc_type`, `language`,
/// `title`, `content`, `created_at`, `facets`.
pub fn build_schema() -> Schema {
    let mut builder = Schema::builder();

    // Identifier-like fields share the same `STRING | STORED` options.
    for name in ["id", "doc_type", "language"] {
        builder.add_text_field(name, STRING | STORED);
    }

    // Free-text fields: tokenized with the "multilang" tokenizer, recording
    // term frequencies and positions, and stored alongside the index.
    // NOTE(review): "multilang" is presumably registered on the index
    // elsewhere — confirm against the index setup code.
    let full_text = TextOptions::default()
        .set_indexing_options(
            TextFieldIndexing::default()
                .set_tokenizer("multilang")
                .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions),
        )
        .set_stored();
    builder.add_text_field("title", full_text.clone());
    builder.add_text_field("content", full_text);

    builder.add_i64_field("created_at", FAST | STORED);
    builder.add_facet_field("facets", FacetOptions::default());

    builder.build()
}
/// Resolves the handle of every field this module expects in `schema`.
///
/// # Panics
///
/// Panics if `schema` lacks any of the fields `id`, `doc_type`, `language`,
/// `title`, `content`, `created_at` or `facets`.
pub fn get_fields(schema: &Schema) -> SchemaFields {
    // One place for the lookup-and-unwrap so each field below reads as a name only.
    let lookup = |name: &str| schema.get_field(name).unwrap();

    let id = lookup("id");
    let doc_type = lookup("doc_type");
    let language = lookup("language");
    let title = lookup("title");
    let content = lookup("content");
    let created_at = lookup("created_at");
    let facets = lookup("facets");

    SchemaFields {
        id,
        doc_type,
        language,
        title,
        content,
        created_at,
        facets,
    }
}
/// Converts a stored tantivy document into a [`SearchDocument`].
///
/// Returns `None` when `id`, `doc_type`, `title` or `content` has no first
/// value readable as a string, or when `created_at` has no first value
/// readable as an `i64`. `language` is optional and becomes `None` when it is
/// absent or not a string. Every value of the `facets` field that is a facet
/// is collected in its string form; other values are skipped.
///
/// `_schema` is accepted but not used by this function.
pub fn extract_search_document(
    _schema: &Schema,
    fields: &SchemaFields,
    doc: &TantivyDocument,
) -> Option<SearchDocument> {
    // First value of `field` as an owned string, if present and textual.
    let text = |field| -> Option<String> { Some(doc.get_first(field)?.as_str()?.to_string()) };

    let mut facet_paths: Vec<String> = Vec::new();
    for value in doc.get_all(fields.facets) {
        if let Some(facet) = value.as_facet() {
            facet_paths.push(facet.to_string());
        }
    }

    Some(SearchDocument {
        id: text(fields.id)?,
        doc_type: text(fields.doc_type)?,
        // Optional: a missing language does not reject the document.
        language: text(fields.language),
        title: text(fields.title)?,
        content: text(fields.content)?,
        created_at: doc.get_first(fields.created_at)?.as_i64()?,
        facets: facet_paths,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The built schema must expose the `language`, `title` and `content` fields.
    #[test]
    fn test_build_schema_has_language_field() {
        let schema = build_schema();

        // (field name, message shown when the field is missing)
        let expectations = [
            ("language", "Schema should have a language field"),
            ("title", "Schema should have a title field"),
            ("content", "Schema should have a content field"),
        ];

        for (field_name, failure_message) in expectations {
            assert!(schema.get_field(field_name).is_ok(), "{}", failure_message);
        }
    }
}