Skip to content

Commit f02edc3

Browse files
committed
patch md2f.rs and add test coverage
1 parent 6e6ab06 commit f02edc3

File tree

3 files changed

+603
-125
lines changed

3 files changed

+603
-125
lines changed

1!

Lines changed: 384 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,384 @@
1+
use log::{debug, info};
2+
use serde_json::Value;
3+
4+
/// Errors that can occur while building or evaluating a metadata filter.
///
/// Failures come either from input that is not valid JSON or from JSON whose
/// shape or operand cannot be interpreted as a filter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Md2fsError {
    /// The raw input could not be deserialized as JSON.
    SerdeJsonError,
    /// The input was valid JSON but could not be interpreted as a filter.
    ParseError,
}

impl std::fmt::Display for Md2fsError {
    /// Write a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            Md2fsError::SerdeJsonError => "invalid JSON input",
            Md2fsError::ParseError => "JSON input could not be parsed as a metadata filter",
        };
        f.write_str(msg)
    }
}

// Implementing `Error` lets callers use `?` into `Box<dyn std::error::Error>`.
impl std::error::Error for Md2fsError {}
12+
13+
/// Comparison operators recognised inside a filter's where clause.
#[derive(Debug, PartialEq, Eq)]
enum FilterOperations {
    /// `eq`
    EqualTo,
    /// `gte`
    GreaterThanEqualTo,
    /// `gt`
    GreaterThan,
    /// `lt`
    LessThan,
    /// `lte`
    LessThanEqualTo,
    /// `in`
    In,
    /// No operator was supplied.
    Noop,
}

impl FilterOperations {
    /// Translate an operator name into the matching variant.
    ///
    /// A single leading `$` is ignored, so `"$gte"` and `"gte"` are
    /// equivalent. Any name that is not recognised maps to
    /// [`FilterOperations::EqualTo`].
    fn get_enum(s: &str) -> FilterOperations {
        let name = match s.strip_prefix('$') {
            Some(stripped) => stripped,
            None => s,
        };
        match name {
            "in" => Self::In,
            "lte" => Self::LessThanEqualTo,
            "lt" => Self::LessThan,
            "gte" => Self::GreaterThanEqualTo,
            "gt" => Self::GreaterThan,
            "eq" => Self::EqualTo,
            // Unknown operators fall back to equality.
            _ => Self::EqualTo,
        }
    }
}
40+
41+
#[derive(Debug)]
42+
enum MetadataFilterResult {
43+
U64Filter(MetadataFilter<u64>),
44+
StringFilter(MetadataFilter<String>),
45+
StringVecFilter(MetadataFilter<Vec<String>>),
46+
}
47+
48+
/// Metadata filter
49+
#[derive(Debug)]
50+
pub struct MetadataFilter<T> {
51+
/// Key to filter on
52+
key: String,
53+
/// Valid json type to filter on
54+
value: T,
55+
/// Filter operations eq, gt, gte, in, lt, lte
56+
filter: FilterOperations,
57+
}
58+
59+
impl<T: Default> Default for MetadataFilter<T> {
60+
fn default() -> Self {
61+
MetadataFilter {
62+
key: Default::default(),
63+
value: Default::default(),
64+
filter: FilterOperations::Noop,
65+
}
66+
}
67+
}
68+
69+
70+
pub trait Filter<T> {
71+
fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError>;
72+
fn eq(self, m: MetadataFilter<T>) -> bool;
73+
fn gt(self, m: MetadataFilter<T>) -> bool;
74+
fn gte(self, m: MetadataFilter<T>) -> bool;
75+
fn lt(self, m: MetadataFilter<T>) -> bool;
76+
fn lte(self, m: MetadataFilter<T>) -> bool;
77+
}
78+
79+
impl<T> Filter<T> for MetadataFilter<T>
80+
where
81+
T: PartialEq + PartialOrd + Default,
82+
{
83+
/// Create a filter on a valid string value
84+
fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError> {
85+
let v: Result<Value, serde_json::Error> = serde_json::from_str(raw);
86+
if v.is_err() {
87+
debug!("invalid json string");
88+
return Err(Md2fsError::SerdeJsonError);
89+
}
90+
let u_v: Value = v.map_err(|_| Md2fsError::ParseError)?;
91+
let vo = u_v.as_object();
92+
if vo.is_none() {
93+
debug!("could not parse string");
94+
return Err(Md2fsError::ParseError);
95+
}
96+
let key = match vo {
97+
Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(),
98+
_ => String::new(),
99+
};
100+
let vo2 = match vo {
101+
Some(v) => v[&key].as_object(),
102+
_ => None,
103+
};
104+
if vo2.is_none() {
105+
info!("no op key found, processing as metadata");
106+
let p_value = &u_v[&key];
107+
if p_value.is_string() {
108+
let value: String = p_value.as_str().unwrap_or_default().to_string();
109+
return Ok(MetadataFilterResult::StringFilter(MetadataFilter {
110+
key,
111+
filter: FilterOperations::Noop,
112+
value,
113+
}));
114+
} else {
115+
let value: u64 = p_value.as_u64().unwrap_or_default();
116+
return Ok(MetadataFilterResult::U64Filter(MetadataFilter {
117+
key,
118+
filter: FilterOperations::Noop,
119+
value,
120+
}));
121+
}
122+
}
123+
let op = match vo2 {
124+
Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(),
125+
_ => String::new(),
126+
};
127+
let value = match vo2 {
128+
Some(v) => &v[&op],
129+
_ => &Value::Null,
130+
};
131+
let filter: FilterOperations = FilterOperations::get_enum(&op);
132+
133+
if filter == FilterOperations::In {
134+
if let Some(arr) = value.as_array() {
135+
let str_vec: Vec<String> = arr
136+
.iter()
137+
.filter_map(|v| v.as_str().map(String::from))
138+
.collect();
139+
return Ok(MetadataFilterResult::StringVecFilter(MetadataFilter {
140+
key,
141+
filter,
142+
value: str_vec,
143+
}));
144+
}
145+
}
146+
147+
if value.is_string() {
148+
let value = value.as_str().unwrap_or_default().to_string();
149+
return Ok(MetadataFilterResult::StringFilter(MetadataFilter {
150+
key,
151+
filter,
152+
value,
153+
}));
154+
}
155+
if value.is_number() {
156+
let value = value.as_u64().unwrap_or_default();
157+
return Ok(MetadataFilterResult::U64Filter(MetadataFilter {
158+
key,
159+
filter,
160+
value,
161+
}));
162+
}
163+
Err(Md2fsError::ParseError)
164+
}
165+
fn eq(self, m: MetadataFilter<T>) -> bool {
166+
self.key == m.key && self.value == m.value
167+
}
168+
fn gt(self, m: MetadataFilter<T>) -> bool {
169+
self.key == m.key && m.value > self.value
170+
}
171+
fn gte(self, m: MetadataFilter<T>) -> bool {
172+
self.key == m.key && m.value >= self.value
173+
}
174+
fn lt(self, m: MetadataFilter<T>) -> bool {
175+
self.key == m.key && m.value < self.value
176+
}
177+
fn lte(self, m: MetadataFilter<T>) -> bool {
178+
self.key == m.key && m.value <= self.value
179+
}
180+
}
181+
182+
fn process_filter(raw_f: &str, raw_m: &str) -> Result<bool, Md2fsError> {
183+
// 1. Parse the filter JSON to get key, op, and filter value.
184+
let filter_result = MetadataFilter::<String>::create_filter(raw_f)?;
185+
186+
// 2. Parse the metadata JSON into a generic Value object.
187+
let meta_json: Value = serde_json::from_str(raw_m).map_err(|_| Md2fsError::SerdeJsonError)?;
188+
let meta_obj = match meta_json.as_object() {
189+
Some(obj) => obj,
190+
None => return Ok(false), // Metadata is not a valid JSON object.
191+
};
192+
193+
// 3. Match on the filter type and perform the check.
194+
match filter_result {
195+
MetadataFilterResult::StringVecFilter(f_vec) => {
196+
if let Some(meta_val) = meta_obj.get(&f_vec.key) {
197+
if let Some(m_str) = meta_val.as_str() {
198+
if f_vec.filter == FilterOperations::In {
199+
return Ok(f_vec.value.contains(&m_str.to_string()));
200+
}
201+
}
202+
}
203+
Ok(false)
204+
}
205+
MetadataFilterResult::StringFilter(f_str) => {
206+
if let Some(meta_val) = meta_obj.get(&f_str.key) {
207+
if let Some(m_str) = meta_val.as_str() {
208+
if f_str.filter == FilterOperations::EqualTo || f_str.filter == FilterOperations::Noop {
209+
return Ok(f_str.value == m_str);
210+
}
211+
}
212+
}
213+
Ok(false)
214+
}
215+
MetadataFilterResult::U64Filter(f_u64) => {
216+
if let Some(meta_val) = meta_obj.get(&f_u64.key) {
217+
if let Some(m_u64) = meta_val.as_u64() {
218+
return Ok(match f_u64.filter {
219+
FilterOperations::EqualTo | FilterOperations::Noop => m_u64 == f_u64.value,
220+
FilterOperations::GreaterThan => m_u64 > f_u64.value,
221+
FilterOperations::GreaterThanEqualTo => m_u64 >= f_u64.value,
222+
FilterOperations::LessThan => m_u64 < f_u64.value,
223+
FilterOperations::LessThanEqualTo => m_u64 <= f_u64.value,
224+
_ => false,
225+
});
226+
}
227+
}
228+
Ok(false)
229+
}
230+
}
231+
}
232+
233+
/// Proces two raw json strings. Let `raw_f` be a valid metadata filter
234+
///
235+
/// and `raw_m` be valid metadata that is not a nested object. Returns true
236+
///
237+
/// on a valid match. The equivalent of an SQL `where` clause.
238+
/// Proces two raw json strings. Let `raw_f` be a valid metadata filter
239+
///
240+
/// and `raw_m` be valid metadata that is not a nested object. Returns true
241+
///
242+
/// on a valid match. The equivalent of an SQL `where` clause.
243+
pub fn filter_where(raw_f: &[String], raw_m: &[String]) -> Result<bool, Md2fsError> {
244+
if raw_f.is_empty() {
245+
return Ok(true);
246+
}
247+
248+
// For each filter, check if it matches at least one of the metadata parts.
249+
for filter in raw_f {
250+
let mut filter_matched = false;
251+
for meta_part in raw_m {
252+
if process_filter(filter, meta_part)? {
253+
filter_matched = true;
254+
break; // This filter is satisfied, move to the next one.
255+
}
256+
}
257+
// If any filter doesn't find a match in the metadata, the whole document fails.
258+
if !filter_matched {
259+
return Ok(false);
260+
}
261+
}
262+
263+
// If all filters found a match, the document passes.
264+
Ok(true)
265+
}
266+
267+
#[cfg(test)]
mod tests {
    use super::*;

    const RATING_GTE_4: &str = r#"{"Rating": {"$gte": 4}}"#;
    const YEAR_EQ_2020: &str = r#"{"year": {"$eq": 2020}}"#;
    const GENRE_IN_MUSIC_HISTORY: &str = r#"{"genre": {"$in": ["music", "history"]}}"#;

    /// Evaluate one filter against one metadata document, panicking on errors.
    fn check(filter: &str, meta: &str) -> bool {
        process_filter(filter, meta).unwrap()
    }

    /// Convert string literals into the owned strings `filter_where` takes.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_gte_pass() {
        assert!(check(RATING_GTE_4, r#"{"Rating": 5}"#));
    }

    #[test]
    fn test_gte_fail() {
        assert!(!check(RATING_GTE_4, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_gte_equal_pass() {
        assert!(check(RATING_GTE_4, r#"{"Rating": 4}"#));
    }

    #[test]
    fn test_lte_pass() {
        assert!(check(r#"{"Rating": {"$lte": 4}}"#, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_in_pass() {
        assert!(check(GENRE_IN_MUSIC_HISTORY, r#"{"genre": "history"}"#));
    }

    #[test]
    fn test_in_fail() {
        assert!(!check(GENRE_IN_MUSIC_HISTORY, r#"{"genre": "sci-fi"}"#));
    }

    #[test]
    fn test_key_mismatch() {
        assert!(!check(RATING_GTE_4, r#"{"Score": 5}"#));
    }

    #[test]
    fn test_type_mismatch() {
        assert!(!check(RATING_GTE_4, r#"{"Rating": "good"}"#));
    }

    #[test]
    fn test_filter_where_pass() {
        let filters = owned(&[RATING_GTE_4, YEAR_EQ_2020]);
        let metadata = owned(&[r#"{"Rating": 5, "year": 2020}"#]);
        assert!(filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_fail() {
        let filters = owned(&[RATING_GTE_4, YEAR_EQ_2020]);
        let metadata = owned(&[r#"{"Rating": 3, "year": 2020}"#]);
        assert!(!filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_no_filters() {
        let filters: Vec<String> = Vec::new();
        let metadata = owned(&[r#"{"Rating": 5}"#]);
        assert!(filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_no_matching_meta() {
        let filters = owned(&[r#"{"genre": {"$in": ["sci-fi"]}}"#]);
        let metadata = owned(&[r#"{"genre": "history"}"#, r#"{"genre": "music"}"#]);
        assert!(!filter_where(&filters, &metadata).unwrap());
    }
}
384+

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)