|
| 1 | +use log::{debug, info}; |
| 2 | +use serde_json::Value; |
| 3 | + |
/// Errors that can occur while building or evaluating a metadata filter.
///
/// The variants carry no payload, so the type is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Md2fsError {
    /// The input string could not be parsed as JSON.
    SerdeJsonError,
    /// The input was valid JSON but did not have a shape or value that can be
    /// turned into a filter (for example, it was not a JSON object).
    ParseError,
}

impl std::fmt::Display for Md2fsError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Md2fsError::SerdeJsonError => "input is not valid JSON",
            Md2fsError::ParseError => "JSON does not describe a supported filter",
        };
        f.write_str(message)
    }
}

// Implementing `Error` lets callers use `?` into `Box<dyn Error>` and friends.
impl std::error::Error for Md2fsError {}
| 12 | + |
/// Comparison operators that may appear as the operator key of a filter.
#[derive(Debug, PartialEq, Eq)]
enum FilterOperations {
    /// `eq` - values are equal.
    EqualTo,
    /// `gte` - greater than or equal to.
    GreaterThanEqualTo,
    /// `gt` - strictly greater than.
    GreaterThan,
    /// `lt` - strictly less than.
    LessThan,
    /// `lte` - less than or equal to.
    LessThanEqualTo,
    /// `in` - membership in a list of values.
    In,
    /// No operator was supplied.
    Noop,
}

impl FilterOperations {
    /// Translate an operator name into its enum variant.
    ///
    /// A single leading `$` is optional (`"$gte"` and `"gte"` are equivalent).
    /// `"eq"` and every unrecognised name both resolve to
    /// [`FilterOperations::EqualTo`].
    fn get_enum(s: &str) -> FilterOperations {
        match s.strip_prefix('$').unwrap_or(s) {
            "in" => Self::In,
            "lt" => Self::LessThan,
            "lte" => Self::LessThanEqualTo,
            "gt" => Self::GreaterThan,
            "gte" => Self::GreaterThanEqualTo,
            // Covers "eq" as well as any unknown operator name.
            _ => Self::EqualTo,
        }
    }
}
| 40 | + |
| 41 | +#[derive(Debug)] |
| 42 | +enum MetadataFilterResult { |
| 43 | + U64Filter(MetadataFilter<u64>), |
| 44 | + StringFilter(MetadataFilter<String>), |
| 45 | + StringVecFilter(MetadataFilter<Vec<String>>), |
| 46 | +} |
| 47 | + |
| 48 | +/// Metadata filter |
| 49 | +#[derive(Debug)] |
| 50 | +pub struct MetadataFilter<T> { |
| 51 | + /// Key to filter on |
| 52 | + key: String, |
| 53 | + /// Valid json type to filter on |
| 54 | + value: T, |
| 55 | + /// Filter operations eq, gt, gte, in, lt, lte |
| 56 | + filter: FilterOperations, |
| 57 | +} |
| 58 | + |
| 59 | +impl<T: Default> Default for MetadataFilter<T> { |
| 60 | + fn default() -> Self { |
| 61 | + MetadataFilter { |
| 62 | + key: Default::default(), |
| 63 | + value: Default::default(), |
| 64 | + filter: FilterOperations::Noop, |
| 65 | + } |
| 66 | + } |
| 67 | +} |
| 68 | + |
| 69 | + |
| 70 | +pub trait Filter<T> { |
| 71 | + fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError>; |
| 72 | + fn eq(self, m: MetadataFilter<T>) -> bool; |
| 73 | + fn gt(self, m: MetadataFilter<T>) -> bool; |
| 74 | + fn gte(self, m: MetadataFilter<T>) -> bool; |
| 75 | + fn lt(self, m: MetadataFilter<T>) -> bool; |
| 76 | + fn lte(self, m: MetadataFilter<T>) -> bool; |
| 77 | +} |
| 78 | + |
| 79 | +impl<T> Filter<T> for MetadataFilter<T> |
| 80 | +where |
| 81 | + T: PartialEq + PartialOrd + Default, |
| 82 | +{ |
| 83 | + /// Create a filter on a valid string value |
| 84 | + fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError> { |
| 85 | + let v: Result<Value, serde_json::Error> = serde_json::from_str(raw); |
| 86 | + if v.is_err() { |
| 87 | + debug!("invalid json string"); |
| 88 | + return Err(Md2fsError::SerdeJsonError); |
| 89 | + } |
| 90 | + let u_v: Value = v.map_err(|_| Md2fsError::ParseError)?; |
| 91 | + let vo = u_v.as_object(); |
| 92 | + if vo.is_none() { |
| 93 | + debug!("could not parse string"); |
| 94 | + return Err(Md2fsError::ParseError); |
| 95 | + } |
| 96 | + let key = match vo { |
| 97 | + Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(), |
| 98 | + _ => String::new(), |
| 99 | + }; |
| 100 | + let vo2 = match vo { |
| 101 | + Some(v) => v[&key].as_object(), |
| 102 | + _ => None, |
| 103 | + }; |
| 104 | + if vo2.is_none() { |
| 105 | + info!("no op key found, processing as metadata"); |
| 106 | + let p_value = &u_v[&key]; |
| 107 | + if p_value.is_string() { |
| 108 | + let value: String = p_value.as_str().unwrap_or_default().to_string(); |
| 109 | + return Ok(MetadataFilterResult::StringFilter(MetadataFilter { |
| 110 | + key, |
| 111 | + filter: FilterOperations::Noop, |
| 112 | + value, |
| 113 | + })); |
| 114 | + } else { |
| 115 | + let value: u64 = p_value.as_u64().unwrap_or_default(); |
| 116 | + return Ok(MetadataFilterResult::U64Filter(MetadataFilter { |
| 117 | + key, |
| 118 | + filter: FilterOperations::Noop, |
| 119 | + value, |
| 120 | + })); |
| 121 | + } |
| 122 | + } |
| 123 | + let op = match vo2 { |
| 124 | + Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(), |
| 125 | + _ => String::new(), |
| 126 | + }; |
| 127 | + let value = match vo2 { |
| 128 | + Some(v) => &v[&op], |
| 129 | + _ => &Value::Null, |
| 130 | + }; |
| 131 | + let filter: FilterOperations = FilterOperations::get_enum(&op); |
| 132 | + |
| 133 | + if filter == FilterOperations::In { |
| 134 | + if let Some(arr) = value.as_array() { |
| 135 | + let str_vec: Vec<String> = arr |
| 136 | + .iter() |
| 137 | + .filter_map(|v| v.as_str().map(String::from)) |
| 138 | + .collect(); |
| 139 | + return Ok(MetadataFilterResult::StringVecFilter(MetadataFilter { |
| 140 | + key, |
| 141 | + filter, |
| 142 | + value: str_vec, |
| 143 | + })); |
| 144 | + } |
| 145 | + } |
| 146 | + |
| 147 | + if value.is_string() { |
| 148 | + let value = value.as_str().unwrap_or_default().to_string(); |
| 149 | + return Ok(MetadataFilterResult::StringFilter(MetadataFilter { |
| 150 | + key, |
| 151 | + filter, |
| 152 | + value, |
| 153 | + })); |
| 154 | + } |
| 155 | + if value.is_number() { |
| 156 | + let value = value.as_u64().unwrap_or_default(); |
| 157 | + return Ok(MetadataFilterResult::U64Filter(MetadataFilter { |
| 158 | + key, |
| 159 | + filter, |
| 160 | + value, |
| 161 | + })); |
| 162 | + } |
| 163 | + Err(Md2fsError::ParseError) |
| 164 | + } |
| 165 | + fn eq(self, m: MetadataFilter<T>) -> bool { |
| 166 | + self.key == m.key && self.value == m.value |
| 167 | + } |
| 168 | + fn gt(self, m: MetadataFilter<T>) -> bool { |
| 169 | + self.key == m.key && m.value > self.value |
| 170 | + } |
| 171 | + fn gte(self, m: MetadataFilter<T>) -> bool { |
| 172 | + self.key == m.key && m.value >= self.value |
| 173 | + } |
| 174 | + fn lt(self, m: MetadataFilter<T>) -> bool { |
| 175 | + self.key == m.key && m.value < self.value |
| 176 | + } |
| 177 | + fn lte(self, m: MetadataFilter<T>) -> bool { |
| 178 | + self.key == m.key && m.value <= self.value |
| 179 | + } |
| 180 | +} |
| 181 | + |
| 182 | +fn process_filter(raw_f: &str, raw_m: &str) -> Result<bool, Md2fsError> { |
| 183 | + // 1. Parse the filter JSON to get key, op, and filter value. |
| 184 | + let filter_result = MetadataFilter::<String>::create_filter(raw_f)?; |
| 185 | + |
| 186 | + // 2. Parse the metadata JSON into a generic Value object. |
| 187 | + let meta_json: Value = serde_json::from_str(raw_m).map_err(|_| Md2fsError::SerdeJsonError)?; |
| 188 | + let meta_obj = match meta_json.as_object() { |
| 189 | + Some(obj) => obj, |
| 190 | + None => return Ok(false), // Metadata is not a valid JSON object. |
| 191 | + }; |
| 192 | + |
| 193 | + // 3. Match on the filter type and perform the check. |
| 194 | + match filter_result { |
| 195 | + MetadataFilterResult::StringVecFilter(f_vec) => { |
| 196 | + if let Some(meta_val) = meta_obj.get(&f_vec.key) { |
| 197 | + if let Some(m_str) = meta_val.as_str() { |
| 198 | + if f_vec.filter == FilterOperations::In { |
| 199 | + return Ok(f_vec.value.contains(&m_str.to_string())); |
| 200 | + } |
| 201 | + } |
| 202 | + } |
| 203 | + Ok(false) |
| 204 | + } |
| 205 | + MetadataFilterResult::StringFilter(f_str) => { |
| 206 | + if let Some(meta_val) = meta_obj.get(&f_str.key) { |
| 207 | + if let Some(m_str) = meta_val.as_str() { |
| 208 | + if f_str.filter == FilterOperations::EqualTo || f_str.filter == FilterOperations::Noop { |
| 209 | + return Ok(f_str.value == m_str); |
| 210 | + } |
| 211 | + } |
| 212 | + } |
| 213 | + Ok(false) |
| 214 | + } |
| 215 | + MetadataFilterResult::U64Filter(f_u64) => { |
| 216 | + if let Some(meta_val) = meta_obj.get(&f_u64.key) { |
| 217 | + if let Some(m_u64) = meta_val.as_u64() { |
| 218 | + return Ok(match f_u64.filter { |
| 219 | + FilterOperations::EqualTo | FilterOperations::Noop => m_u64 == f_u64.value, |
| 220 | + FilterOperations::GreaterThan => m_u64 > f_u64.value, |
| 221 | + FilterOperations::GreaterThanEqualTo => m_u64 >= f_u64.value, |
| 222 | + FilterOperations::LessThan => m_u64 < f_u64.value, |
| 223 | + FilterOperations::LessThanEqualTo => m_u64 <= f_u64.value, |
| 224 | + _ => false, |
| 225 | + }); |
| 226 | + } |
| 227 | + } |
| 228 | + Ok(false) |
| 229 | + } |
| 230 | + } |
| 231 | +} |
| 232 | + |
| 233 | +/// Proces two raw json strings. Let `raw_f` be a valid metadata filter |
| 234 | +/// |
| 235 | +/// and `raw_m` be valid metadata that is not a nested object. Returns true |
| 236 | +/// |
| 237 | +/// on a valid match. The equivalent of an SQL `where` clause. |
| 238 | +/// Proces two raw json strings. Let `raw_f` be a valid metadata filter |
| 239 | +/// |
| 240 | +/// and `raw_m` be valid metadata that is not a nested object. Returns true |
| 241 | +/// |
| 242 | +/// on a valid match. The equivalent of an SQL `where` clause. |
| 243 | +pub fn filter_where(raw_f: &[String], raw_m: &[String]) -> Result<bool, Md2fsError> { |
| 244 | + if raw_f.is_empty() { |
| 245 | + return Ok(true); |
| 246 | + } |
| 247 | + |
| 248 | + // For each filter, check if it matches at least one of the metadata parts. |
| 249 | + for filter in raw_f { |
| 250 | + let mut filter_matched = false; |
| 251 | + for meta_part in raw_m { |
| 252 | + if process_filter(filter, meta_part)? { |
| 253 | + filter_matched = true; |
| 254 | + break; // This filter is satisfied, move to the next one. |
| 255 | + } |
| 256 | + } |
| 257 | + // If any filter doesn't find a match in the metadata, the whole document fails. |
| 258 | + if !filter_matched { |
| 259 | + return Ok(false); |
| 260 | + } |
| 261 | + } |
| 262 | + |
| 263 | + // If all filters found a match, the document passes. |
| 264 | + Ok(true) |
| 265 | +} |
| 266 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Evaluate one filter against one metadata document, panicking on error.
    fn check(filter: &str, meta: &str) -> bool {
        process_filter(filter, meta).unwrap()
    }

    /// Turn string literals into the owned strings `filter_where` expects.
    fn owned(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    #[test]
    fn test_gte_pass() {
        assert!(check(r#"{"Rating": {"$gte": 4}}"#, r#"{"Rating": 5}"#));
    }

    #[test]
    fn test_gte_fail() {
        assert!(!check(r#"{"Rating": {"$gte": 4}}"#, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_gte_equal_pass() {
        assert!(check(r#"{"Rating": {"$gte": 4}}"#, r#"{"Rating": 4}"#));
    }

    #[test]
    fn test_lte_pass() {
        assert!(check(r#"{"Rating": {"$lte": 4}}"#, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_in_pass() {
        assert!(check(
            r#"{"genre": {"$in": ["music", "history"]}}"#,
            r#"{"genre": "history"}"#
        ));
    }

    #[test]
    fn test_in_fail() {
        assert!(!check(
            r#"{"genre": {"$in": ["music", "history"]}}"#,
            r#"{"genre": "sci-fi"}"#
        ));
    }

    #[test]
    fn test_key_mismatch() {
        assert!(!check(r#"{"Rating": {"$gte": 4}}"#, r#"{"Score": 5}"#));
    }

    #[test]
    fn test_type_mismatch() {
        assert!(!check(r#"{"Rating": {"$gte": 4}}"#, r#"{"Rating": "good"}"#));
    }

    #[test]
    fn test_filter_where_pass() {
        let filters = owned(&[
            r#"{"Rating": {"$gte": 4}}"#,
            r#"{"year": {"$eq": 2020}}"#,
        ]);
        let metadata = owned(&[r#"{"Rating": 5, "year": 2020}"#]);
        assert!(filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_fail() {
        let filters = owned(&[
            r#"{"Rating": {"$gte": 4}}"#,
            r#"{"year": {"$eq": 2020}}"#,
        ]);
        let metadata = owned(&[r#"{"Rating": 3, "year": 2020}"#]);
        assert!(!filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_no_filters() {
        let filters: Vec<String> = Vec::new();
        let metadata = owned(&[r#"{"Rating": 5}"#]);
        assert!(filter_where(&filters, &metadata).unwrap());
    }

    #[test]
    fn test_filter_where_no_matching_meta() {
        let filters = owned(&[r#"{"genre": {"$in": ["sci-fi"]}}"#]);
        let metadata = owned(&[r#"{"genre": "history"}"#, r#"{"genre": "music"}"#]);
        assert!(!filter_where(&filters, &metadata).unwrap());
    }
}
| 384 | + |
0 commit comments