Skip to content

Commit 0d39c1b

Browse files
chore: improve filter sql generation
Signed-off-by: Henry Gressmann <[email protected]>
1 parent d551b71 commit 0d39c1b

File tree

7 files changed

+173
-111
lines changed

7 files changed

+173
-111
lines changed

src/app/core/reports.rs

Lines changed: 83 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,13 @@ use std::collections::BTreeMap;
22
use std::fmt::{Debug, Display};
33

44
use crate::app::DuckDBConn;
5+
use crate::utils::duckdb::{repeat_vars, ParamVec};
56
use crate::web::routes::dashboard::GraphValue;
6-
use duckdb::{params_from_iter, ToSql};
7-
use eyre::Result;
8-
use itertools::Itertools;
7+
use duckdb::params_from_iter;
8+
use eyre::{bail, Result};
99
use poem_openapi::{Enum, Object};
1010
use time::OffsetDateTime;
1111

12-
// TODO: more fine-grained caching (e.g. don't cache for short durations/ending in now)
13-
// use cached::proc_macro::cached;
14-
// use cached::SizedCache;
15-
// const CACHE_SIZE_OVERALL_STATS: usize = 512;
16-
// const CACHE_SIZE_OVERALL_REPORTS: usize = 512;
17-
// const CACHE_SIZE_DIMENSION_REPORTS: usize = 512;
18-
1912
#[derive(Object)]
2013
pub struct DateRange {
2114
pub start: u64,
@@ -43,17 +36,16 @@ impl Display for DateRange {
4336
}
4437
}
4538

46-
#[derive(Debug, Enum)]
39+
#[derive(Debug, Enum, Clone, Copy)]
4740
#[oai(rename_all = "snake_case")]
4841
pub enum Metric {
4942
Views,
5043
Sessions,
5144
UniqueVisitors,
5245
AvgViewsPerSession,
53-
// AvgDuration,
5446
}
5547

56-
#[derive(Debug, Enum)]
48+
#[derive(Debug, Enum, Clone, Copy)]
5749
#[oai(rename_all = "snake_case")]
5850
pub enum Dimension {
5951
Url,
@@ -67,14 +59,16 @@ pub enum Dimension {
6759
City,
6860
}
6961

70-
#[derive(Enum, Debug)]
62+
#[derive(Enum, Debug, Clone, Copy)]
7163
#[oai(rename_all = "snake_case")]
7264
pub enum FilterType {
7365
Equal,
74-
NotEqual,
7566
Contains,
76-
NotContains,
67+
StartsWith,
68+
EndsWith,
7769
IsNull,
70+
IsTrue,
71+
IsFalse,
7872
}
7973

8074
pub type ReportGraph = Vec<GraphValue>;
@@ -92,13 +86,27 @@ pub struct ReportStats {
9286
#[derive(Object, Debug)]
9387
#[oai(rename_all = "camelCase")]
9488
pub struct DimensionFilter {
89+
/// The dimension to filter by
9590
dimension: Dimension,
91+
92+
/// The type of filter to apply
93+
/// Note that some filters may not be applicable to all dimensions
9694
filter_type: FilterType,
97-
value: String,
95+
96+
/// Whether to invert the filter (e.g. not equal, not contains)
97+
/// Defaults to false
98+
inversed: Option<bool>,
99+
100+
/// Whether to match case-sensitively (`like` instead of `ilike`); defaults to false
101+
strict: Option<bool>,
102+
103+
/// The value to filter by
104+
/// For `FilterType::IsNull`, `FilterType::IsTrue` and `FilterType::IsFalse` this should be `None`
105+
value: Option<String>,
98106
}
99107

100-
fn filter_sql(filters: &[DimensionFilter]) -> Result<(String, Vec<Box<dyn ToSql>>)> {
101-
let mut params: Vec<Box<dyn ToSql>> = Vec::new();
108+
fn filter_sql(filters: &[DimensionFilter]) -> Result<(String, ParamVec)> {
109+
let mut params = ParamVec::new();
102110

103111
if filters.is_empty() {
104112
return Ok(("".to_owned(), params));
@@ -107,27 +115,40 @@ fn filter_sql(filters: &[DimensionFilter]) -> Result<(String, Vec<Box<dyn ToSql>
107115
let filter_clauses = filters
108116
.iter()
109117
.map(|filter| {
110-
let filter_value = match filter.filter_type {
111-
FilterType::Equal => {
112-
params.push(Box::new(filter.value.clone()));
113-
" = ?"
114-
}
115-
FilterType::NotEqual => {
116-
params.push(Box::new(filter.value.clone()));
117-
" != ?"
118+
let filter_value = match (filter.value.clone(), filter.filter_type, filter.inversed.unwrap_or(false)) {
119+
(Some(value), filter_type, inversed) => {
120+
params.push(value);
121+
122+
let strict = filter.strict.unwrap_or(false);
123+
124+
let sql = match (filter_type, strict) {
125+
(FilterType::Equal, false) => "ilike ?",
126+
(FilterType::Equal, true) => "like ?",
127+
(FilterType::Contains, false) => "ilike '%' || ? || '%'",
128+
(FilterType::Contains, true) => "like '%' || ? || '%'",
129+
(FilterType::StartsWith, false) => "ilike ? || '%'",
130+
(FilterType::StartsWith, true) => "like ? || '%'",
131+
(FilterType::EndsWith, false) => "ilike '%' || ?",
132+
(FilterType::EndsWith, true) => "like '%' || ?",
133+
_ => bail!("Invalid filter type for value"),
134+
};
135+
136+
if inversed {
137+
format!("not {}", sql)
138+
} else {
139+
sql.to_owned()
140+
}
118141
}
119-
FilterType::Contains => {
120-
params.push(Box::new(filter.value.clone()));
121-
" like ?"
122-
}
123-
FilterType::NotContains => {
124-
params.push(Box::new(filter.value.clone()));
125-
" not like ?"
126-
}
127-
FilterType::IsNull => " is null",
142+
(None, FilterType::IsNull, false) => "is null".into(),
143+
(None, FilterType::IsNull, true) => "is not null".into(),
144+
(None, FilterType::IsTrue, false) => "is true".into(),
145+
(None, FilterType::IsTrue, true) => "is not true".into(),
146+
(None, FilterType::IsFalse, false) => "is false".into(),
147+
(None, FilterType::IsFalse, true) => "is not false".into(),
148+
_ => bail!("Invalid filter type for value"),
128149
};
129150

130-
match filter.dimension {
151+
Ok(match filter.dimension {
131152
Dimension::Url => format!("concat(fqdn, path) {}", filter_value),
132153
Dimension::Path => format!("path {}", filter_value),
133154
Dimension::Fqdn => format!("fqdn {}", filter_value),
@@ -137,11 +158,11 @@ fn filter_sql(filters: &[DimensionFilter]) -> Result<(String, Vec<Box<dyn ToSql>
137158
Dimension::Mobile => format!("mobile::text {}", filter_value),
138159
Dimension::Country => format!("country {}", filter_value),
139160
Dimension::City => format!("city {}", filter_value),
140-
}
161+
})
141162
})
142-
.join(" and ");
163+
.collect::<Result<Vec<String>>>()?;
143164

144-
Ok((format!("and ({})", filter_clauses), params))
165+
Ok((format!("and ({})", filter_clauses.join(" and ")), params))
145166
}
146167

147168
fn metric_sql(metric: &Metric) -> Result<String> {
@@ -174,12 +195,6 @@ pub fn online_users(conn: &DuckDBConn, entities: &[String]) -> Result<u64> {
174195
Ok(online_users[0])
175196
}
176197

177-
// #[cached(
178-
// ty = "SizedCache<String, ReportGraph>",
179-
// create = "{ SizedCache::with_size(CACHE_SIZE_OVERALL_REPORTS)}",
180-
// convert = r#"{format!("{:?}:{}:{}:{:?}:{:?}:{}", entities, event, range, filters, metric, data_points)}"#,
181-
// result = true
182-
// )]
183198
pub fn overall_report(
184199
conn: &DuckDBConn,
185200
entities: &[String],
@@ -193,21 +208,21 @@ pub fn overall_report(
193208
return Ok(vec![GraphValue::U64(0); data_points as usize]);
194209
}
195210

196-
let mut params: Vec<Box<dyn ToSql>> = Vec::new();
211+
let mut params = ParamVec::new();
197212

198213
let (filters_sql, filters_params) = filter_sql(filters)?;
199214
let metric_sql = metric_sql(metric)?;
200215

201216
let entity_vars = repeat_vars(entities.len());
202217

203-
params.push(Box::new(range.start()));
204-
params.push(Box::new(range.end()));
205-
params.push(Box::new(data_points));
206-
params.push(Box::new(data_points));
207-
params.push(Box::new(event));
208-
params.extend(entities.iter().map(|entity| Box::new(entity.clone()) as Box<dyn ToSql>));
209-
params.extend(filters_params);
210-
params.push(Box::new(range.end()));
218+
params.push(range.start());
219+
params.push(range.end());
220+
params.push(data_points);
221+
params.push(data_points);
222+
params.push(event);
223+
params.extend(entities);
224+
params.extend_from_params(filters_params);
225+
params.push(range.end());
211226

212227
let query = format!("--sql
213228
with
@@ -273,12 +288,6 @@ pub fn overall_report(
273288
}
274289
}
275290

276-
// #[cached(
277-
// ty = "SizedCache<String, ReportStats>",
278-
// create = "{ SizedCache::with_size(CACHE_SIZE_OVERALL_STATS)}",
279-
// convert = r#"{format!("{:?}:{}:{}:{:?}", entities, event, range, filters)}"#,
280-
// result = true
281-
// )]
282291
pub fn overall_stats(
283292
conn: &DuckDBConn,
284293
entities: &[String],
@@ -290,7 +299,7 @@ pub fn overall_stats(
290299
return Ok(ReportStats::default());
291300
}
292301

293-
let mut params: Vec<Box<dyn ToSql>> = Vec::new();
302+
let mut params = ParamVec::new();
294303

295304
let entity_vars = repeat_vars(entities.len());
296305
let (filters_sql, filters_params) = filter_sql(filters)?;
@@ -300,11 +309,11 @@ pub fn overall_stats(
300309
let metric_unique_visitors = metric_sql(&Metric::UniqueVisitors)?;
301310
let metric_avg_views_per_visitor = metric_sql(&Metric::AvgViewsPerSession)?;
302311

303-
params.push(Box::new(range.start()));
304-
params.push(Box::new(range.end()));
305-
params.push(Box::new(event));
306-
params.extend(entities.iter().map(|entity| Box::new(entity) as Box<dyn ToSql>));
307-
params.extend(filters_params);
312+
params.push(range.start());
313+
params.push(range.end());
314+
params.push(event);
315+
params.extend(entities);
316+
params.extend_from_params(filters_params);
308317

309318
let query = format!("--sql
310319
with
@@ -348,22 +357,16 @@ pub fn overall_stats(
348357
Ok(result)
349358
}
350359

351-
// #[cached(
352-
// ty = "SizedCache<String, ReportTable>",
353-
// create = "{ SizedCache::with_size(CACHE_SIZE_DIMENSION_REPORTS)}",
354-
// convert = r#"{format!("{:?}:{}:{}:{:?}:{:?}:{:?}", entities, event, range, dimension, filters, metric)}"#,
355-
// result = true
356-
// )]
357360
pub fn dimension_report(
358361
conn: &DuckDBConn,
359-
entities: &[impl AsRef<str> + Debug],
362+
entities: &[String],
360363
event: &str,
361364
range: &DateRange,
362365
dimension: &Dimension,
363366
filters: &[DimensionFilter],
364367
metric: &Metric,
365368
) -> Result<ReportTable> {
366-
let mut params: Vec<Box<dyn ToSql>> = Vec::new();
369+
let mut params = ParamVec::new();
367370
let entity_vars = repeat_vars(entities.len());
368371
let (filters_sql, filters_params) = filter_sql(filters)?;
369372

@@ -380,11 +383,11 @@ pub fn dimension_report(
380383
Dimension::City => ("concat(country, city)", "country, city"),
381384
};
382385

383-
params.push(Box::new(range.start()));
384-
params.push(Box::new(range.end()));
385-
params.push(Box::new(event));
386-
params.extend(entities.iter().map(|entity| Box::new(entity.as_ref()) as Box<dyn ToSql>));
387-
params.extend(filters_params);
386+
params.push(range.start());
387+
params.push(range.end());
388+
params.push(event);
389+
params.extend(entities);
390+
params.extend_from_params(filters_params);
388391

389392
let query = format!("--sql
390393
with
@@ -444,11 +447,3 @@ pub fn dimension_report(
444447
}
445448
}
446449
}
447-
448-
fn repeat_vars(count: usize) -> String {
449-
assert_ne!(count, 0);
450-
let mut s = "?,".repeat(count);
451-
// Remove trailing comma
452-
s.pop();
453-
s
454-
}

src/utils/duckdb.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
use duckdb::ToSql;
2+
3+
pub struct ParamVec<'a>(Vec<Box<dyn ToSql + 'a>>);
4+
5+
impl<'a> ParamVec<'a> {
6+
pub fn new() -> Self {
7+
Self(Vec::new())
8+
}
9+
10+
pub fn push<T: ToSql + 'a>(&mut self, value: T) {
11+
self.0.push(Box::new(value));
12+
}
13+
14+
pub fn extend_from_params(&mut self, params: ParamVec<'a>) {
15+
self.0.extend(params.0);
16+
}
17+
18+
pub fn extend<T: ToSql + 'a>(&mut self, iter: impl IntoIterator<Item = T>) {
19+
self.0.extend(iter.into_iter().map(|v| Box::new(v) as Box<dyn ToSql + 'a>));
20+
}
21+
}
22+
23+
impl<'a> IntoIterator for ParamVec<'a> {
24+
type Item = Box<dyn ToSql + 'a>;
25+
type IntoIter = std::vec::IntoIter<Self::Item>;
26+
fn into_iter(self) -> Self::IntoIter {
27+
self.0.into_iter()
28+
}
29+
}
30+
31+
/// Builds a comma-separated list of `count` `?` placeholders, e.g. `?,?,?` for 3.
///
/// # Panics
///
/// Panics if `count` is zero.
pub fn repeat_vars(count: usize) -> String {
    assert_ne!(count, 0);
    // Joining the placeholders avoids having to strip a trailing comma afterwards.
    vec!["?"; count].join(",")
}

src/utils/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod duckdb;
12
pub mod geo;
23
pub mod hash;
34
pub mod referrer;

web/src/api/constants.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,20 @@ export const filterNames: Record<DimensionFilter["filterType"], string> = {
2323
contains: "contains",
2424
equal: "is",
2525
is_null: "is null",
26-
not_contains: "does not contain",
27-
not_equal: "is not",
26+
ends_with: "ends with",
27+
is_false: "is false",
28+
is_true: "is true",
29+
starts_with: "starts with",
2830
};
2931

30-
export const filterNamesCapitalized: Record<DimensionFilter["filterType"], string> = {
31-
contains: "Contains",
32-
equal: "Equals",
33-
is_null: "Is Null",
34-
not_contains: "Does Not Contain",
35-
not_equal: "Does Not Equal",
32+
export const filterNamesInverted: Record<DimensionFilter["filterType"], string> = {
33+
contains: "does not contain",
34+
equal: "is not",
35+
is_null: "is not null",
36+
ends_with: "does not end with",
37+
is_false: "is not false",
38+
is_true: "is not true",
39+
starts_with: "does not start with",
3640
};
41+
42+
/** Upper-cases the first character of every whitespace-separated word in `str`. */
export const capitalizeAll = (str: string) => {
  // Matches the first non-whitespace character at the start of the string or after whitespace.
  const wordStart = /(?:^|\s)\S/g;
  return str.replace(wordStart, (match) => match.toUpperCase());
};

0 commit comments

Comments
 (0)