Skip to content

Commit 197310c

Browse files
committed
Adding front_coding window function
1 parent 98f87bf commit 197310c

File tree

4 files changed

+58
-4
lines changed

4 files changed

+58
-4
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 0.57.0 (provisional)
4+
5+
*Features*
6+
7+
* Adding `front_coding` window function.
8+
39
## 0.56.0
410

511
*Features*

docs/moonblade/window.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- **cumsum**(*\<expr\>*) -> `number`: Returns the cumulative sum of the numbers yielded by given expression.
77
- **dense_rank**(*\<expr\>*) -> `number`: Returns the dense rank (there will be no gaps, but ties remain possible for a same rank) of numbers yielded by given expression. This requires buffering whole file or group!
88
- **frac**(*\<expr\>*, *decimals?*) -> `number`: Returns the fraction represented by numbers yielded by given expression over the total sum of them. This requires buffering whole file or group!
9+
- **front_coding**(*\<expr\>*) -> `string`: Compress string returned by given expression using incremental encoding (useful to compress sorted data).<br>See https://en.wikipedia.org/wiki/Incremental_encoding
910
- **lag**(*\<expr\>*, *steps?*, *\<expr\>?*) -> `any`: Returns a value yielded by given expression, lagged by n steps or 1 step by default. Can take a second expression after the number of steps to return a default value for rows that come before first lagged value.
1011
- **lead**(*\<expr\>*, *steps?*, *\<expr\>?*) -> `any`: Returns a value yielded by given expression, leading by n steps or 1 step by default. Can take a second expression after the number of steps to return a default value for rows that come after last lead value.
1112
- **ntile**(*k*, *\<expr\>*) -> `number`: Splits numbers yielded by given expression into `k` nearly equal-sized consecutive groups. This requires buffering whole file or group!

src/moonblade/agg/window.rs

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ impl Ranking {
159159
enum ConcreteWindowAggregation {
160160
Lead(ConcreteExpr, usize, ConcreteExpr),
161161
Lag(ConcreteExpr, usize, ConcreteExpr),
162+
FrontCoding(ConcreteExpr, Option<String>),
162163
RowNumber(usize),
163164
RowIndex(usize),
164165
CumulativeSum(ConcreteExpr, Sum),
@@ -389,6 +390,43 @@ impl ConcreteWindowAggregation {
389390

390391
Ok(value)
391392
}
393+
Self::FrontCoding(expr, last_string_opt) => {
394+
let value = eval_expression(expr, Some(index), record, headers_index)?;
395+
let string = value
396+
.try_as_str()
397+
.map_err(|err| err.anonymous())?
398+
.into_owned();
399+
400+
fn first_mismatch_index(a: &str, b: &str) -> usize {
401+
for (i, (ca, cb)) in a.chars().zip(b.chars()).enumerate() {
402+
if i == b.len() || ca != cb {
403+
return i;
404+
}
405+
}
406+
407+
return a.len();
408+
}
409+
410+
// TODO: try_into_string
411+
match last_string_opt {
412+
None => {
413+
let result = DynamicValue::from("0 ".to_string() + &string);
414+
415+
*last_string_opt = Some(string);
416+
417+
Ok(result)
418+
}
419+
Some(last_string) => {
420+
let i = first_mismatch_index(&string, &last_string);
421+
422+
let result = DynamicValue::from(format!("{} ", i) + &string[i..]);
423+
424+
*last_string_opt = Some(string);
425+
426+
Ok(result)
427+
}
428+
}
429+
}
392430
Self::RowNumber(counter) => {
393431
*counter += 1;
394432
Ok(DynamicValue::from(*counter))
@@ -489,6 +527,9 @@ impl ConcreteWindowAggregation {
489527
welford.clear();
490528
}
491529
Self::Lag(_, _, _) | Self::Lead(_, _, _) => (),
530+
Self::FrontCoding(_, string) => {
531+
*string = None;
532+
}
492533
Self::Frac(_, sum, _) => {
493534
sum.clear();
494535
}
@@ -508,9 +549,8 @@ fn get_function(name: &str) -> Option<FunctionArguments> {
508549
"row_number" | "row_index" => FunctionArguments::nullary(),
509550
"frac" => FunctionArguments::with_range(1..=2),
510551
"lag" | "lead" => FunctionArguments::with_range(1..=3),
511-
"cumsum" | "cummin" | "cummax" | "dense_rank" | "rank" | "cume_dist" | "percent_rank" => {
512-
FunctionArguments::unary()
513-
}
552+
"cumsum" | "cummin" | "cummax" | "dense_rank" | "rank" | "cume_dist" | "percent_rank"
553+
| "front_coding" => FunctionArguments::unary(),
514554
"rolling_sum" | "rolling_mean" | "rolling_avg" | "rolling_var" | "rolling_stddev"
515555
| "ntile" => FunctionArguments::binary(),
516556
_ => return None,
@@ -605,7 +645,7 @@ fn concretize_window_aggregations(
605645

606646
concrete_aggs.push((agg.agg_name, concrete_agg));
607647
}
608-
"cumsum" | "cummin" | "cummax" => {
648+
"cumsum" | "cummin" | "cummax" | "front_coding" => {
609649
let expr = concretize_expression(agg.args.pop().unwrap(), headers, None)?;
610650

611651
concrete_aggs.push((
@@ -614,6 +654,7 @@ fn concretize_window_aggregations(
614654
"cumsum" => ConcreteWindowAggregation::CumulativeSum(expr, Sum::new()),
615655
"cummin" => ConcreteWindowAggregation::CumulativeMin(expr, None),
616656
"cummax" => ConcreteWindowAggregation::CumulativeMax(expr, None),
657+
"front_coding" => ConcreteWindowAggregation::FrontCoding(expr, None),
617658
_ => unreachable!(),
618659
},
619660
))

src/moonblade/doc/window.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@
3535
"returns": "number",
3636
"help": "Returns the fraction represented by numbers yielded by given expression over the total sum of them. This requires buffering whole file or group!"
3737
},
38+
{
39+
"name": "front_coding",
40+
"arguments": ["<expr>"],
41+
"returns": "string",
42+
"help": "Compress string returned by given expression using incremental encoding (useful to compress sorted data).\nSee https://en.wikipedia.org/wiki/Incremental_encoding"
43+
},
3844
{
3945
"name": "lag",
4046
"arguments": ["<expr>", "steps?", "<expr>?"],

0 commit comments

Comments
 (0)