Skip to content

Commit 658f830

Browse files
committed
Support extension syntax without GROUP BY per paper
Views can now define measures without an explicit GROUP BY, for example: `CREATE VIEW sales_v AS SELECT year, region, SUM(amount) AS MEASURE revenue FROM sales;`. When a view has an aggregate measure and no GROUP BY, Yardstick automatically adds GROUP BY ALL to make it valid SQL. This follows Julian Hyde's paper more closely.
1 parent adc7410 commit 658f830

File tree

3 files changed

+69
-32
lines changed

3 files changed

+69
-32
lines changed

README.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ SELECT
4949
region,
5050
SUM(amount) AS MEASURE revenue,
5151
COUNT(*) AS MEASURE order_count
52-
FROM sales
53-
GROUP BY year, region;
52+
FROM sales;
5453

5554
-- Query with AGGREGATE() and AT modifiers (SEMANTIC prefix required)
5655
SEMANTIC SELECT
@@ -110,11 +109,10 @@ SELECT
110109
dimension1,
111110
dimension2,
112111
AGG(expr) AS MEASURE measure_name
113-
FROM table
114-
GROUP BY dimension1, dimension2;
112+
FROM table;
115113
```
116114

117-
Supported aggregations: `SUM`, `COUNT`, `AVG`, `MIN`, `MAX`
115+
Yardstick handles the grouping automatically: when the view has no `GROUP BY`, it adds `GROUP BY ALL`. Supported aggregations: `SUM`, `COUNT`, `AVG`, `MIN`, `MAX`
118116

119117
### Querying Measures
120118

test/sql/measures.test

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ INSERT INTO sales VALUES
2323
statement ok
2424
CREATE VIEW sales_v AS
2525
SELECT year, region, SUM(amount) AS MEASURE revenue
26-
FROM sales
27-
GROUP BY year, region;
26+
FROM sales;
2827

2928
# Basic query on view with measure
3029
query IIR rowsort
@@ -87,8 +86,7 @@ GROUP BY year, region;
8786
statement ok
8887
CREATE VIEW sales_yearly AS
8988
SELECT year, SUM(amount) AS MEASURE revenue
90-
FROM sales
91-
GROUP BY year;
89+
FROM sales;
9290

9391
# Prior year comparison
9492
query IR rowsort
@@ -181,8 +179,7 @@ SELECT
181179
SUM(amount) AS MEASURE total_revenue,
182180
COUNT(*) AS MEASURE order_count,
183181
AVG(amount) AS MEASURE avg_order
184-
FROM sales
185-
GROUP BY year;
182+
FROM sales;
186183

187184
query IRR rowsort
188185
SEMANTIC SELECT year, AGGREGATE(total_revenue), AGGREGATE(avg_order) FROM orders_v GROUP BY year;
@@ -349,8 +346,7 @@ INSERT INTO products VALUES
349346
statement ok
350347
CREATE VIEW products_v AS
351348
SELECT year, region, category, SUM(amount) AS MEASURE revenue
352-
FROM products
353-
GROUP BY year, region, category;
349+
FROM products;
354350

355351
# Grand total across all dimensions
356352
query IIIR rowsort
@@ -410,8 +406,7 @@ GROUP BY year, region, category;
410406
statement ok
411407
CREATE VIEW sales_minmax AS
412408
SELECT year, region, MIN(amount) AS MEASURE min_sale, MAX(amount) AS MEASURE max_sale
413-
FROM sales
414-
GROUP BY year, region;
409+
FROM sales;
415410

416411
query IIRR rowsort
417412
SEMANTIC SELECT year, region, AGGREGATE(min_sale), AGGREGATE(max_sale) FROM sales_minmax GROUP BY year, region;
@@ -489,8 +484,7 @@ INSERT INTO adjustments VALUES
489484
statement ok
490485
CREATE VIEW adj_v AS
491486
SELECT year, region, SUM(amount) AS MEASURE adjustment
492-
FROM adjustments
493-
GROUP BY year, region;
487+
FROM adjustments;
494488

495489
query IIR rowsort
496490
SEMANTIC SELECT year, region, AGGREGATE(adjustment) FROM adj_v GROUP BY year, region;
@@ -523,8 +517,7 @@ INSERT INTO yearly_data VALUES (2020, 100), (2021, 120), (2022, 150), (2023, 180
523517
statement ok
524518
CREATE VIEW yearly_v AS
525519
SELECT year, SUM(amount) AS MEASURE revenue
526-
FROM yearly_data
527-
GROUP BY year;
520+
FROM yearly_data;
528521

529522
query IRR rowsort
530523
SEMANTIC SELECT
@@ -647,8 +640,7 @@ GROUP BY year;
647640
statement ok
648641
CREATE VIEW quarterly AS
649642
SELECT year, quarter, SUM(amount) AS MEASURE revenue
650-
FROM (VALUES (2022, 1, 100), (2022, 2, 120), (2022, 3, 90), (2022, 4, 140)) AS t(year, quarter, amount)
651-
GROUP BY year, quarter;
643+
FROM (VALUES (2022, 1, 100), (2022, 2, 120), (2022, 3, 90), (2022, 4, 140)) AS t(year, quarter, amount);
652644

653645
query IIR rowsort
654646
SEMANTIC SELECT
@@ -670,8 +662,7 @@ GROUP BY year, quarter;
670662
statement ok
671663
CREATE OR REPLACE VIEW case_measure AS
672664
SELECT year, CASE WHEN SUM(amount) > 150 THEN 1 ELSE 0 END AS MEASURE high_value
673-
FROM (VALUES (2022, 100), (2022, 50), (2023, 200), (2023, 100)) AS t(year, amount)
674-
GROUP BY year;
665+
FROM (VALUES (2022, 100), (2022, 50), (2023, 200), (2023, 100)) AS t(year, amount);
675666

676667
query II rowsort
677668
SEMANTIC SELECT year, AGGREGATE(high_value)
@@ -699,8 +690,7 @@ SELECT year,
699690
SUM(revenue) AS MEASURE revenue,
700691
SUM(cost) AS MEASURE cost,
701692
revenue - cost AS MEASURE profit
702-
FROM financials
703-
GROUP BY year;
693+
FROM financials;
704694

705695
# Query basic measures
706696
query IRR rowsort
@@ -747,8 +737,7 @@ INSERT INTO daily_orders VALUES
747737
statement ok
748738
CREATE VIEW daily_orders_v AS
749739
SELECT order_date, SUM(amount) AS MEASURE revenue
750-
FROM daily_orders
751-
GROUP BY order_date;
740+
FROM daily_orders;
752741

753742
# SET with expression dimension: fix to a specific month
754743
# This generates WHERE MONTH(_inner.order_date) = 2
@@ -790,10 +779,10 @@ INSERT INTO fact_returns VALUES (2022, 'US', 10), (2022, 'EU', 5), (2023, 'US',
790779

791780
# Create two measure views
792781
statement ok
793-
CREATE VIEW fact_orders_v AS SELECT year, region, SUM(amount) AS MEASURE revenue FROM fact_orders GROUP BY year, region;
782+
CREATE VIEW fact_orders_v AS SELECT year, region, SUM(amount) AS MEASURE revenue FROM fact_orders;
794783

795784
statement ok
796-
CREATE VIEW fact_returns_v AS SELECT year, region, SUM(return_amount) AS MEASURE refunds FROM fact_returns GROUP BY year, region;
785+
CREATE VIEW fact_returns_v AS SELECT year, region, SUM(return_amount) AS MEASURE refunds FROM fact_returns;
797786

798787
# Query measures from both fact tables in a JOIN
799788
query IIRR rowsort

yardstick-rs/src/sql/measures.rs

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -983,9 +983,6 @@ fn extract_measures_from_sql(sql: &str) -> Result<(String, Vec<ViewMeasure>, Opt
983983
replacements.push((remove_start, info.name_end, String::new()));
984984
} else {
985985
// Base measure: replace "AS MEASURE name" with "AS name"
986-
let pattern_start = info.name_end - " AS MEASURE ".len() - info.name.len()
987-
- (sql[info.expr_start..info.name_end].len() - info.expression.len() - " AS MEASURE ".len() - info.name.len());
988-
// Actually, simpler: find " AS MEASURE " before name_end
989986
let chunk = &sql[info.expr_start..info.name_end];
990987
if let Some(am_pos) = chunk.to_uppercase().find(" AS MEASURE ") {
991988
let abs_start = info.expr_start + am_pos;
@@ -1010,6 +1007,32 @@ fn extract_measures_from_sql(sql: &str) -> Result<(String, Vec<ViewMeasure>, Opt
10101007
clean_sql = format!("{}{}{}", &clean_sql[..start], replacement, &clean_sql[end..]);
10111008
}
10121009

1010+
// If there are aggregate measures but no GROUP BY, add GROUP BY ALL
1011+
// This enables the "extension" syntax from the paper where views define
1012+
// measures without explicit grouping
1013+
let has_aggregate_measure = measures.iter().any(|m| find_aggregation_in_expression(&m.expression).is_some());
1014+
let clean_sql_upper = clean_sql.to_uppercase();
1015+
let has_group_by = clean_sql_upper.contains("GROUP BY");
1016+
1017+
if has_aggregate_measure && !has_group_by {
1018+
        // Find insertion point: before the first ORDER BY, LIMIT, or ';', or at end
1019+
let insert_pos = ["ORDER BY", "LIMIT", ";"]
1020+
.iter()
1021+
.filter_map(|kw| clean_sql_upper.find(kw))
1022+
.min()
1023+
.unwrap_or(clean_sql.len());
1024+
1025+
clean_sql = format!(
1026+
"{} GROUP BY ALL{}",
1027+
clean_sql[..insert_pos].trim_end(),
1028+
if insert_pos < clean_sql.len() {
1029+
format!(" {}", clean_sql[insert_pos..].trim_start())
1030+
} else {
1031+
String::new()
1032+
}
1033+
);
1034+
}
1035+
10131036
Ok((clean_sql, measures, view_name))
10141037
}
10151038

@@ -2249,6 +2272,33 @@ mod tests {
22492272
assert!(!result.clean_sql.contains("revenue - cost"));
22502273
}
22512274

2275+
#[test]
2276+
#[serial]
2277+
fn test_process_create_view_without_group_by() {
2278+
clear_measure_views();
2279+
2280+
// Per the paper, views can define measures without GROUP BY
2281+
let sql = r#"CREATE VIEW orders_extended AS
2282+
SELECT
2283+
id,
2284+
product,
2285+
region,
2286+
amount,
2287+
SUM(amount) AS MEASURE revenue
2288+
FROM orders"#;
2289+
let result = process_create_view(sql);
2290+
2291+
eprintln!("is_measure_view: {}", result.is_measure_view);
2292+
eprintln!("clean_sql: {}", result.clean_sql);
2293+
eprintln!("measures: {:?}", result.measures);
2294+
2295+
assert!(result.is_measure_view);
2296+
assert_eq!(result.measures.len(), 1);
2297+
assert_eq!(result.measures[0].column_name, "revenue");
2298+
        // NOTE: clean_sql is not asserted here. With an aggregate measure and no
2299+
        // explicit GROUP BY, the rewrite is expected to append GROUP BY ALL to it.
2300+
}
2301+
22522302
#[test]
22532303
fn test_has_at_syntax() {
22542304
assert!(has_at_syntax(

0 commit comments

Comments
 (0)