From cb7d38d61ae566972a82540b5fdf38fb3c09038c Mon Sep 17 00:00:00 2001 From: KhaledR57 Date: Thu, 6 Nov 2025 08:44:50 +0200 Subject: [PATCH] MDEV-24943: Implement FILTER clause support for aggregate functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggregates lacked the SQL-standard FILTER clause, forcing CASE-based workarounds that reduced readability across aggregate functions (SUM, AVG, COUNT, …). This update introduces the ability to specify a FILTER clause for aggregate functions, allowing for more granular control over which rows are included in the aggregation. It also improves standards compliance and makes queries clearer and more readable. The FILTER(WHERE ...) condition may contain any expression allowed in regular WHERE clauses, except subqueries, window functions, and outer references. --- mysql-test/main/aggregates-filter.result | 329 ++++++++++ mysql-test/main/aggregates-filter.test | 727 +++++++++++++++++++++++ sql/item_jsonfunc.cc | 5 +- sql/item_sum.cc | 153 +++-- sql/item_sum.h | 19 + sql/item_windowfunc.cc | 6 + sql/lex.h | 1 + sql/opt_sum.cc | 16 +- sql/sql_yacc.yy | 28 + 9 files changed, 1247 insertions(+), 37 deletions(-) create mode 100644 mysql-test/main/aggregates-filter.result create mode 100644 mysql-test/main/aggregates-filter.test diff --git a/mysql-test/main/aggregates-filter.result b/mysql-test/main/aggregates-filter.result new file mode 100644 index 0000000000000..989efe8cf876a --- /dev/null +++ b/mysql-test/main/aggregates-filter.result @@ -0,0 +1,329 @@ +# +# Setup test table +# +# +# Basic aggregates with WHERE +# +avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result json_objectagg_result std_result stddev_result stddev_pop_result stddev_samp_result variance_result var_pop_result var_samp_result +137.5000 550 3 75 200 Novel 0 15 6 ["Phone","Tablet",null,null,"Laptop",null,null,null] {"item2":"Tablet", 
"item5":"Laptop"} 44.7214 41.4578 0.0000 54.1987 1100.0000 4000.0000 312.5000 +# +# Aggregates with DISTINCT and WHERE +# +avg_result sum_result count_result min_result max_result group_concat_result +175.0000 525 1 75 200 Laptop,Phone,Tablet +# +# Aggregates with complex WHERE conditions (AND) +# +avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +183.3333 250 5 75 200 Laptop,Phone,Tablet 0 15 9 [null,"Tablet",null,null,"Laptop",null,null,null] 41.4578 742.1875 +# +# Aggregates with WHERE IN and OR +# +avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +137.5000 825 6 50 200 Guide,Novel,Pants,Phone,Shirt,Textbook 0 15 14 ["Phone","Tablet",null,null,"Laptop","Shirt","Pants",null] 55.0973 2447.9167 +# +# Aggregates with WHERE BETWEEN +# +avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +150.0000 625 5 75 150 Laptop,Pants,Phone,Shirt,Tablet 0 15 1 ["Phone","Tablet",null,"Textbook","Laptop",null,null,"Guide"] 20.4124 555.5556 +# +# Aggregates with WHERE LIKE +# +avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +131.2500 250 3 100 100 Guide,Novel,Textbook 6 15 5 [null,"Tablet",null,null,null,"Shirt",null,null] 12.5000 2447.9167 +# +# Basic aggregates with GROUP BY +# +category avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +Books 200.0000 400 2 NULL 200 NULL 7 15 8 [null,null,null] 0.0000 0.0000 +Clothing 75.0000 NULL 2 75 75 NULL 0 0 0 [null,null] NULL NULL +Electronics 116.6667 
150 3 100 100 Laptop,Phone,Tablet 0 15 1 [null,"Tablet","Laptop"] 23.5702 555.5556 +# +# Aggregates with GROUP BY and WHERE IS NOT NULL +# +category avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +Books 200.0000 400 2 200 200 Guide,Novel,Textbook 7 15 8 ["Novel","Textbook","Guide"] 0.0000 0.0000 +Clothing 62.5000 75 2 75 75 Pants,Shirt 0 15 15 ["Shirt","Pants"] 12.5000 156.2500 +Electronics 116.6667 350 3 100 150 Laptop,Phone,Tablet 0 15 1 ["Phone","Tablet","Laptop"] 23.5702 555.5556 +# +# Aggregates with multiple GROUP BY columns +# +category status avg_result sum_result count_result min_result max_result group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +Books active 200.0000 400 2 200 200 Guide,Textbook 7 15 8 ["Textbook","Guide"] 0.0000 0.0000 +Books inactive NULL NULL 1 NULL NULL Novel 18446744073709551615 0 0 ["Novel"] NULL NULL +Clothing NULL 50.0000 50 1 NULL 50 Shirt 3 0 3 ["Shirt"] 0.0000 0.0000 +Clothing active 75.0000 75 1 75 75 Pants 12 12 12 ["Pants"] 0.0000 0.0000 +Electronics active 116.6667 350 3 100 150 Laptop,Phone,Tablet 0 15 9 ["Phone","Tablet","Laptop"] 23.5702 555.5556 +# +# GROUP BY with HAVING on aggregates +# +category avg_result sum_result count_result min_result max_result +Books 200.0000 400 2 NULL 200 +# +# HAVING with alias +# +category avg_value sum_value count_value min_value max_value +Books 200.0000 400 2 NULL 200 +# +# WHERE + GROUP BY + HAVING +# +category avg_result sum_result count_result min_result max_result +Books 200.0000 400 2 NULL 200 +Electronics 116.6667 150 3 100 100 +# +# WHERE + multiple aggregates with FILTER +# +category total_avg active_avg total_sum active_sum total_count active_count total_min active_min total_max active_max +Books 200.0000 200.0000 400 NULL 2 2 NULL NULL 200 200 +Clothing 62.5000 75.0000 NULL NULL 2 2 NULL NULL 
75 75 +Electronics 116.6667 116.6667 150 350 3 1 100 100 150 150 +# +# WHERE + FILTER + HAVING +# +category active_avg active_sum active_count +Books 200.0000 400 0 +Clothing 75.0000 NULL 0 +Electronics 116.6667 150 3 +# +# WHERE + window function (OVER) +# +category cat_avg cat_sum cat_count cat_min cat_max +Books 200.0000 400 2 NULL 200 +Books 200.0000 400 2 NULL 200 +Books 200.0000 400 2 NULL 200 +Clothing 75.0000 NULL 2 75 75 +Clothing 75.0000 NULL 2 75 75 +Electronics 116.6667 150 3 100 100 +Electronics 116.6667 150 3 100 100 +Electronics 116.6667 150 3 100 100 +# +# WHERE + window function with ORDER BY +# +id running_avg running_sum running_count running_min running_max +1 100.0000 100 1 100 100 +2 125.0000 250 2 100 150 +3 125.0000 250 3 100 150 +4 150.0000 450 4 100 200 +5 137.5000 550 5 100 200 +6 120.0000 550 6 100 200 +7 112.5000 625 7 75 200 +8 125.0000 825 8 75 200 +# +# WHERE + window function with frame specification +# +id windowed_avg windowed_sum windowed_count windowed_min windowed_max +1 125.0000 150 2 100 NULL +2 125.0000 150 2 100 NULL +3 175.0000 350 2 150 200 +4 150.0000 200 2 100 200 +5 150.0000 200 3 100 200 +6 87.5000 NULL 3 75 100 +7 137.5000 200 3 75 200 +8 137.5000 200 2 75 200 +# +# Subquery with aggregates in WHERE +# +id category status value price amount name key_name value_col bit_value geom +3 Books inactive NULL 19.99 19.99 Novel item3 Novel NULL POINT(3 3) +4 Books active 200 29.99 29.99 Textbook item4 Textbook 7 POINT(4 4) +8 Books active 200 24.99 24.99 Guide item8 Guide 15 POINT(8 8) +# +# Aggregates with GROUP BY and ORDER BY +# +category avg_val sum_val count_val min_val max_val +Books 200.0000 400 2 NULL 200 +Electronics 116.6667 150 3 100 100 +Clothing 75.0000 NULL 2 75 75 +# +# Complete query with WHERE, GROUP BY, HAVING, ORDER BY +# +category avg_val sum_val count_val min_val max_val +Books 200.0000 400 2 NULL 200 +Electronics 116.6667 150 3 100 100 +# +# Aggregates with GROUP BY, ORDER BY, and LIMIT +# +category 
avg_val sum_val count_val +Books 200.0000 400 2 +Electronics 116.6667 150 3 +# +# Aggregates with ROLLUP +# +category status avg_val sum_val count_val min_val max_val +Books active 200.0000 400 2 NULL 200 +Books inactive NULL NULL 0 NULL NULL +Books NULL 200.0000 400 2 NULL 200 +Clothing NULL NULL NULL 1 NULL 50 +Clothing active 75.0000 NULL 1 75 75 +Clothing NULL 75.0000 NULL 2 75 75 +Electronics active 116.6667 150 3 100 100 +Electronics NULL 116.6667 150 3 100 100 +NULL NULL 137.5000 550 7 75 200 +# +# Most complete query with all clauses and aggregates +# +category status avg_value sum_value count_value min_value max_value avg_amount sum_amount distinct_value_avg distinct_value_sum distinct_value_count group_concat_result bit_and_result bit_or_result bit_xor_result json_arrayagg_result std_result variance_result +Books active 200.0000 400 2 NULL 200 NULL 54.98 200.0000 200 1 Guide 7 15 0 [null,null] NULL 0.0000 +Electronics active 100.0000 350 1 100 150 333.326667 999.98 100.0000 NULL 2 Laptop 0 15 7 [null,null,null] 23.5702 555.5556 +Clothing active 75.0000 75 0 75 75 NULL NULL 75.0000 NULL 1 Pants 18446744073709551615 0 0 [null] 0.0000 NULL +# +# COUNT with column names (not just *) +# +count_value count_category count_name +6 3 3 +# +# COUNT DISTINCT with multiple columns +# +count_distinct +3 +# +# FILTER on indexed column +# +# Create index on status column +avg_result sum_result count_result +137.5000 350 3 +# +# Empty result set (all rows filtered out) +# +avg_result sum_result count_result min_result max_result +NULL NULL 0 NULL NULL +# +# FILTER with NULL conditions +# +avg_result count_result sum_result +NULL 0 NULL +# +# FILTER with JOINs +# +# Create second table for JOIN test +category avg_result sum_result count_result +Books 200.0000 200 2 +Electronics 150.0000 NULL 0 +# +# FILTER with window functions - different partitions +# +category status cat_avg status_avg cat_count status_count +Books active 200.0000 183.3333 2 4 +Books active 200.0000 
183.3333 2 4 +Books inactive 200.0000 NULL 2 0 +Clothing NULL 75.0000 NULL 2 0 +Clothing active 75.0000 183.3333 2 4 +Electronics active 116.6667 183.3333 3 4 +Electronics active 116.6667 183.3333 3 4 +Electronics active 116.6667 183.3333 3 4 +# +# FILTER with empty table +# +# Create empty table +avg_result sum_result count_result min_result max_result +NULL NULL 0 NULL NULL +# +# FILTER with CASE in WHERE clause +# +avg_result sum_result count_result +137.5000 550 3 +# +# ORDER BY with FILTER aggregates +# +category avg_val sum_val +Books 200.0000 400 +Electronics 116.6667 150 +Clothing 75.0000 NULL +# +# Aggregates on id column itself with FILTER +# +avg_id sum_id count_id min_id max_id distinct_count_id +4.5000 14 3 1 8 5 +# +# Aggregates on id with GROUP BY and FILTER +# +category avg_id sum_id count_id min_id max_id +Books 6.0000 12 2 NULL 8 +Clothing 6.5000 7 0 7 7 +Electronics 5.0000 8 1 1 5 +# +# ======================================== +# ERROR CASES SECTION +# ======================================== +# +# +# FILTER with Non-Aggregate Window Functions (Ranking) +# +# FILTER with RANK (should error - FILTER only works with aggregates) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FILTER (WHERE status = 'active') OVER (ORDER BY value) FROM test_aggregates' at line 1 +# +# FILTER with Non-Aggregate Window Functions (Navigation) +# +# FILTER with LAG (should error - FILTER only works with aggregates) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FILTER (WHERE status = 'active') OVER (ORDER BY id) FROM test_aggregates' at line 1 +# +# Wrong Order - OVER before FILTER +# +# OVER clause before FILTER clause (should error - correct order is FILTER then OVER) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your 
MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# +# Non-Aggregate Scalar Functions +# +# Regular scalar function (should error - FILTER only for aggregates) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# +# Plain Column or Expression +# +# FILTER on plain column (should error - not a function) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# +# Empty or Invalid Clause +# +# Empty FILTER clause (should error - WHERE condition required) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') FROM test_aggregates' at line 1 +# +# Nested FILTER Clauses +# +# Nested FILTER clauses (should error - cannot nest FILTER) +ERROR HY000: Invalid use of group function +# +# Invalid WHERE Syntax +# +# Multiple WHERE keywords (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'WHERE value > 100) FROM test_aggregates' at line 1 +# +# Column Aliases +# +# Using column alias in FILTER WHERE (should error - alias not available) +ERROR 42S22: Unknown column 'val' in 'SELECT' +# +# Subqueries in WHERE Condition +# +# Scalar subquery in FILTER WHERE (should error - subqueries not allowed) +ERROR HY000: Incorrect usage of subquery and FILTER +# IN subquery in FILTER WHERE (should error - subqueries not allowed) +ERROR HY000: Incorrect usage of subquery and FILTER +# +# FILTER with Window Functions in WHERE Condition +# +# Window function in FILTER WHERE condition (should error - not allowed) 
+ERROR HY000: Incorrect usage of window function and FILTER +# +# FILTER with aggregate functions in WHERE clause (should expect error) +# +# This should error - aggregate in WHERE clause +ERROR HY000: Invalid use of group function +# +# FILTER with error cases - syntax errors +# +# Missing WHERE keyword (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'status = 'active') FROM test_aggregates' at line 1 +# Missing parentheses (should error) +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'WHERE status = 'active' FROM test_aggregates' at line 1 +# +# Illegal use of FILTER with non-aggregate UDFs +# +# Create a non-aggregate UDF for testing +# FILTER clause with non-aggregate UDF should fail +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '(WHERE status = 'active') FROM test_aggregates' at line 1 +# +# CLEANUP +# +# +# End of aggregates FILTER test +# diff --git a/mysql-test/main/aggregates-filter.test b/mysql-test/main/aggregates-filter.test new file mode 100644 index 0000000000000..e21966471d969 --- /dev/null +++ b/mysql-test/main/aggregates-filter.test @@ -0,0 +1,727 @@ +# +# MDEV-24943: Add FILTER clause +# +# Test for aggregate functions with FILTER clause. +# The FILTER clause extends aggregate functions with a WHERE condition. 
+# +# Test coverage includes: +# - Basic aggregate functions (AVG, SUM, COUNT, MIN, MAX) +# - Extended aggregates (GROUP_CONCAT, BIT_*, JSON_*, STD, VARIANCE) +# - DISTINCT and column/star variations +# - GROUP BY, HAVING, ORDER BY, LIMIT +# - Window functions with FILTER +# - Subqueries, JOINs, and edge cases +# - Error cases for invalid FILTER usage +# + +--disable_query_log +--echo # +--echo # Setup test table +--echo # +CREATE TABLE test_aggregates ( + id INT PRIMARY KEY, + category VARCHAR(50), + status VARCHAR(20), + value INT, + price DECIMAL(10,2), + amount DECIMAL(10,2) unique NOT NULL, + name VARCHAR(50), + key_name VARCHAR(50), + value_col VARCHAR(50), + bit_value INT, + geom GEOMETRY +); + +INSERT INTO test_aggregates VALUES +(1, 'Electronics', 'active', 100, 299.99, 100.00, 'Phone', 'item1', 'Phone', 6, ST_GeomFromText('POINT(1 1)')), +(2, 'Electronics', 'active', 150, 399.99, 399.99, 'Tablet', 'item2', 'Tablet', 14, ST_GeomFromText('POINT(2 2)')), +(3, 'Books', 'inactive', NULL, 19.99, 19.99, 'Novel', 'item3', 'Novel', NULL, ST_GeomFromText('POINT(3 3)')), +(4, 'Books', 'active', 200, 29.99, 29.99, 'Textbook', 'item4', 'Textbook', 7, ST_GeomFromText('POINT(4 4)')), +(5, 'Electronics', 'active', 100, 499.99, 499.99, 'Laptop', 'item5', 'Laptop', 9, ST_GeomFromText('POINT(5 5)')), +(6, 'Clothing', NULL, 50, NULL, 50.00, 'Shirt', 'item6', 'Shirt', 3, ST_GeomFromText('POINT(6 6)')), +(7, 'Clothing', 'active', 75, 89.99, 89.99, 'Pants', 'item7', 'Pants', 12, ST_GeomFromText('POINT(7 7)')), +(8, 'Books', 'active', 200, 24.99, 24.99, 'Guide', 'item8', 'Guide', 15, ST_GeomFromText('POINT(8 8)')); + + +--echo # +--echo # Basic aggregates with WHERE +--echo # + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE category = 'Electronics') as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE value IS NOT NULL) as 
max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE status = 'inactive') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE value BETWEEN 100 AND 200) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IN ('Books', 'Electronics')) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE bit_value IS NOT NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE price > 100) as json_arrayagg_result, + JSON_OBJECTAGG(key_name, value_col) FILTER (WHERE amount > 200) as json_objectagg_result, + STD(value) FILTER (WHERE value >= 100) as std_result, + STDDEV(value) FILTER (WHERE id <= 5) as stddev_result, + STDDEV_POP(value) FILTER (WHERE category = 'Books') as stddev_pop_result, + STDDEV_SAMP(value) FILTER (WHERE status IS NOT NULL) as stddev_samp_result, + VARIANCE(value) FILTER (WHERE value < 200) as variance_result, + VAR_POP(value) FILTER (WHERE amount BETWEEN 20 AND 100) as var_pop_result, + VAR_SAMP(value) FILTER (WHERE category = 'Clothing') as var_samp_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with DISTINCT and WHERE +--echo # + +SELECT + AVG(DISTINCT value) FILTER (WHERE value > 100) as avg_result, + SUM(DISTINCT value) FILTER (WHERE status = 'active') as sum_result, + COUNT(DISTINCT category) FILTER (WHERE category = 'Electronics') as count_result, + MIN(DISTINCT value) FILTER (WHERE amount > 50) as min_result, + MAX(DISTINCT value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100) as group_concat_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with complex WHERE conditions (AND) +--echo # + +SELECT + AVG(value) FILTER (WHERE status = 'active' AND value > 100) as avg_result, + SUM(value) FILTER (WHERE category = 'Electronics' AND amount > 200) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL AND id > 3) as count_result, + MIN(value) FILTER (WHERE status = 'active' AND amount > 50) as min_result, + 
MAX(value) FILTER (WHERE category IN ('Books', 'Electronics') AND value > 0) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100 AND status = 'active') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL AND value > 50) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category = 'Books' AND amount > 20) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE value BETWEEN 100 AND 200 AND status IS NOT NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 100 AND category = 'Electronics') as json_arrayagg_result, + STD(value) FILTER (WHERE value >= 100 AND id <= 5) as std_result, + VARIANCE(value) FILTER (WHERE status = 'active' AND value < 200) as variance_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with WHERE IN and OR +--echo # + +SELECT + AVG(value) FILTER (WHERE category IN ('Books', 'Electronics') OR amount > 50) as avg_result, + SUM(value) FILTER (WHERE status = 'active' OR value > 100) as sum_result, + COUNT(*) FILTER (WHERE category = 'Electronics' OR id > 5) as count_result, + MIN(value) FILTER (WHERE value IS NOT NULL OR amount > 200) as min_result, + MAX(value) FILTER (WHERE price > 100 OR status IS NOT NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE category = 'Books' OR amount BETWEEN 20 AND 100) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL OR value >= 100) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IN ('Clothing', 'Electronics') OR amount < 100) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE status = 'inactive' OR value < 200) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 50 OR category = 'Clothing') as json_arrayagg_result, + STD(value) FILTER (WHERE value BETWEEN 50 AND 200 OR id <= 4) as std_result, + VARIANCE(value) FILTER (WHERE category = 'Electronics' OR price IS NOT NULL) as variance_result +FROM test_aggregates; + +--echo # 
+--echo # Aggregates with WHERE BETWEEN +--echo # + +SELECT + AVG(value) FILTER (WHERE value BETWEEN 100 AND 200) as avg_result, + SUM(value) FILTER (WHERE amount BETWEEN 20 AND 100) as sum_result, + COUNT(*) FILTER (WHERE id BETWEEN 2 AND 6) as count_result, + MIN(value) FILTER (WHERE price BETWEEN 25 AND 400) as min_result, + MAX(value) FILTER (WHERE value BETWEEN 50 AND 150) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE amount BETWEEN 50 AND 500) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value BETWEEN 5 AND 15) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value BETWEEN 75 AND 175) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id BETWEEN 3 AND 7) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE value BETWEEN 100 AND 250) as json_arrayagg_result, + STD(value) FILTER (WHERE amount BETWEEN 30 AND 200) as std_result, + VARIANCE(value) FILTER (WHERE value BETWEEN 80 AND 180) as variance_result +FROM test_aggregates; + +--echo # +--echo # Aggregates with WHERE LIKE +--echo # + +SELECT + AVG(DISTINCT value) FILTER (WHERE status LIKE '%active%') as avg_result, + SUM(DISTINCT value) FILTER (WHERE category LIKE 'Elect%') as sum_result, + COUNT(DISTINCT category) FILTER (WHERE name LIKE '%t%') as count_result, + MIN(DISTINCT value) FILTER (WHERE category LIKE '%s') as min_result, + MAX(DISTINCT value) FILTER (WHERE name LIKE 'P%') as max_result, + GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ',') FILTER (WHERE category LIKE 'B%') as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE name LIKE '%one') as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category LIKE '%ics') as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE status LIKE 'a%') as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name LIKE '%t') as json_arrayagg_result, + STD(value) FILTER (WHERE category LIKE 'C%') as std_result, + VARIANCE(value) FILTER (WHERE status LIKE '%e') as variance_result +FROM test_aggregates; + 
+--echo # +--echo # Basic aggregates with GROUP BY +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE price > 100) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value BETWEEN 100 AND 200) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE category IN ('Books', 'Electronics')) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE amount > 200) as json_arrayagg_result, + STD(value) FILTER (WHERE value >= 100) as std_result, + VARIANCE(value) FILTER (WHERE id <= 5) as variance_result +FROM test_aggregates GROUP BY category; + +--echo # +--echo # Aggregates with GROUP BY and WHERE IS NOT NULL +--echo # + +SELECT category, + AVG(value) FILTER (WHERE value IS NOT NULL) as avg_result, + SUM(value) FILTER (WHERE status IS NOT NULL) as sum_result, + COUNT(value) FILTER (WHERE bit_value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE price IS NOT NULL) as min_result, + MAX(value) FILTER (WHERE amount IS NOT NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE name IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value IS NOT NULL) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IS NOT NULL) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE key_name IS NOT NULL) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE value_col IS NOT NULL) as json_arrayagg_result, + STD(value) FILTER (WHERE value IS NOT NULL AND value > 0) as std_result, + VARIANCE(value) FILTER (WHERE value IS NOT NULL AND id > 0) as variance_result +FROM test_aggregates GROUP BY category; + +--echo # +--echo # 
Aggregates with multiple GROUP BY columns +--echo # + +SELECT category, status, + AVG(value) FILTER (WHERE value > 0) as avg_result, + SUM(value) FILTER (WHERE amount > 0) as sum_result, + COUNT(*) FILTER (WHERE id > 0) as count_result, + MIN(value) FILTER (WHERE price > 0) as min_result, + MAX(value) FILTER (WHERE value IS NOT NULL) as max_result, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE category IS NOT NULL) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE bit_value > 0) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE value > 50) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE id > 2) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE name IS NOT NULL) as json_arrayagg_result, + STD(value) FILTER (WHERE value >= 0) as std_result, + VARIANCE(value) FILTER (WHERE amount >= 0) as variance_result +FROM test_aggregates GROUP BY category, status; + +--echo # +--echo # GROUP BY with HAVING on aggregates +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result +FROM test_aggregates GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 120 AND SUM(value) FILTER (WHERE value > 100) > 200; + +--echo # +--echo # HAVING with alias +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_value, + SUM(value) FILTER (WHERE value > 100) as sum_value, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_value, + MIN(value) FILTER (WHERE amount > 50) as min_value, + MAX(value) FILTER (WHERE id > 3) as max_value +FROM test_aggregates GROUP BY category HAVING avg_value > 100 AND sum_value > 200; + +--echo # +--echo # WHERE + GROUP BY + HAVING +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) 
FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) FILTER (WHERE amount > 50) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result +FROM test_aggregates +GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 0 AND SUM(value) FILTER (WHERE value > 100) > 0; + +--echo # +--echo # WHERE + multiple aggregates with FILTER +--echo # + +SELECT category, + AVG(value) FILTER (WHERE amount > 0) as total_avg, + AVG(value) FILTER (WHERE status = 'active') as active_avg, + SUM(value) FILTER (WHERE value > 100) as total_sum, + SUM(value) FILTER (WHERE category = 'Electronics') as active_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) as total_count, + COUNT(*) FILTER (WHERE id > 3) as active_count, + MIN(value) FILTER (WHERE price > 100) as total_min, + MIN(value) FILTER (WHERE amount > 200) as active_min, + MAX(value) FILTER (WHERE value BETWEEN 50 AND 200) as total_max, + MAX(value) FILTER (WHERE bit_value IS NOT NULL) as active_max +FROM test_aggregates +GROUP BY category; + +--echo # +--echo # WHERE + FILTER + HAVING +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as active_avg, + SUM(value) FILTER (WHERE value > 100) as active_sum, + COUNT(*) FILTER (WHERE category = 'Electronics') as active_count +FROM test_aggregates +WHERE value IS NOT NULL +GROUP BY category +HAVING AVG(value) FILTER (WHERE value IS NOT NULL) > 1 AND SUM(value) FILTER (WHERE amount > 0) > 1; + +--echo # +--echo # WHERE + window function (OVER) +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') OVER (PARTITION BY category) as cat_avg, + SUM(value) FILTER (WHERE value > 100) OVER (PARTITION BY category) as cat_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (PARTITION BY category) as cat_count, + MIN(value) FILTER (WHERE amount > 50) OVER (PARTITION BY category) as cat_min, + MAX(value) FILTER (WHERE id > 3) OVER (PARTITION BY category) as cat_max +FROM 
test_aggregates; + +--echo # +--echo # WHERE + window function with ORDER BY +--echo # + +SELECT id, + AVG(value) FILTER (WHERE value IS NOT NULL) OVER (ORDER BY id) as running_avg, + SUM(value) FILTER (WHERE status = 'active') OVER (ORDER BY id) as running_sum, + COUNT(*) FILTER (WHERE amount > 0) OVER (ORDER BY id) as running_count, + MIN(value) FILTER (WHERE value > 50) OVER (ORDER BY id) as running_min, + MAX(value) FILTER (WHERE id > 0) OVER (ORDER BY id) as running_max +FROM test_aggregates; + +--echo # +--echo # WHERE + window function with frame specification +--echo # + +SELECT id, + AVG(value) FILTER (WHERE status = 'active') OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_avg, + SUM(value) FILTER (WHERE value > 100) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_sum, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_count, + MIN(value) FILTER (WHERE amount > 50) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_min, + MAX(value) FILTER (WHERE id > 3) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as windowed_max +FROM test_aggregates; + +--echo # +--echo # Subquery with aggregates in WHERE +--echo # + +SELECT id, category, status, value, price, amount, name, key_name, value_col, bit_value, ST_AsText(geom) as geom FROM test_aggregates +WHERE category IN ( + SELECT category FROM test_aggregates GROUP BY category + HAVING AVG(value) FILTER (WHERE status = 'active') > 120 AND SUM(value) FILTER (WHERE value > 100) > 200 +); + +--echo # +--echo # Aggregates with GROUP BY and ORDER BY +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val +FROM test_aggregates +GROUP BY 
category +ORDER BY avg_val DESC, sum_val DESC; + +--echo # +--echo # Complete query with WHERE, GROUP BY, HAVING, ORDER BY +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val +FROM test_aggregates +WHERE status = 'active' +GROUP BY category +HAVING AVG(value) FILTER (WHERE status = 'active') > 0 AND SUM(value) FILTER (WHERE value > 100) > 0 +ORDER BY avg_val DESC, sum_val DESC; + +--echo # +--echo # Aggregates with GROUP BY, ORDER BY, and LIMIT +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val +FROM test_aggregates +GROUP BY category +ORDER BY AVG(value) FILTER (WHERE status = 'active') DESC +LIMIT 2; + +--echo # +--echo # Aggregates with ROLLUP +--echo # + +SELECT category, status, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_val, + MIN(value) FILTER (WHERE amount > 50) as min_val, + MAX(value) FILTER (WHERE id > 3) as max_val +FROM test_aggregates +GROUP BY category, status WITH ROLLUP; + +--echo # +--echo # Most complete query with all clauses and aggregates +--echo # + +SELECT + category, + status, + AVG(value) FILTER (WHERE id > 3) as avg_value, + SUM(value) FILTER (WHERE status = 'active') as sum_value, + COUNT(*) FILTER (WHERE value > 100) as count_value, + MIN(value) FILTER (WHERE amount > 50) as min_value, + MAX(value) FILTER (WHERE value IS NOT NULL) as max_value, + AVG(amount) FILTER (WHERE price > 100 AND value IS NOT NULL) as avg_amount, + SUM(amount) FILTER (WHERE value BETWEEN 100 AND 200 AND value IS NOT NULL) as sum_amount, + AVG(DISTINCT value) 
FILTER (WHERE id > 2 AND value IS NOT NULL) as distinct_value_avg, + SUM(DISTINCT value) FILTER (WHERE category = 'Books' AND value IS NOT NULL) as distinct_value_sum, + COUNT(DISTINCT value) FILTER (WHERE bit_value IS NOT NULL AND value IS NOT NULL) as distinct_value_count, + GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FILTER (WHERE id > 4) as group_concat_result, + BIT_AND(bit_value) FILTER (WHERE value >= 100) as bit_and_result, + BIT_OR(bit_value) FILTER (WHERE category IN ('Books', 'Electronics')) as bit_or_result, + BIT_XOR(bit_value) FILTER (WHERE amount > 200) as bit_xor_result, + JSON_ARRAYAGG(name) FILTER (WHERE status = 'inactive') as json_arrayagg_result, + STD(value) FILTER (WHERE value < 200) as std_result, + VARIANCE(value) FILTER (WHERE id <= 5) as variance_result +FROM test_aggregates +GROUP BY category, status +HAVING AVG(value) FILTER (WHERE id > 3) > 0 AND SUM(value) FILTER (WHERE status = 'active') > 0 +ORDER BY avg_value DESC, category +LIMIT 10 OFFSET 0; + +--echo # +--echo # COUNT with column names (not just *) +--echo # + +SELECT + COUNT(value) FILTER (WHERE status = 'active') as count_value, + COUNT(category) FILTER (WHERE value > 100) as count_category, + COUNT(name) FILTER (WHERE category = 'Electronics') as count_name +FROM test_aggregates; + +--echo # +--echo # COUNT DISTINCT with multiple columns +--echo # + +SELECT + COUNT(DISTINCT category, status) FILTER (WHERE status = 'active' OR value > 100) as count_distinct +FROM test_aggregates; + +--echo # +--echo # FILTER on indexed column +--echo # + +--echo # Create index on status column +CREATE INDEX idx_status ON test_aggregates(status); +CREATE INDEX idx_category ON test_aggregates(category); + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) FILTER (WHERE category = 'Electronics') as sum_result, + COUNT(*) FILTER (WHERE value > 100) as count_result +FROM test_aggregates; + +--echo # +--echo # Empty result set (all rows filtered out) +--echo # + 
+SELECT + AVG(value) FILTER (WHERE 1=0) as avg_result, + SUM(value) FILTER (WHERE 2=1) as sum_result, + COUNT(*) FILTER (WHERE 3=2) as count_result, + MIN(value) FILTER (WHERE 4=3) as min_result, + MAX(value) FILTER (WHERE 5=4) as max_result +FROM test_aggregates; + +--echo # +--echo # FILTER with NULL conditions +--echo # + +SELECT + AVG(value) FILTER (WHERE NULL) as avg_result, + COUNT(*) FILTER (WHERE 1 = NULL) as count_result, + SUM(value) FILTER (WHERE status = NULL) as sum_result +FROM test_aggregates; + +--echo # +--echo # FILTER with JOINs +--echo # + +--echo # Create second table for JOIN test +CREATE TABLE test_aggregates2 ( + id INT PRIMARY KEY, + ref_id INT, + extra_value INT +); + +INSERT INTO test_aggregates2 VALUES +(1, 1, 10), +(2, 2, 20), +(3, 3, 30), +(4, 4, 40); + +SELECT + t1.category, + AVG(t1.value) FILTER (WHERE t2.extra_value > 15) as avg_result, + SUM(t1.value) FILTER (WHERE t2.extra_value > 20) as sum_result, + COUNT(*) FILTER (WHERE t2.extra_value > 25) as count_result +FROM test_aggregates t1 +JOIN test_aggregates2 t2 ON t1.id = t2.ref_id +GROUP BY t1.category; + +DROP TABLE test_aggregates2; + +--echo # +--echo # FILTER with window functions - different partitions +--echo # + +SELECT category, status, + AVG(value) FILTER (WHERE status = 'active') OVER (PARTITION BY category) as cat_avg, + AVG(value) FILTER (WHERE value > 100) OVER (PARTITION BY status) as status_avg, + COUNT(*) FILTER (WHERE value IS NOT NULL) OVER (PARTITION BY category) as cat_count, + COUNT(*) FILTER (WHERE amount > 50) OVER (PARTITION BY status) as status_count +FROM test_aggregates +ORDER BY category, status; + +--echo # +--echo # FILTER with empty table +--echo # + +--echo # Create empty table +CREATE TABLE empty_test ( + id INT, + value INT, + status VARCHAR(20) +); + +SELECT + AVG(value) FILTER (WHERE status = 'active') as avg_result, + SUM(value) FILTER (WHERE value > 100) as sum_result, + COUNT(*) FILTER (WHERE value IS NOT NULL) as count_result, + MIN(value) 
FILTER (WHERE id > 2) as min_result, + MAX(value) FILTER (WHERE id > 3) as max_result +FROM empty_test; + +DROP TABLE empty_test; + +--echo # +--echo # FILTER with CASE in WHERE clause +--echo # + +SELECT + AVG(value) FILTER (WHERE CASE WHEN status = 'active' THEN 1 ELSE 0 END = 1) as avg_result, + SUM(value) FILTER (WHERE CASE WHEN value > 100 THEN 1 ELSE 0 END = 1) as sum_result, + COUNT(*) FILTER (WHERE CASE WHEN category = 'Electronics' THEN 1 ELSE 0 END = 1) as count_result +FROM test_aggregates; + +--echo # +--echo # ORDER BY with FILTER aggregates +--echo # + +SELECT category, + AVG(value) FILTER (WHERE status = 'active') as avg_val, + SUM(value) FILTER (WHERE value > 100) as sum_val +FROM test_aggregates +GROUP BY category +ORDER BY AVG(value) FILTER (WHERE status = 'active') DESC, SUM(value) FILTER (WHERE value > 100) DESC; + +--echo # +--echo # Aggregates on id column itself with FILTER +--echo # + +SELECT + AVG(id) FILTER (WHERE status = 'active') as avg_id, + SUM(id) FILTER (WHERE value > 100) as sum_id, + COUNT(id) FILTER (WHERE category = 'Electronics') as count_id, + MIN(id) FILTER (WHERE amount > 50) as min_id, + MAX(id) FILTER (WHERE value IS NOT NULL) as max_id, + COUNT(DISTINCT id) FILTER (WHERE id > 3) as distinct_count_id +FROM test_aggregates; + +--echo # +--echo # Aggregates on id with GROUP BY and FILTER +--echo # + +SELECT category, + AVG(id) FILTER (WHERE id > 3) as avg_id, + SUM(id) FILTER (WHERE status = 'active') as sum_id, + COUNT(id) FILTER (WHERE value > 100) as count_id, + MIN(id) FILTER (WHERE amount > 50) as min_id, + MAX(id) FILTER (WHERE value IS NOT NULL) as max_id +FROM test_aggregates +GROUP BY category; + +--echo # +--echo # ======================================== +--echo # ERROR CASES SECTION +--echo # ======================================== +--echo # + +--echo # +--echo # FILTER with Non-Aggregate Window Functions (Ranking) +--echo # + +--echo # FILTER with RANK (should error - FILTER only works with aggregates) +--error 
ER_PARSE_ERROR +SELECT RANK() FILTER (WHERE status = 'active') OVER (ORDER BY value) FROM test_aggregates; + +--echo # +--echo # FILTER with Non-Aggregate Window Functions (Navigation) +--echo # + +--echo # FILTER with LAG (should error - FILTER only works with aggregates) +--error ER_PARSE_ERROR +SELECT LAG(value) FILTER (WHERE status = 'active') OVER (ORDER BY id) FROM test_aggregates; + +--echo # +--echo # Wrong Order - OVER before FILTER +--echo # + +--echo # OVER clause before FILTER clause (should error - correct order is FILTER then OVER) +--error ER_PARSE_ERROR +SELECT AVG(value) OVER (PARTITION BY category) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # +--echo # Non-Aggregate Scalar Functions +--echo # + +--echo # Regular scalar function (should error - FILTER only for aggregates) +--error ER_PARSE_ERROR +SELECT UPPER(name) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # +--echo # Plain Column or Expression +--echo # + +--echo # FILTER on plain column (should error - not a function) +--error ER_PARSE_ERROR +SELECT value FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # +--echo # Empty or Invalid Clause +--echo # + +--echo # Empty FILTER clause (should error - WHERE condition required) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER () FROM test_aggregates; + +--echo # +--echo # Nested FILTER Clauses +--echo # + +--echo # Nested FILTER clauses (should error - cannot nest FILTER) +--error ER_INVALID_GROUP_FUNC_USE +SELECT AVG(value) FILTER (WHERE COUNT(*) FILTER (WHERE status = 'active') > 5) FROM test_aggregates; + +--echo # +--echo # Invalid WHERE Syntax +--echo # + +--echo # Multiple WHERE keywords (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER (WHERE status = 'active' WHERE value > 100) FROM test_aggregates; + +--echo # +--echo # Column Aliases +--echo # + +--echo # Using column alias in FILTER WHERE (should error - alias not available) +--error ER_BAD_FIELD_ERROR +SELECT value 
as val, AVG(value) FILTER (WHERE val > 100) FROM test_aggregates GROUP BY value; + +--echo # +--echo # Subqueries in WHERE Condition +--echo # + +--echo # Scalar subquery in FILTER WHERE (should error - subqueries not allowed) +--error ER_WRONG_USAGE +SELECT AVG(value) FILTER (WHERE value > (SELECT AVG(value) FROM test_aggregates)) FROM test_aggregates; + +--echo # IN subquery in FILTER WHERE (should error - subqueries not allowed) +--error ER_WRONG_USAGE +SELECT COUNT(*) FILTER (WHERE category IN (SELECT DISTINCT category FROM test_aggregates WHERE status = 'active')) FROM test_aggregates; + +--echo # +--echo # FILTER with Window Functions in WHERE Condition +--echo # + +--echo # Window function in FILTER WHERE condition (should error - not allowed) +--error ER_WRONG_USAGE +SELECT AVG(value) FILTER (WHERE ROW_NUMBER() OVER (ORDER BY id) > 2) FROM test_aggregates; + +--echo # +--echo # FILTER with aggregate functions in WHERE clause (should expect error) +--echo # + +--echo # This should error - aggregate in WHERE clause +--error ER_INVALID_GROUP_FUNC_USE +SELECT category, AVG(value) FILTER (WHERE value > AVG(value)) FROM test_aggregates GROUP BY category; + +--echo # +--echo # FILTER with error cases - syntax errors +--echo # + +--echo # Missing WHERE keyword (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER (status = 'active') FROM test_aggregates; + +--echo # Missing parentheses (should error) +--error ER_PARSE_ERROR +SELECT AVG(value) FILTER WHERE status = 'active' FROM test_aggregates; + +--echo # +--echo # Illegal use of FILTER with non-aggregate UDFs +--echo # + +--echo # Create a non-aggregate UDF for testing +delimiter |; +CREATE FUNCTION simple_double(x INT) RETURNS INT +DETERMINISTIC +BEGIN + RETURN x * 2; +END| +delimiter ;| + +--echo # FILTER clause with non-aggregate UDF should fail +--error ER_PARSE_ERROR +SELECT simple_double(value) FILTER (WHERE status = 'active') FROM test_aggregates; + +--echo # +--echo # CLEANUP +--echo # + +DROP 
FUNCTION simple_double; +DROP TABLE test_aggregates; +--enable_query_log + +--echo # +--echo # End of aggregates FILTER test +--echo # + diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index bca2c5b8a7561..65480e37eacb3 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -4805,6 +4805,9 @@ Item_func_json_objectagg::fix_fields(THD *thd, Item **ref) / collation.collation->mbminlen * collation.collation->mbmaxlen); + if (fix_filter(thd)) + return TRUE; + if (check_sum_func(thd, ref)) return TRUE; @@ -4842,7 +4845,7 @@ bool Item_func_json_objectagg::add() String *key; key= args[0]->val_str(&buf); - if (args[0]->is_null()) + if (args[0]->is_null() || !filter_passed()) return 0; null_value= 0; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 7746d47421911..6624246777f91 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -499,6 +499,8 @@ Item_sum::Item_sum(THD *thd, Item_sum *item): with_distinct= item->with_distinct; if (item->aggr) set_aggregator(thd, item->aggr->Aggrtype()); + if (item->has_filter()) + set_filter(item->filter_expr); } @@ -532,6 +534,12 @@ void Item_sum::print(String *str, enum_query_type query_type) pargs[i]->print(str, query_type); } str->append(')'); + if (has_filter()) + { + str->append(STRING_WITH_LEN(" FILTER(WHERE ")); + filter_expr->print(str, query_type); + str->append(')'); + } } void Item_sum::fix_num_length_and_dec() @@ -568,6 +576,63 @@ void Item_sum::update_used_tables () item->used_tables() == 0 && !item->const_item() */ } + if (has_filter()) + { + filter_expr->update_used_tables(); + used_tables_cache|= filter_expr->used_tables(); + } +} + + +/** + Tell whether the current row takes part in the aggregation. + + @return true when no FILTER clause is attached, when the aggregator reports + that it is in its endup phase, or when the FILTER condition + evaluates to true; false otherwise +*/ +bool Item_sum::filter_passed() +{ + if (!has_filter()) + return true; + /* Skip filter check if we're in endup phase (processing distinct values + that already passed the filter during collection phase) */ + if (aggr && aggr->is_in_endup_phase()) + return true; + /* Evaluate the condition as a truth value: val_int() would first convert + a non-integer result to an integer */ + return filter_expr->val_bool(); +} + + +bool Item_sum::fix_filter(THD *thd) +{ + if (!has_filter()) + return 
false; + + if (filter_expr->fix_fields_if_needed_for_scalar(thd, &filter_expr)) + return true; + + if (filter_expr->type() == Item::SUM_FUNC_ITEM || + filter_expr->type() == Item::WINDOW_FUNC_ITEM) + { + my_error(ER_WRONG_USAGE, MYF(0), "aggregate function", "FILTER"); + return true; + } + + if (filter_expr->with_window_func()) + { + my_error(ER_WRONG_USAGE, MYF(0), "window function", "FILTER"); + return true; + } + + if (filter_expr->with_subquery()) + { + my_error(ER_WRONG_USAGE, MYF(0), "subquery", "FILTER"); + return true; + } + + return false; } @@ -984,6 +1040,9 @@ bool Aggregator_distinct::add() if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) return TRUE; + if (!item_sum->filter_passed()) + return 0; + for (Field **field=table->field ; *field ; field++) if ((*field)->is_real_null(0)) return 0; // Don't count NULL @@ -1009,7 +1068,7 @@ bool Aggregator_distinct::add() else { item_sum->get_arg(0)->save_in_field(table->field[0], FALSE); - if (table->field[0]->is_null()) + if (table->field[0]->is_null() || !item_sum->filter_passed()) return 0; DBUG_ASSERT(tree); item_sum->null_value= 0; @@ -1124,6 +1183,10 @@ Item_sum_num::fix_fields(THD *thd, Item **ref) /* We should ignore FIELD's in arguments to sum functions */ with_flags|= (args[i]->with_flags & ~item_with_t::FIELD); } + + if (fix_filter(thd)) + return TRUE; + result_field=0; max_length=float_length(decimals); null_value=1; @@ -1156,6 +1219,9 @@ Item_sum_min_max::fix_fields(THD *thd, Item **ref) if (fix_length_and_dec(thd)) DBUG_RETURN(TRUE); + if (fix_filter(thd)) + DBUG_RETURN(TRUE); + if (!is_window_func_sum_expr()) setup_hybrid(thd, args[0], NULL); result_field=0; @@ -1653,7 +1719,7 @@ void Item_sum_sum::add_helper(bool perform_removal) direct_reseted_field= FALSE; my_decimal value; const my_decimal *val= aggr->arg_val_decimal(&value); - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) { if (perform_removal) { @@ -1698,7 +1764,7 @@ void 
Item_sum_sum::add_helper(bool perform_removal) - sum-= aggr->arg_val_real(); - else - sum+= aggr->arg_val_real(); - if (!aggr->arg_is_null(true)) + sum-= filter_passed() ? aggr->arg_val_real() : 0.0; + else /* rows rejected by FILTER must leave sum untouched */ + sum+= filter_passed() ? aggr->arg_val_real() : 0.0; + if (filter_passed() && !aggr->arg_is_null(true)) { if (perform_removal) { @@ -1928,7 +1994,7 @@ bool Item_sum_count::add() else { direct_reseted_field= FALSE; - if (aggr->arg_is_null(false)) + if (aggr->arg_is_null(false) || !filter_passed()) DBUG_RETURN(0); count++; } @@ -1943,7 +2009,7 @@ bool Item_sum_count::add() void Item_sum_count::remove() { DBUG_ASSERT(aggr->Aggrtype() == Aggregator::SIMPLE_AGGREGATOR); - if (aggr->arg_is_null(false)) + if (aggr->arg_is_null(false) || !filter_passed()) return; if (count > 0) count--; @@ -2051,7 +2117,7 @@ bool Item_sum_avg::add() { if (Item_sum_sum::add()) return TRUE; - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) count++; return FALSE; } @@ -2059,7 +2125,7 @@ bool Item_sum_avg::add() void Item_sum_avg::remove() { Item_sum_sum::remove(); - if (!aggr->arg_is_null(true)) + if (!aggr->arg_is_null(true) && filter_passed()) { if (count > 0) count--; @@ -2299,7 +2365,7 @@ bool Item_sum_variance::add() */ double nr= args[0]->val_real(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) m_stddev.recurrence_next(nr); return 0; } @@ -2336,7 +2402,7 @@ void Item_sum_variance::reset_field() nr= args[0]->val_real(); /* sets null_value as side-effect */ - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) bzero(res,Stddev::binary_size()); else Stddev(nr).to_binary(res); @@ -2367,7 +2433,7 @@ void Item_sum_variance::update_field() double nr= args[0]->val_real(); /* sets null_value as side-effect */ - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) return; /* Serialize format is (double)m, (double)s, (longlong)count */ @@ -2552,7 +2618,7 @@ bool Item_sum_min::add() DBUG_PRINT("info", ("null_value: %s", null_value ? 
"TRUE" : "FALSE")); /* args[0] < value */ arg_cache->cache_value(); - if (!arg_cache->null_value && + if (!arg_cache->null_value && filter_passed() && (null_value || cmp->compare() < 0)) { value->store(arg_cache); @@ -2592,7 +2658,7 @@ bool Item_sum_max::add() /* args[0] > value */ arg_cache->cache_value(); DBUG_PRINT("info", ("null_value: %s", null_value ? "TRUE" : "FALSE")); - if (!arg_cache->null_value && + if (!arg_cache->null_value && filter_passed() && (null_value || cmp->compare() > 0)) { value->store(arg_cache); @@ -2641,7 +2707,7 @@ bool Item_sum_bit::clear_as_window() bool Item_sum_bit::remove_as_window(ulonglong value) { DBUG_ASSERT(as_window_function); - if (num_values_added == 0) + if (num_values_added == 0 || args[0]->null_value || !filter_passed()) return 0; // Nothing to remove. for (int i= 0; i < NUM_BIT_COUNTERS; i++) @@ -2687,7 +2753,7 @@ void Item_sum_or::set_bits_from_counters() bool Item_sum_or::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2715,7 +2781,7 @@ Item *Item_sum_xor::copy_or_same(THD* thd) bool Item_sum_xor::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2750,7 +2816,7 @@ Item *Item_sum_and::copy_or_same(THD* thd) bool Item_sum_and::add() { ulonglong value= (ulonglong) args[0]->val_int(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { if (as_window_function) return add_as_window(value); @@ -2784,7 +2850,7 @@ void Item_sum_min_max::reset_field() String tmp(buff,sizeof(buff),result_field->charset()),*res; res= arg0->val_str(&tmp); - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { result_field->set_null(); result_field->reset(); @@ -2802,7 +2868,7 @@ void Item_sum_min_max::reset_field() if 
(maybe_null()) { - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { nr=0; result_field->set_null(); @@ -2820,7 +2886,7 @@ void Item_sum_min_max::reset_field() if (maybe_null()) { - if (arg0->null_value) + if (arg0->null_value || !filter_passed()) { nr=0.0; result_field->set_null(); @@ -2837,7 +2903,7 @@ void Item_sum_min_max::reset_field() if (maybe_null()) { - if (arg_dec.is_null()) + if (arg_dec.is_null() || !filter_passed()) result_field->set_null(); else result_field->set_notnull(); @@ -2888,7 +2954,7 @@ void Item_sum_sum::reset_field() null_flag= direct_sum_is_null; } else - null_flag= args[0]->null_value; + null_flag= args[0]->null_value || !filter_passed(); if (null_flag) result_field->set_null(); @@ -2910,7 +2976,7 @@ void Item_sum_count::reset_field() direct_counted= FALSE; direct_reseted_field= TRUE; } - else if (!args[0]->maybe_null() || !args[0]->is_null()) + else if ((!args[0]->maybe_null() || !args[0]->is_null()) && filter_passed()) nr= 1; DBUG_PRINT("info", ("nr: %lld", nr)); int8store(res,nr); @@ -2926,8 +2992,17 @@ void Item_sum_avg::reset_field() { longlong tmp; VDec value(args[0]); - tmp= value.is_null() ? 
0 : 1; - value.to_binary(res, f_precision, f_scale); + if (value.is_null() || !filter_passed()) + { + tmp= 0; + my_decimal_set_zero(dec_buffs); + dec_buffs[0].to_binary(res, f_precision, f_scale); + } + else + { + tmp= 1; + value.to_binary(res, f_precision, f_scale); + } res+= dec_bin_size; int8store(res, tmp); } @@ -2935,7 +3010,7 @@ void Item_sum_avg::reset_field() { double nr= args[0]->val_real(); - if (args[0]->null_value) + if (args[0]->null_value || !filter_passed()) bzero(res,sizeof(double)+sizeof(longlong)); else { @@ -2987,12 +3062,14 @@ void Item_sum_sum::update_field() else { arg_val= args[0]->val_decimal(&value); - null_flag= args[0]->null_value; + /* null_flag also covers rows rejected by FILTER, so the accumulation + below needs no second filter check */ + null_flag= args[0]->null_value || !filter_passed(); } if (!null_flag) { if (!result_field->is_null()) { my_decimal field_value(result_field); my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, &field_value); @@ -3021,7 +3096,7 @@ void Item_sum_sum::update_field() else { nr= args[0]->val_real(); - null_flag= args[0]->null_value; + null_flag= args[0]->null_value || !filter_passed(); } if (!null_flag) { @@ -3045,7 +3120,7 @@ void Item_sum_count::update_field() direct_counted= direct_reseted_field= FALSE; nr+= direct_count; } - else if (!args[0]->maybe_null() || !args[0]->is_null()) + else if ((!args[0]->maybe_null() || !args[0]->is_null()) && filter_passed()) nr++; DBUG_PRINT("info", ("nr: %lld", nr)); int8store(res,nr); @@ -3063,7 +3138,7 @@ void Item_sum_avg::update_field() if (result_type() == DECIMAL_RESULT) { VDec tmp(args[0]); - if (!tmp.is_null()) + if (!tmp.is_null() && filter_passed()) { binary2my_decimal(E_DEC_FATAL_ERROR, res, dec_buffs + 1, f_precision, f_scale); @@ -3080,7 +3155,7 @@ void Item_sum_avg::update_field() double nr; nr= args[0]->val_real(); - if (!args[0]->null_value) + if (!args[0]->null_value && filter_passed()) { double old_nr; float8get(old_nr, res); @@ -3107,6 +3182,10 @@ Item *Item_sum_avg::result_item(THD *thd, Field *field) 
void Item_sum_min_max::update_field() { DBUG_ENTER("Item_sum_min_max::update_field"); + if (!filter_passed()) + { + DBUG_VOID_RETURN; + } Item *UNINIT_VAR(tmp_item); if (unlikely(direct_added)) { @@ -3555,6 +3634,9 @@ Item_sum_str::fix_fields(THD *thd, Item **ref) return true; } + if (fix_filter(thd)) + return TRUE; + if (fix_fields_impl(thd, ref)) return TRUE; @@ -4202,7 +4284,7 @@ bool Item_func_group_concat::repack_tree(THD *thd) bool Item_func_group_concat::add(bool exclude_nulls) { - if (always_null && exclude_nulls) + if ((always_null || !filter_passed()) && exclude_nulls) return 0; copy_fields(tmp_table_param); if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) @@ -4224,6 +4306,9 @@ bool Item_func_group_concat::add(bool exclude_nulls) exclude_nulls) return 0; // Skip row if it contains null + if (!filter_passed() && !exclude_nulls) + field->set_null(); + buf.set_buffer_if_not_allocated(&my_charset_bin); if (tree && (res= field->val_str(&buf))) row_str_len+= res->length(); @@ -4639,7 +4724,7 @@ bool Item_func_collect::add() { uint current_geometry_srid; has_cached_result= false; - if (tmp_arg[0]->null_value) + if (tmp_arg[0]->null_value || !filter_passed()) return 0; if(is_distinct && list_contains_element(wkb)) @@ -4663,7 +4748,7 @@ void Item_func_collect::remove() { String *wkb= args[0]->val_str(&value); has_cached_result= false; - if (args[0]->null_value) return; + if (args[0]->null_value || !filter_passed()) return; List_iterator geometries_iterator(geometries); String* temp_geometry; diff --git a/sql/item_sum.h b/sql/item_sum.h index 12783892b2e5f..714f2e55a17fd 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -103,6 +103,13 @@ class Aggregator : public Sql_alloc (updated by arg_val*()). */ virtual bool arg_is_null(bool use_null_value) = 0; + + /** + Check if we're currently in the endup() phase processing distinct values. 
+ This is used to skip filter checks for DISTINCT aggregates since the filter + was already applied during the collection phase. + */ + virtual bool is_in_endup_phase() const { return false; } }; @@ -341,11 +348,21 @@ class Item_sum :public Item_func_or_sum /* TRUE if this is aggregate function of a window function */ bool window_func_sum_expr_flag; + /* Optional filter clause for the aggregate function */ + Item *filter_expr; + public: bool has_force_copy_fields() const { return force_copy_fields; } bool has_with_distinct() const { return with_distinct; } + /* Filter expression helpers */ + void set_filter(Item *filter_expr) { this->filter_expr= filter_expr; } + bool has_filter() const { return filter_expr != NULL; } + Item *get_filter() const { return filter_expr; } + bool filter_passed(); + bool fix_filter(THD *thd); + enum Sumfunctype { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, @@ -548,6 +565,7 @@ class Item_sum :public Item_func_or_sum aggr= NULL; with_distinct= FALSE; force_copy_fields= FALSE; + filter_expr= NULL; } /** @@ -711,6 +729,7 @@ class Aggregator_distinct : public Aggregator my_decimal *arg_val_decimal(my_decimal * value) override; double arg_val_real() override; bool arg_is_null(bool use_null_value) override; + bool is_in_endup_phase() const override { return use_distinct_values; } bool unique_walk_function(void *element); bool unique_walk_function_for_count(void *element); diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index f0a5737bdf73a..8a1eb3b1567ae 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -170,6 +170,12 @@ void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, Item **p_item= &window_func()->arguments()[i]; (*p_item)->split_sum_func2(thd, ref_pointer_array, fields, p_item, flags); } + if (window_func()->has_filter()) + { + Item *p_filter= window_func()->get_filter(); + p_filter->split_sum_func2(thd, 
ref_pointer_array, fields, &p_filter, flags); + window_func()->set_filter(p_filter); + } window_func()->setup_caches(thd); } diff --git a/sql/lex.h b/sql/lex.h index 41a34ef738989..93d60bb428a90 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -245,6 +245,7 @@ SYMBOL symbols[] = { { "FETCH", SYM(FETCH_SYM)}, { "FIELDS", SYM(COLUMNS)}, { "FILE", SYM(FILE_SYM)}, + { "FILTER", SYM(FILTER_SYM)}, { "FIRST", SYM(FIRST_SYM)}, { "FIXED", SYM(FIXED_SYM)}, { "FLOAT", SYM(FLOAT_SYM)}, diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index 6d5bf8c6f487f..fc99d1116ef82 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -357,11 +357,12 @@ int opt_sum_query(THD *thd, /* If the expr in COUNT(expr) can never be null we can change this to the number of rows in the tables if this number is exact and - there are no outer joins. + there are no outer joins and there is no filter clause. */ if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null() && !outer_tables && maybe_exact_count && - ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0)) + ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0) && + !((Item_sum_count*) item)->has_filter()) { if (!is_exact_count) { @@ -382,6 +383,17 @@ int opt_sum_query(THD *thd, case Item_sum::MIN_FUNC: case Item_sum::MAX_FUNC: { + /* + Do not attempt MIN/MAX constant replacement if a FILTER clause + is present on the aggregate. FILTER must be evaluated per-row and + cannot be folded by the index-based shortcut here. 
+ */ + if (item_sum->has_filter()) + { + const_result= 0; + break; + } + int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC); /* If MIN/MAX(expr) is the first part of a key or if all previous diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index dd742bc47f523..1f25e434c5c44 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -879,6 +879,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); %token FAULTS_SYM %token FEDERATED_SYM /* MariaDB privilege */ %token FILE_SYM +%token FILTER_SYM %token FIRST_SYM /* SQL-2003-N */ %token FIXED_SYM %token FLUSH_SYM @@ -1226,6 +1227,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); %nonassoc NEG '~' NOT2_SYM BINARY %nonassoc COLLATE_SYM %nonassoc SUBQUERY_AS_EXPR +%nonassoc FILTER_SYM /* Tokens that can change their meaning from identifier to something else @@ -1582,6 +1584,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); opt_versioning_interval_start json_default_literal set_expr_misc + unfiltered_sum_expr + opt_filter_expr %type opt_vers_auto_part @@ -11309,7 +11313,30 @@ udf_expr: } ; +opt_filter_expr: + /* empty */ { $$= NULL; } %prec SUBQUERY_AS_EXPR + | FILTER_SYM '(' WHERE + { Select->in_sum_expr++; } + expr + { Select->in_sum_expr--; } + ')' + { + $$= $5; + } + ; + sum_expr: + unfiltered_sum_expr opt_filter_expr + { + if ($2) + { + ((Item_sum *)$1)->set_filter($2); + } + $$= $1; + } + ; + +unfiltered_sum_expr: AVG_SYM '(' in_sum_expr ')' { $$= new (thd->mem_root) Item_sum_avg(thd, $3, FALSE); @@ -16687,6 +16714,7 @@ keyword_func_sp_var_and_label: | FAST_SYM | FEDERATED_SYM | FILE_SYM + | FILTER_SYM | FIRST_SYM | FOUND_SYM | FULL