diff --git a/Cargo.toml b/Cargo.toml index 4df5115..1aa4f5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,17 +30,27 @@ rust-version = "1.76" name = "datafusion_functions_extra" path = "src/lib.rs" +[[test]] +name= "sqllogictest" +path = "tests/sqllogictest/sqllogictest.rs" + [dependencies] arrow = { version = "53.0.0", features = ["test_utils"] } datafusion = "42" log = "^0.4" paste = "1" +async-trait = "0.1.83" +sqlparser = { version = "0.51.0", features = ["visitor"] } +thiserror = "1.0.44" [dev-dependencies] arrow = { version = "53.0.0", features = ["test_utils"] } criterion = { version = "0.5", features = ["async_tokio"] } insta = { version = "1.40.0", features = ["yaml"] } tokio = { version = "1.36", features = ["full"] } +futures = "0.3" +tempfile = "3" +sqllogictest = { version = "0.22.0" } [lints.clippy] dbg_macro = "deny" diff --git a/tests/main.rs b/tests/main.rs index 8a07ace..f683ea2 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1,423 +1,421 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::utils::TestExecution; - -mod utils; - -static TEST_TABLE: &str = r#" -CREATE TABLE test_table ( - utf8_col VARCHAR, - int64_col BIGINT, - float64_col DOUBLE, - date64_col DATE, - time64_col TIME -) AS VALUES - ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), - ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), - ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), - ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), - ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), - ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), - (NULL, NULL, NULL, NULL, NULL); -"#; - -#[tokio::test] -async fn test_mode() { - let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; - - let actual = execution.run_and_format("SELECT MODE(utf8_col) FROM test_table").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------------+ - - "| mode(test_table.utf8_col) |" - - +---------------------------+ - - "| apple |" - - +---------------------------+ - "###); - - let actual = execution.run_and_format("SELECT MODE(int64_col) FROM test_table").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +----------------------------+ - - "| mode(test_table.int64_col) |" - - +----------------------------+ - - "| 3 |" - - +----------------------------+ - "###); - - let actual = execution - .run_and_format("SELECT MODE(float64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +------------------------------+ - - "| mode(test_table.float64_col) |" - - +------------------------------+ - - "| 3.0 |" - - +------------------------------+ - "###); - - let actual = execution - .run_and_format("SELECT MODE(date64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-----------------------------+ - - "| mode(test_table.date64_col) |" - - +-----------------------------+ - - "| 2021-01-03 |" - - +-----------------------------+ - "###); -} - -#[tokio::test] -async fn test_mode_time64() { - let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; - - let actual = execution - .run_and_format("SELECT MODE(time64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-----------------------------+ - - "| mode(test_table.time64_col) |" - - +-----------------------------+ - - "| 03:00:00 |" - - +-----------------------------+ - "###); -} - -#[tokio::test] -async fn test_max_by_and_min_by() { - let mut execution = TestExecution::new().await.unwrap(); - - // Test max_by with numbers - let actual = execution - .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| max_by(tab.x,tab.y) |" - - +---------------------+ - - "| 3 |" - - +---------------------+ - "###); - - // Test min_by with numbers - let actual = execution - .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| min_by(tab.x,tab.y) |" - - +---------------------+ - - "| 2 |" - - +---------------------+ - "###); - - // Test max_by with strings - let actual = execution - .run_and_format("SELECT max_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------------------------------+ - - "| max_by(tab.name,character_length(tab.name)) |" - - +---------------------------------------------+ - - "| Charlie |" - - +---------------------------------------------+ - "###); - - // Test min_by with strings - let actual = execution - .run_and_format("SELECT min_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------------------------------+ - - "| min_by(tab.name,character_length(tab.name)) |" - - +---------------------------------------------+ - - "| Bob |" - - +---------------------------------------------+ - "###); - - // Test max_by with null values - let actual = execution - .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| max_by(tab.x,tab.y) |" - - +---------------------+ - - "| 2 |" - - +---------------------+ - "###); - - // Test min_by with null values - let actual = execution - .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| min_by(tab.x,tab.y) |" - - +---------------------+ - - "| 2 |" - - +---------------------+ - "###); - - // Test max_by with a single value - let actual = execution - .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| max_by(tab.x,tab.y) |" - - +---------------------+ - - "| 1 |" - - +---------------------+ - "###); - - // Test min_by with a single value - let actual = execution - .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| min_by(tab.x,tab.y) |" - - +---------------------+ - - "| 1 |" - - +---------------------+ - "###); - - // Test max_by with an empty set - let actual = execution - .run_and_format("SELECT max_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| max_by(tab.x,tab.y) |" - - +---------------------+ - - "| |" - - +---------------------+ - "###); - - // Test min_by with an empty set - let actual = execution - .run_and_format("SELECT min_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +---------------------+ - - "| min_by(tab.x,tab.y) |" - - +---------------------+ - - "| |" - - +---------------------+ - "###); -} - -#[tokio::test] -async fn test_kurtosis_pop() { - let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; - - // Test with int64 - let actual = execution - .run_and_format("SELECT kurtosis_pop(int64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +------------------------------------+ - - "| kurtosis_pop(test_table.int64_col) |" - - +------------------------------------+ - - "| -0.9599999999999755 |" - - +------------------------------------+ - "###); - - // Test with float64 - let actual = execution - .run_and_format("SELECT kurtosis_pop(float64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +--------------------------------------+ - - "| kurtosis_pop(test_table.float64_col) |" - - +--------------------------------------+ - - "| -0.9599999999999755 |" - - +--------------------------------------+ -"###); - - let actual = execution - .run_and_format("SELECT kurtosis_pop(col) FROM VALUES (1.0) as tab(col)") - .await; - insta::assert_yaml_snapshot!(actual, @r###" - - +-----------------------+ - - "| kurtosis_pop(tab.col) |" - - +-----------------------+ - - "| |" - - +-----------------------+ -"###); - - let actual = execution.run_and_format("SELECT kurtosis_pop(1.0)").await; - insta::assert_yaml_snapshot!(actual, @r###" - - +--------------------------+ - - "| kurtosis_pop(Float64(1)) |" - - +--------------------------+ - - "| |" - - +--------------------------+ -"###); - - let actual = execution.run_and_format("SELECT kurtosis_pop(null)").await; - insta::assert_yaml_snapshot!(actual, @r###" -- +--------------------+ -- "| kurtosis_pop(NULL) |" -- +--------------------+ -- "| |" -- +--------------------+ -"###); -} - -#[tokio::test] -async fn test_skewness() { - let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; - - // Test with int64 - let actual = execution - .run_and_format("SELECT skewness(int64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +--------------------------------+ - - "| skewness(test_table.int64_col) |" - - +--------------------------------+ - - "| -0.8573214099741201 |" - - +--------------------------------+ - "###); - - // Test with float64 - let actual = execution - .run_and_format("SELECT skewness(float64_col) FROM test_table") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +----------------------------------+ - - "| skewness(test_table.float64_col) |" - - +----------------------------------+ - - "| -0.8573214099741201 |" - - +----------------------------------+ -"###); - - // Test with single value - let actual = execution.run_and_format("SELECT skewness(1.0)").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +----------------------+ - - "| skewness(Float64(1)) |" - - +----------------------+ - - "| |" - - +----------------------+ - "###); - - let actual = execution - .run_and_format("SELECT skewness(col) FROM VALUES (1.0), (2.0) as tab(col)") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-------------------+ - - "| skewness(tab.col) |" - - +-------------------+ - - "| |" - - +-------------------+ - "###); -} - -#[tokio::test] -async fn test_kurtosis() { - let mut execution = TestExecution::new().await.unwrap(); - - let actual = execution - .run_and_format("SELECT kurtosis(col) FROM VALUES (1.0), (10.0), (100.0), (10.0), (1.0) as tab(col);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-------------------+ - - "| kurtosis(tab.col) |" - - +-------------------+ - - "| 4.777292927667962 |" - - +-------------------+ - "###); - - let actual = execution - .run_and_format("SELECT kurtosis(col) FROM VALUES ('1'), ('10'), ('100'), ('10'), ('1') as tab(col);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-------------------+ - - "| kurtosis(tab.col) |" - - +-------------------+ - - "| 4.777292927667962 |" - - +-------------------+ - "###); - - let actual = execution - .run_and_format("SELECT kurtosis(col) FROM VALUES (1.0), (2.0), (3.0) as tab(col);") - .await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +-------------------+ - - "| kurtosis(tab.col) |" - - +-------------------+ - - "| |" - - +-------------------+ - "###); - - let actual = execution.run_and_format("SELECT kurtosis(1);").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +--------------------+ - - "| kurtosis(Int64(1)) |" - - +--------------------+ - - "| |" - - +--------------------+ - "###); - - let actual = execution.run_and_format("SELECT kurtosis(1.0);").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +----------------------+ - - "| kurtosis(Float64(1)) |" - - +----------------------+ - - "| |" - - +----------------------+ - "###); - - let actual = execution.run_and_format("SELECT kurtosis(null);").await; - - insta::assert_yaml_snapshot!(actual, @r###" - - +----------------+ - - "| kurtosis(NULL) |" - - +----------------+ - - "| |" - - +----------------+ - "###); -} +// // Licensed to the Apache Software Foundation (ASF) under one +// // or more contributor license agreements. See the NOTICE file +// // distributed with this work for additional information +// // regarding copyright ownership. The ASF licenses this file +// // to you under the Apache License, Version 2.0 (the +// // "License"); you may not use this file except in compliance +// // with the License. You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, +// // software distributed under the License is distributed on an +// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// // KIND, either express or implied. See the License for the +// // specific language governing permissions and limitations +// // under the License. +// +// use sqllogictest::::TestExecution; +// +// static TEST_TABLE: &str = r#" +// CREATE TABLE test_table ( +// utf8_col VARCHAR, +// int64_col BIGINT, +// float64_col DOUBLE, +// date64_col DATE, +// time64_col TIME +// ) AS VALUES +// ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), +// ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), +// ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), +// ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), +// ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), +// ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), +// (NULL, NULL, NULL, NULL, NULL); +// "#; +// +// #[tokio::test] +// async fn test_mode() { +// let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; +// +// let actual = execution.run_and_format("SELECT MODE(utf8_col) FROM test_table").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------------+ +// - "| mode(test_table.utf8_col) |" +// - +---------------------------+ +// - "| apple |" +// - +---------------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT MODE(int64_col) FROM test_table").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +----------------------------+ +// - "| mode(test_table.int64_col) |" +// - +----------------------------+ +// - "| 3 |" +// - +----------------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT MODE(float64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +------------------------------+ +// - "| mode(test_table.float64_col) |" +// - +------------------------------+ +// - "| 3.0 |" +// - +------------------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT MODE(date64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-----------------------------+ +// - "| mode(test_table.date64_col) |" +// - +-----------------------------+ +// - "| 2021-01-03 |" +// - +-----------------------------+ +// "###); +// } +// +// #[tokio::test] +// async fn test_mode_time64() { +// let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; +// +// let actual = execution +// .run_and_format("SELECT MODE(time64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-----------------------------+ +// - "| mode(test_table.time64_col) |" +// - +-----------------------------+ +// - "| 03:00:00 |" +// - +-----------------------------+ +// "###); +// } +// +// #[tokio::test] +// async fn test_max_by_and_min_by() { +// let mut execution = TestExecution::new().await.unwrap(); +// +// // Test max_by with numbers +// let actual = execution +// .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| max_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 3 |" +// - +---------------------+ +// "###); +// +// // Test min_by with numbers +// let actual = execution +// .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| min_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 2 |" +// - +---------------------+ +// "###); +// +// // Test max_by with strings +// let actual = execution +// .run_and_format("SELECT max_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------------------------------+ +// - "| max_by(tab.name,character_length(tab.name)) |" +// - +---------------------------------------------+ +// - "| Charlie |" +// - +---------------------------------------------+ +// "###); +// +// // Test min_by with strings +// let actual = execution +// .run_and_format("SELECT min_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------------------------------+ +// - "| min_by(tab.name,character_length(tab.name)) |" +// - +---------------------------------------------+ +// - "| Bob |" +// - +---------------------------------------------+ +// "###); +// +// // Test max_by with null values +// let actual = execution +// .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| max_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 2 |" +// - +---------------------+ +// "###); +// +// // Test min_by with null values +// let actual = execution +// .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| min_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 2 |" +// - +---------------------+ +// "###); +// +// // Test max_by with a single value +// let actual = execution +// .run_and_format("SELECT max_by(x, y) FROM VALUES (1, 10) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| max_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 1 |" +// - +---------------------+ +// "###); +// +// // Test min_by with a single value +// let actual = execution +// .run_and_format("SELECT min_by(x, y) FROM VALUES (1, 10) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| min_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| 1 |" +// - +---------------------+ +// "###); +// +// // Test max_by with an empty set +// let actual = execution +// .run_and_format("SELECT max_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| max_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| |" +// - +---------------------+ +// "###); +// +// // Test min_by with an empty set +// let actual = execution +// .run_and_format("SELECT min_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +---------------------+ +// - "| min_by(tab.x,tab.y) |" +// - +---------------------+ +// - "| |" +// - +---------------------+ +// "###); +// } +// +// #[tokio::test] +// async fn test_kurtosis_pop() { +// let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; +// +// // Test with int64 +// let actual = execution +// .run_and_format("SELECT kurtosis_pop(int64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +------------------------------------+ +// - "| kurtosis_pop(test_table.int64_col) |" +// - +------------------------------------+ +// - "| -0.9599999999999755 |" +// - +------------------------------------+ +// "###); +// +// // Test with float64 +// let actual = execution +// .run_and_format("SELECT kurtosis_pop(float64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +--------------------------------------+ +// - "| kurtosis_pop(test_table.float64_col) |" +// - +--------------------------------------+ +// - "| -0.9599999999999755 |" +// - +--------------------------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT kurtosis_pop(col) FROM VALUES (1.0) as tab(col)") +// .await; +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-----------------------+ +// - "| kurtosis_pop(tab.col) |" +// - +-----------------------+ +// - "| |" +// - +-----------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT kurtosis_pop(1.0)").await; +// insta::assert_yaml_snapshot!(actual, @r###" +// - +--------------------------+ +// - "| kurtosis_pop(Float64(1)) |" +// - +--------------------------+ +// - "| |" +// - +--------------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT kurtosis_pop(null)").await; +// insta::assert_yaml_snapshot!(actual, @r###" +// - +--------------------+ +// - "| kurtosis_pop(NULL) |" +// - +--------------------+ +// - "| |" +// - +--------------------+ +// "###); +// } +// +// #[tokio::test] +// async fn test_skewness() { +// let mut execution = TestExecution::new().await.unwrap().with_setup(TEST_TABLE).await; +// +// // Test with int64 +// let actual = execution +// .run_and_format("SELECT skewness(int64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +--------------------------------+ +// - "| skewness(test_table.int64_col) |" +// - +--------------------------------+ +// - "| -0.8573214099741201 |" +// - +--------------------------------+ +// "###); +// +// // Test with float64 +// let actual = execution +// .run_and_format("SELECT skewness(float64_col) FROM test_table") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +----------------------------------+ +// - "| skewness(test_table.float64_col) |" +// - +----------------------------------+ +// - "| -0.8573214099741201 |" +// - +----------------------------------+ +// "###); +// +// // Test with single value +// let actual = execution.run_and_format("SELECT skewness(1.0)").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +----------------------+ +// - "| skewness(Float64(1)) |" +// - +----------------------+ +// - "| |" +// - +----------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT skewness(col) FROM VALUES (1.0), (2.0) as tab(col)") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-------------------+ +// - "| skewness(tab.col) |" +// - +-------------------+ +// - "| |" +// - +-------------------+ +// "###); +// } +// +// #[tokio::test] +// async fn test_kurtosis() { +// let mut execution = TestExecution::new().await.unwrap(); +// +// let actual = execution +// .run_and_format("SELECT kurtosis(col) FROM VALUES (1.0), (10.0), (100.0), (10.0), (1.0) as tab(col);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-------------------+ +// - "| kurtosis(tab.col) |" +// - +-------------------+ +// - "| 4.777292927667962 |" +// - +-------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT kurtosis(col) FROM VALUES ('1'), ('10'), ('100'), ('10'), ('1') as tab(col);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-------------------+ +// - "| kurtosis(tab.col) |" +// - +-------------------+ +// - "| 4.777292927667962 |" +// - +-------------------+ +// "###); +// +// let actual = execution +// .run_and_format("SELECT kurtosis(col) FROM VALUES (1.0), (2.0), (3.0) as tab(col);") +// .await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +-------------------+ +// - "| kurtosis(tab.col) |" +// - +-------------------+ +// - "| |" +// - +-------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT kurtosis(1);").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +--------------------+ +// - "| kurtosis(Int64(1)) |" +// - +--------------------+ +// - "| |" +// - +--------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT kurtosis(1.0);").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +----------------------+ +// - "| kurtosis(Float64(1)) |" +// - +----------------------+ +// - "| |" +// - +----------------------+ +// "###); +// +// let actual = execution.run_and_format("SELECT kurtosis(null);").await; +// +// insta::assert_yaml_snapshot!(actual, @r###" +// - +----------------+ +// - "| kurtosis(NULL) |" +// - +----------------+ +// - "| |" +// - +----------------+ +// "###); +// } diff --git a/tests/sqllogictest/slt/create_table.slt b/tests/sqllogictest/slt/create_table.slt new file mode 100644 index 0000000..54c1abd --- /dev/null +++ b/tests/sqllogictest/slt/create_table.slt @@ -0,0 +1,15 @@ +statement ok +CREATE TABLE test_table ( + utf8_col VARCHAR, + int64_col BIGINT, + float64_col DOUBLE, + date64_col DATE, + time64_col TIME +) AS VALUES + ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), + ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + (NULL, NULL, NULL, NULL, NULL); \ No newline at end of file diff --git a/tests/sqllogictest/slt/kurtosis.slt b/tests/sqllogictest/slt/kurtosis.slt new file mode 100644 index 0000000..a9c289c --- /dev/null +++ b/tests/sqllogictest/slt/kurtosis.slt @@ -0,0 +1,34 @@ +# Kurtosis test cases + +statement ok +CREATE TABLE tab(col DOUBLE); + +query I +SELECT kurtosis(col) FROM VALUES (1.0), (10.0), (100.0), (10.0), (1.0) as tab(col); +---- +4.777292927667962 + +query I +SELECT kurtosis(col) FROM VALUES ('1'), ('10'), ('100'), ('10'), ('1') as tab(col); +---- +4.777292927667962 + +query I +SELECT kurtosis(col) FROM VALUES (1.0), (2.0), (3.0) as tab(col); +---- +NULL + +query I +SELECT kurtosis(1); +---- +NULL + +query I +SELECT kurtosis(1.0); +---- +NULL + +query I +SELECT kurtosis(null); +---- +NULL diff --git a/tests/sqllogictest/slt/kurtosis_pop.slt b/tests/sqllogictest/slt/kurtosis_pop.slt new file mode 100644 index 0000000..aa4b291 --- /dev/null +++ b/tests/sqllogictest/slt/kurtosis_pop.slt @@ -0,0 +1,48 @@ +# Kurtosis Population test cases + +# Create test table and populate it +statement ok +CREATE TABLE test_table ( + utf8_col VARCHAR, + int64_col BIGINT, + float64_col DOUBLE, + date64_col DATE, + time64_col TIME +) AS VALUES + ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), + ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + (NULL, NULL, NULL, NULL, NULL); + +# Test kurtosis_pop with int64 column +query I +SELECT kurtosis_pop(int64_col) FROM test_table; +---- +-0.9599999999999755 + +# Test kurtosis_pop with float64 column +query I +SELECT kurtosis_pop(float64_col) FROM test_table; +---- +-0.9599999999999755 + +# Test kurtosis_pop with a single value +query I +SELECT kurtosis_pop(col) FROM VALUES (1.0) as tab(col); +---- +NULL + +# Test kurtosis_pop with a literal value +query I +SELECT kurtosis_pop(1.0); +---- +NULL + +# Test kurtosis_pop with NULL +query I +SELECT kurtosis_pop(null); +---- +NULL \ No newline at end of file diff --git a/tests/sqllogictest/slt/max_by_and_min_by.slt b/tests/sqllogictest/slt/max_by_and_min_by.slt new file mode 100644 index 0000000..49bc302 --- /dev/null +++ b/tests/sqllogictest/slt/max_by_and_min_by.slt @@ -0,0 +1,62 @@ +# MAX_BY and MIN_BY test cases + + +# Test max_by with numbers +query I +SELECT max_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y); +---- +3 + +# Test min_by with numbers +query I +SELECT min_by(x, y) FROM VALUES (1, 10), (2, 5), (3, 15), (4, 8) as tab(x, y); +---- +2 + +# Test max_by with strings +query I +SELECT max_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name); +---- +Charlie + +# Test min_by with strings +query I +SELECT min_by(name, length(name)) FROM VALUES ('Alice'), ('Bob'), ('Charlie') as tab(name); +---- +Bob + +# Test max_by with null values +query I +SELECT max_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y); +---- +2 + +# Test min_by with null values +query I +SELECT min_by(x, y) FROM VALUES (1, 10), (2, null), (3, 15), (null, 8) as tab(x, y); +---- +2 + +# Test max_by with a single value +query I +SELECT max_by(x, y) FROM VALUES (1, 10) as tab(x, y); +---- +1 + +# Test min_by with a single value +query I +SELECT min_by(x, y) FROM VALUES (1, 10) as tab(x, y); +---- +1 + +# Test max_by with an empty set +query I +SELECT max_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y); +---- +NULL + +# Test min_by with an empty set +query I +SELECT min_by(x, y) FROM (SELECT * FROM (VALUES (1, 10)) WHERE 1=0) as tab(x, y); +---- +NULL \ No newline at end of file diff --git a/tests/sqllogictest/slt/mode.slt b/tests/sqllogictest/slt/mode.slt new file mode 100644 index 0000000..cd114ef --- /dev/null +++ b/tests/sqllogictest/slt/mode.slt @@ -0,0 +1,48 @@ +# Mode test cases + +# Create test table and populate it +statement ok +CREATE TABLE test_table ( + utf8_col VARCHAR, + int64_col BIGINT, + float64_col DOUBLE, + date64_col DATE, + time64_col TIME +) AS VALUES + ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), + ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + (NULL, NULL, NULL, NULL, NULL); + +# Test mode with utf8 column +query I +SELECT MODE(utf8_col) FROM test_table; +---- +apple + +# Test mode with int64 column +query I +SELECT MODE(int64_col) FROM test_table; +---- +3 + +# Test mode with float64 column +query I +SELECT MODE(float64_col) FROM test_table; +---- +3.0 + +# Test mode with date64 column +query I +SELECT MODE(date64_col) FROM test_table; +---- +2021-01-03 + +# Test mode with time64 column +query I +SELECT MODE(time64_col) FROM test_table; +---- +03:00:00 \ No newline at end of file diff --git a/tests/sqllogictest/slt/skewness.slt b/tests/sqllogictest/slt/skewness.slt new file mode 100644 index 0000000..4b7e5cc --- /dev/null +++ b/tests/sqllogictest/slt/skewness.slt @@ -0,0 +1,42 @@ +# Create test table and populate it +statement ok +CREATE TABLE test_table ( + utf8_col VARCHAR, + int64_col BIGINT, + float64_col DOUBLE, + date64_col DATE, + time64_col TIME +) AS VALUES + ('apple', 1, 1.0, DATE '2021-01-01', TIME '01:00:00'), + ('banana', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('apple', 2, 2.0, DATE '2021-01-02', TIME '02:00:00'), + ('orange', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('banana', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + ('apple', 3, 3.0, DATE '2021-01-03', TIME '03:00:00'), + (NULL, NULL, NULL, NULL, NULL); + +# Skewness test cases + +# Test with int64 column +query I +SELECT skewness(int64_col) FROM test_table; +---- +-0.8573214099741201 + +# Test with float64 column +query I +SELECT skewness(float64_col) FROM test_table; +---- +-0.8573214099741201 + +# Test with a single value +query I +SELECT skewness(1.0); +---- +NULL + +# Test with two values +query I +SELECT skewness(col) FROM VALUES (1.0), (2.0) as tab(col); +---- +NULL diff --git a/tests/sqllogictest/sqllogictest.rs b/tests/sqllogictest/sqllogictest.rs new file mode 100644 index 0000000..1e10729 --- /dev/null +++ b/tests/sqllogictest/sqllogictest.rs @@ -0,0 +1,104 @@ +pub mod test_context; +pub mod utils; + +use crate::utils::TestExecution; +use datafusion::common::runtime::SpawnedTask; +use datafusion::common::{exec_datafusion_err, DataFusionError, Result}; +use futures::stream::StreamExt; +use log::info; +// use sqllogictest::strict_column_validator; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; +use test_context::TestContext; + +const TEST_DIRECTORY: &str = "./tests/sqllogictest/"; + +/// Represents a parsed test file +#[derive(Debug)] +struct TestFile { + /// The absolute path to the file + pub path: PathBuf, + /// The relative path of the file (used for display) + pub relative_path: PathBuf, +} + +impl TestFile { + fn new(path: PathBuf) -> Self { + let relative_path = PathBuf::from(path.to_string_lossy().strip_prefix(TEST_DIRECTORY).unwrap_or("")); + + Self { path, relative_path } + } + + fn is_slt_file(&self) -> bool { + self.path.extension() == Some(OsStr::new("slt")) + } +} + +#[tokio::test] +async fn sqllogictest() { + // let test_files = read_test_files().unwrap(); + let errors = futures::stream::iter(read_test_files().unwrap()) + .map(|test_file| { + SpawnedTask::spawn(async move { + run_test_file(test_file).await?; + + Ok(()) as Result<()> + }) + .join() + }) + .collect() + .await; +} + +fn read_test_files() -> Result>> { + Ok(Box::new( + read_dir_recursive(TEST_DIRECTORY)? + .into_iter() + .map(TestFile::new) + .filter(|f| f.is_slt_file()), + )) +} + +fn read_dir_recursive>(path: P) -> Result> { + let mut dst = vec![]; + read_dir_recursive_impl(&mut dst, path.as_ref())?; + Ok(dst) +} + +/// Append all paths recursively to dst +fn read_dir_recursive_impl(dst: &mut Vec, path: &Path) -> Result<()> { + let entries = std::fs::read_dir(path).map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?; + for entry in entries { + let path = entry + .map_err(|e| exec_datafusion_err!("Error reading entry in directory {path:?}: {e}"))? + .path(); + + if path.is_dir() { + read_dir_recursive_impl(dst, &path)?; + } else { + dst.push(path); + } + } + + Ok(()) +} + +async fn run_test_file(test_file: TestFile) -> Result<()> { + let TestFile { path, relative_path } = test_file; + info!("Running with DataFusion runner: {}", path.display()); + let Some(test_ctx) = TestContext::try_new_for_test_file(&relative_path).await else { + info!("Skipping: {}", path.display()); + return Ok(()); + }; + let mut runner = sqllogictest::Runner::new(|| async { + Ok(TestExecution::new( + test_ctx.session_ctx().clone(), + relative_path.clone(), + )) + }); + // runner.with_column_validator(strict_column_validator); + runner + .run_file_async(path) + .await + .map_err(|e| DataFusionError::External(Box::new(e))) +} diff --git a/tests/sqllogictest/test_context.rs b/tests/sqllogictest/test_context.rs new file mode 100644 index 0000000..9434d12 --- /dev/null +++ b/tests/sqllogictest/test_context.rs @@ -0,0 +1,35 @@ +use datafusion::prelude::SessionConfig; +use datafusion::prelude::SessionContext; +use datafusion_functions_extra::register_all_extra_functions; +use std::path::Path; +use tempfile::TempDir; + +pub struct TestContext { + /// Context for running queries + ctx: SessionContext, +} + +impl TestContext { + pub fn new(ctx: SessionContext) -> Self { + register_all_extra_functions(&ctx); + Self { ctx } + } + + /// Create a SessionContext, configured for the specific sqllogictest + /// test(.slt file) , if possible. + /// + /// If `None` is returned (e.g. because some needed feature is not + /// enabled), the file should be skipped + pub async fn try_new_for_test_file(relative_path: &Path) -> Option { + let config = SessionConfig::new(); + + let test_ctx = TestContext::new(SessionContext::new_with_config(config)); + + Some(test_ctx) + } + + /// Returns a reference to the internal SessionContext + pub fn session_ctx(&self) -> &SessionContext { + &self.ctx + } +} diff --git a/tests/sqllogictest/utils/mod.rs b/tests/sqllogictest/utils/mod.rs new file mode 100644 index 0000000..6415073 --- /dev/null +++ b/tests/sqllogictest/utils/mod.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::error::ArrowError; +use arrow::record_batch::RecordBatch; +use arrow::util::pretty::pretty_format_batches; +use async_trait::async_trait; +use datafusion::common::DataFusionError; +use datafusion::error::Result; +use datafusion::execution::context::SessionContext; +use datafusion::physical_plan::common::collect; +use datafusion::physical_plan::execute_stream; +// use datafusion::sql::parser::DFParser; +use datafusion_functions_extra::register_all_extra_functions; +use log::{debug, info}; +use sqllogictest::{DBOutput, TestError}; +use sqlparser::parser::ParserError; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use thiserror::Error; + +/// DataFusion sql-logicaltest error +#[derive(Debug, Error)] +pub enum DFSqlLogicTestError { + /// Error from sqllogictest-rs + #[error("SqlLogicTest error(from sqllogictest-rs crate): {0}")] + SqlLogicTest(#[from] TestError), + /// Error from datafusion + #[error("DataFusion error: {0}")] + DataFusion(#[from] DataFusionError), + /// Error returned when SQL is syntactically incorrect. + #[error("SQL Parser error: {0}")] + Sql(#[from] ParserError), + /// Error from arrow-rs + #[error("Arrow error: {0}")] + Arrow(#[from] ArrowError), + /// Generic error + #[error("Other Error: {0}")] + Other(String), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum DFColumnType { + Boolean, + DateTime, + Integer, + Float, + Text, + Timestamp, + Another, +} + +pub struct TestExecution { + ctx: SessionContext, + relative_path: PathBuf, +} + +impl TestExecution { + pub async fn new(mut ctx: SessionContext, relative_path: PathBuf) -> Self { + register_all_extra_functions(&mut ctx).unwrap(); + Self { ctx, relative_path } + } +} + +#[async_trait] +impl sqllogictest::AsyncDB for TestExecution { + type Error = DFSqlLogicTestError; + type ColumnType = DFColumnType; + + async fn run(&mut self, sql: &str) -> Result> { + info!("[{}] Running query: \"{}\"", self.relative_path.display(), sql); + run_query(&self.ctx, sql).await + } + + /// Engine name of current database. + fn engine_name(&self) -> &str { + "TestExecution" + } + + async fn sleep(dur: Duration) { + tokio::time::sleep(dur).await; + } +} + +fn format_results(results: &[RecordBatch]) -> Vec { + let formatted = pretty_format_batches(results).unwrap().to_string(); + + formatted.lines().map(|s| s.to_string()).collect() +} + +pub async fn run_query(ctx: &SessionContext, sql: &str) -> Result> { + debug!("Running query: {sql}"); + let df = ctx.sql(sql).await?; + + let task_ctx = Arc::new(df.task_ctx()); + let plan = df.create_physical_plan().await?; + + // let stream = execute_stream(plan, task_ctx)?; + // let types = normalize::convert_schema_to_types(stream.schema().fields()); + // let results: Vec = collect(stream).await?; + // let rows = normalize::convert_batches(results)?; + // + // if rows.is_empty() && types.is_empty() { + // Ok(DBOutput::StatementComplete(0)) + // } else { + // Ok(DBOutput::Rows { types, rows }) + // } + let stream = execute_stream(plan, task_ctx)?; + let results: Vec = collect(stream).await?; + + if results.is_empty() { + Ok(DBOutput::StatementComplete(0)) + } else { + let formatted_results = pretty_format_batches(&results).unwrap().to_string(); + let rows: Vec> = formatted_results + .lines() + .map(|line| line.split_whitespace().map(|s| s.to_string()).collect()) + .collect(); + + let types = Vec::new(); + Ok(DBOutput::Rows { types, rows }) + } +} diff --git a/tests/utils/mod.rs b/tests/utils/mod.rs deleted file mode 100644 index 4292685..0000000 --- a/tests/utils/mod.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::record_batch::RecordBatch; -use arrow::util::pretty::pretty_format_batches; -use datafusion::error::Result; -use datafusion::execution::context::SessionContext; -use datafusion::prelude::SessionConfig; -use datafusion::sql::parser::DFParser; -use datafusion_functions_extra::register_all_extra_functions; -use log::debug; - -pub struct TestExecution { - ctx: SessionContext, -} - -impl TestExecution { - pub async fn new() -> Result { - let config = SessionConfig::new(); - let mut ctx = SessionContext::new_with_config(config); - register_all_extra_functions(&mut ctx)?; - Ok(Self { ctx }) - } - - pub async fn with_setup(self, sql: &str) -> Self { - debug!("Running setup query: {sql}"); - let statements = DFParser::parse_sql(sql).expect("Error parsing setup query"); - for statement in statements { - debug!("Running setup statement: {statement}"); - let statement_sql = statement.to_string(); - self.ctx - .sql(&statement_sql) - .await - .expect("Error planning setup failed") - .collect() - .await - .expect("Error executing setup query"); - } - self - } - - pub async fn run(&mut self, sql: &str) -> Result> { - debug!("Running query: {sql}"); - self.ctx.sql(sql).await?.collect().await - } - - pub async fn run_and_format(&mut self, sql: &str) -> Vec { - let results = self.run(sql).await.expect("Error running query"); - format_results(&results) - } -} - -fn format_results(results: &[RecordBatch]) -> Vec { - let formatted = pretty_format_batches(results).unwrap().to_string(); - - formatted.lines().map(|s| s.to_string()).collect() -}