diff --git a/tests/sqllogictests/sql/add_files/add_empty_file.test b/tests/sqllogictests/sql/add_files/add_empty_file.test
new file mode 100644
index 0000000..9f4ab1a
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_empty_file.test
@@ -0,0 +1,44 @@
+# name: test/sql/add_files/add_empty_file.test
+# description: test ducklake adding empty files
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_empty_files/', METADATA_CATALOG 'metadata');
+
+statement ok
+CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR);
+
+statement ok
+INSERT INTO ducklake.test VALUES (100, 'hello');
+
+# write an empty parquet file outside of DuckLake
+statement ok
+COPY (SELECT 200 col1, 'world' col2 LIMIT 0) TO '${DATA_PATH}/ducklake_empty_files/main/test/empty_file.parquet';
+
+statement ok
+BEGIN
+
+# make its existence known to DuckLake
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_empty_files/main/test/empty_file.parquet')
+
+query II
+FROM ducklake.test
+----
+100 hello
+
+statement ok
+COMMIT
+
+query II
+FROM ducklake.test
+----
+100 hello
diff --git a/tests/sqllogictests/sql/add_files/add_file_footer_size.test b/tests/sqllogictests/sql/add_files/add_file_footer_size.test
new file mode 100644
index 0000000..701a6fc
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_file_footer_size.test
@@ -0,0 +1,30 @@
+# name: test/sql/add_files/add_file_footer_size.test
+# description: test that ducklake records the data file footer size
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_empty_files/', METADATA_CATALOG 'metadata');
+
+statement ok
+CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR);
+
+# write a parquet file outside of DuckLake
+statement ok
+COPY (SELECT 200 col1, 'world' col2) TO '${DATA_PATH}/data.parquet';
+
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/data.parquet')
+
+query I
+SELECT data_file_footer_size > 0 FROM ducklake_list_files('ducklake', 'test')
+----
+True
diff --git a/tests/sqllogictests/sql/add_files/add_file_specific_schema.test b/tests/sqllogictests/sql/add_files/add_file_specific_schema.test
new file mode 100644
index 0000000..71eb553
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_file_specific_schema.test
@@ -0,0 +1,40 @@
+# name: test/sql/add_files/add_file_specific_schema.test
+# description: test ducklake adding files to a specified schema
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/add_file_specific_schema/', METADATA_CATALOG 'metadata');
+
+statement ok
+USE ducklake;
+
+statement ok
+CREATE SCHEMA schema_test;
+
+statement ok
+CREATE TABLE schema_test.test(col1 INTEGER, col2 VARCHAR);
+
+statement ok
+COPY (SELECT 200 col1, 'world' col2) TO '${DATA_PATH}/file.parquet';
+
+statement error
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/file.parquet');
+----
+Did you mean "schema_test.test"?
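+
+# the unqualified name resolves in schema "main"; the call below instead passes
+# the target schema explicitly via the named "schema" parameter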
+ +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/file.parquet', schema = 'schema_test'); + +query II +FROM schema_test.test; +---- +200 world \ No newline at end of file diff --git a/tests/sqllogictests/sql/add_files/add_files.test b/tests/sqllogictests/sql/add_files/add_files.test new file mode 100644 index 0000000..bd11d6e --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files.test @@ -0,0 +1,184 @@ +# name: test/sql/add_files/add_files.test +# description: test ducklake adding files directly +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files/', METADATA_CATALOG 'metadata'); + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR); + +statement ok +INSERT INTO ducklake.test VALUES (100, 'hello'); + +# write a parquet file outside of DuckLake +statement ok +COPY (SELECT 200 col1, 'world' col2) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file.parquet'; + +statement ok +BEGIN + +# make its existence known to DuckLake +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files/main/test/my_file.parquet') + +query II +FROM ducklake.test +---- +100 hello +200 world + +statement ok +COMMIT + +query II rowsort +FROM ducklake.test +---- +100 hello +200 world + +# reverse the order +statement ok +COPY (SELECT 'abcd' col2, 300 col1) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file2.parquet'; + +statement ok +BEGIN + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files/main/test/my_file2.parquet') + +query II +FROM ducklake.test +---- +100 hello +200 world +300 abcd + +statement ok +COMMIT + +query II +FROM ducklake.test +---- +100 hello +200 world +300 abcd + +# ensure the column mappings are duplicate-eliminated +query I +SELECT COUNT(*) FROM metadata.ducklake_column_mapping +---- +1 + +# let's alter the table +statement ok +ALTER TABLE ducklake.test ADD COLUMN col3 TINYINT + +# we can still query from the existing files +query III +FROM ducklake.test +---- +100 hello NULL +200 world NULL +300 abcd NULL + +# we can add a new file +statement ok +COPY (SELECT 'efgh' col2, 400 col1, 50::TINYINT col3) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file3.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files/main/test/my_file3.parquet') + +query III +FROM ducklake.test +---- +100 hello NULL +200 world NULL +300 abcd NULL +400 efgh 50 + +# we now have two name mappings +query I +SELECT COUNT(*) FROM metadata.ducklake_column_mapping +---- +2 + +# drop a column +statement ok +ALTER TABLE ducklake.test DROP COLUMN col2 + +query II +FROM ducklake.test +---- +100 NULL +200 NULL +300 NULL +400 50 + +# schema-level time travel works, also on files we added here +query III +FROM ducklake.test AT (VERSION => 6) +---- +100 hello NULL +200 world NULL +300 abcd NULL +400 efgh 50 + +# we can re-add the column and write a new file +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col2 VARCHAR + +statement ok +COPY (SELECT 'ijkl' col2, 500 col1, 100::TINYINT col3) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file4.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files/main/test/my_file4.parquet') + +# col2 is NULL for the old 
columns, even though they have a column called col2 +query III +SELECT col1, col2, col3 FROM ducklake.test +---- +100 NULL NULL +200 NULL NULL +300 NULL NULL +400 NULL 50 +500 ijkl 100 + +statement ok +COMMIT + +query III +SELECT col1, col2, col3 FROM ducklake.test +---- +100 NULL NULL +200 NULL NULL +300 NULL NULL +400 NULL 50 +500 ijkl 100 + +# time travel still works +query III +FROM ducklake.test AT (VERSION => 6) +---- +100 hello NULL +200 world NULL +300 abcd NULL +400 efgh 50 + +# file does not exist +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/abcd.parquet') +---- +Failed to add data files diff --git a/tests/sqllogictests/sql/add_files/add_files_compaction.test b/tests/sqllogictests/sql/add_files/add_files_compaction.test new file mode 100644 index 0000000..c04ac0f --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_compaction.test @@ -0,0 +1,147 @@ +# name: test/sql/add_files/add_files_compaction.test +# description: test ducklake compaction of manually added files +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_compaction/') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +CREATE TABLE ducklake.test2(i INTEGER); + +# snapshot 3 +statement ok +INSERT INTO ducklake.test VALUES (1); + +# snapshot 4: random change that does not modify the "test" table +statement ok +INSERT INTO ducklake.test2 VALUES (42); + +# snapshot 5..8 +loop i 2 6 + +statement ok +COPY (SELECT ${i} AS i) TO '${DATA_PATH}/ducklake_add_files_compaction/my_file${i}.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_compaction/my_file${i}.parquet') + +endloop + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_add_files_compaction/**/*') +---- +6 + +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +3 0 1 +5 1 2 +6 2 3 +7 3 4 +8 4 5 + +statement ok +CALL ducklake.merge_adjacent_files(); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# files are not immediately deleted - but are added to the deletion queue +# we actually gain a file here +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_add_files_compaction/**/*') +---- +7 + +# force clean-up of the files +# test dry run - this just lists the files to be cleaned up +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', cleanup_all => true, dry_run => true); +---- +5 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# no more files to be deleted +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', cleanup_all => true, dry_run => true); +---- +0 + +# now the files are gone +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_add_files_compaction/**/*') +---- +2 + +# verify correct behavior when operating on the compacted file +# time travel +query I +SELECT * FROM ducklake.test AT (VERSION => 3) +---- +1 + +query I +SELECT * FROM ducklake.test AT (VERSION => 4) +---- +1 + + +query I +SELECT * FROM ducklake.test AT (VERSION => 5) ORDER BY ALL +---- +1 +2 + +# reading snapshot id and row id +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +3 0 1 +5 1 2 +6 2 3 +7 3 4 +8 4 5 + +# table insertions function +query II +SELECT rowid, * 
FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 3) ORDER BY ALL +---- +0 1 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 4) ORDER BY ALL +---- +0 1 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 6) ORDER BY ALL +---- +0 1 +1 2 +2 3 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 9) ORDER BY ALL +---- +0 1 +1 2 +2 3 +3 4 +4 5 diff --git a/tests/sqllogictests/sql/add_files/add_files_complex_nested_stats_mre.test b/tests/sqllogictests/sql/add_files/add_files_complex_nested_stats_mre.test new file mode 100644 index 0000000..257a4e6 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_complex_nested_stats_mre.test @@ -0,0 +1,79 @@ +# name: test/sql/add_files/add_files_complex_nested_stats_mre.test +# description: MRE for statistics mismatch with array and struct array columns +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/stats_data', METADATA_CATALOG 'metadata'); + +# Schema that reproduces the original error: array + struct array + other types +statement ok +CREATE TABLE ducklake.test_table( + data_array DOUBLE[], + diagnostics STRUCT("key" VARCHAR, "value" VARCHAR)[], + seq_num UINTEGER +); + +statement ok +COPY (SELECT + [1.0::DOUBLE, 2.0] as data_array, + [{'key': 'status', 'value': 'active'}] as diagnostics, + 100::UINTEGER as seq_num +) TO '${DATA_PATH}/file1.parquet'; + +statement ok +COPY (SELECT + [3.0::DOUBLE, 4.0, 5.0] as data_array, + [{'key': 'type', 'value': 'test'}, {'key': 'state', 'value': 'running'}] as diagnostics, + 200::UINTEGER as seq_num +) TO '${DATA_PATH}/file2.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test_table', '${DATA_PATH}/file1.parquet') + +query III +SELECT column_id, min_value, max_value FROM metadata.ducklake_file_column_stats ORDER BY data_file_id, column_id +---- +2 1.0 2.0 +5 status status +6 active active +7 100 100 + + +query III +SELECT column_id, min_value, max_value FROM metadata.ducklake_table_column_stats ORDER BY column_id +---- +2 1.0 2.0 +5 status status +6 active active +7 100 100 + +statement ok +CALL ducklake_add_data_files('ducklake', 'test_table', '${DATA_PATH}/file2.parquet') + +query IIII +SELECT data_file_id, column_id, min_value, max_value FROM metadata.ducklake_file_column_stats ORDER BY data_file_id, column_id +---- +1 2 1.0 2.0 +1 5 status status +1 6 active active +1 7 100 100 +2 2 3.0 5.0 +2 5 state type +2 6 running test +2 7 200 200 + +query III +SELECT column_id, min_value, max_value FROM metadata.ducklake_table_column_stats ORDER BY column_id +---- +2 1.0 5.0 +5 state type +6 active test +7 100 200 diff --git a/tests/sqllogictests/sql/add_files/add_files_extra_columns.test b/tests/sqllogictests/sql/add_files/add_files_extra_columns.test new file mode 100644 index 0000000..c1f9943 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_extra_columns.test @@ -0,0 +1,47 @@ +# name: test/sql/add_files/add_files_extra_columns.test +# description: test ducklake adding files with extra columns +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH 
'${DATA_PATH}/ducklake_add_files_extra_columns'); + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +COPY (SELECT 42 j, 84 i, 100 x) TO '${DATA_PATH}/extra_column.parquet' + +# adding a file with extra columns results in an error +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/extra_column.parquet') +---- +Column "x" exists in file + +statement ok +BEGIN + +# UNLESS we specify "ignore_extra_columns" +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/extra_column.parquet', ignore_extra_columns => true) + +query II +FROM ducklake.test +---- +84 42 + +statement ok +COMMIT + +query II +FROM ducklake.test +---- +84 42 diff --git a/tests/sqllogictests/sql/add_files/add_files_hive.test b/tests/sqllogictests/sql/add_files/add_files_hive.test new file mode 100644 index 0000000..9c342ea --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_hive.test @@ -0,0 +1,94 @@ +# name: test/sql/add_files/add_files_hive.test +# description: test ducklake adding hive-partitioned files directly +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_hive/', METADATA_CATALOG 'metadata'); + +statement ok +CREATE TABLE partitioned_tbl(part_key INT, part_key2 INT, val VARCHAR); + +statement ok +INSERT INTO partitioned_tbl VALUES (1, 10, 'hello'), (2, 10, 'world'), (2, 20, 'abc') + +statement ok +COPY partitioned_tbl TO '${DATA_PATH}/ducklake_add_files_hive/' (FORMAT PARQUET, PARTITION_BY(part_key, part_key2)); + +statement ok +CREATE TABLE ducklake.test(part_key INT, part_key2 INT, val VARCHAR); + +# we need the hive partition columns +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive/**/*.parquet', hive_partitioning => false) +---- +allow_missing + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive/**/*.parquet', hive_partitioning => true) + +query III +SELECT part_key, part_key2, val FROM ducklake.test ORDER BY ALL +---- +1 10 hello +2 10 world +2 20 abc + +# test pushdown into stats +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.test WHERE part_key=1 +---- +analyzed_plan :.*Total Files Read: 1.* + +# rename the hive partition +statement ok +ALTER TABLE ducklake.test RENAME part_key TO new_part_key + +query III +SELECT new_part_key, part_key2, val FROM ducklake.test ORDER BY ALL +---- +1 10 hello +2 10 world +2 20 abc + +# add new values post-rename +statement ok +COPY (FROM (VALUES (1, 10, 'new value')) t(new_part_key, part_key2, val)) TO '${DATA_PATH}/ducklake_add_files_hive/' (FORMAT PARQUET, PARTITION_BY(new_part_key, part_key2), APPEND); + +# hive partitioning is automatically detected by default +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive/new_part_key=*/**/*.parquet') + +query III +SELECT new_part_key, part_key2, val FROM ducklake.test ORDER BY ALL +---- +1 10 hello +1 10 new value +2 10 world +2 20 abc + +# test pushdown into stats +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.test WHERE new_part_key=1 +---- +analyzed_plan :.*Total Files Read: 2.* + +# type promotion +statement ok +ALTER TABLE ducklake.test ALTER new_part_key SET TYPE BIGINT + +query III +SELECT new_part_key, part_key2, val FROM ducklake.test ORDER BY 
ALL
+----
+1 10 hello
+1 10 new value
+2 10 world
+2 20 abc
diff --git a/tests/sqllogictests/sql/add_files/add_files_hive_mismatch.test b/tests/sqllogictests/sql/add_files/add_files_hive_mismatch.test
new file mode 100644
index 0000000..42449ee
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_files_hive_mismatch.test
@@ -0,0 +1,63 @@
+# name: test/sql/add_files/add_files_hive_mismatch.test
+# description: test ducklake adding hive-partitioned files with type mismatch
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_hive_mismatch/', METADATA_CATALOG 'metadata');
+
+statement ok
+CREATE TABLE partitioned_tbl(part_key VARCHAR, part_key2 INT, val VARCHAR);
+
+statement ok
+INSERT INTO partitioned_tbl VALUES ('p1', 10, 'hello'), ('p2', 10, 'world'), ('p2', 20, 'abc')
+
+statement ok
+COPY partitioned_tbl TO '${DATA_PATH}/ducklake_add_files_hive_mismatch/' (FORMAT PARQUET, PARTITION_BY(part_key, part_key2));
+
+statement ok
+CREATE TABLE ducklake.test(part_key INT, part_key2 INT, val VARCHAR);
+
+# value "p1" cannot be cast to integer
+statement error
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive_mismatch/**/*.parquet', hive_partitioning => true)
+----
+cannot be cast to the column type
+
+statement ok
+CREATE OR REPLACE TABLE ducklake.test(part_key VARCHAR, part_key2 INT, val VARCHAR);
+
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive_mismatch/**/*.parquet', hive_partitioning => true)
+
+query III
+FROM ducklake.test ORDER BY ALL
+----
+p1 10 hello
+p2 10 world
+p2 20 abc
+
+# now add a non-hive-partitioned file
+statement ok
+COPY (SELECT 'p1' part_key, 10 part_key2, 'non_partitioned' val) TO '${DATA_PATH}/ducklake_add_files_hive_mismatch/non_partitioned_file.parquet'
+
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/ducklake_add_files_hive_mismatch/non_partitioned_file.parquet')
+
+query III
+FROM ducklake.test ORDER BY ALL
+----
+p1 10 hello
+p1 10 non_partitioned
+p2 10 world
+p2 20 abc
diff --git a/tests/sqllogictests/sql/add_files/add_files_list.test b/tests/sqllogictests/sql/add_files/add_files_list.test
new file mode 100644
index 0000000..533e25b
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_files_list.test
@@ -0,0 +1,37 @@
+# name: test/sql/add_files/add_files_list.test
+# description: test ducklake adding a list of files directly
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files/', METADATA_CATALOG 'metadata');
+
+statement ok
+CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR);
+
+statement ok
+INSERT INTO ducklake.test VALUES (100, 'hello');
+
+# write two parquet files outside of DuckLake
+statement ok
+COPY (SELECT 200 col1, 'world' col2) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file.parquet';
+
+statement ok
+COPY (SELECT 300 col1, '!' col2) TO '${DATA_PATH}/ducklake_add_files/main/test/my_file_2.parquet';
+
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', ['${DATA_PATH}/ducklake_add_files/main/test/my_file.parquet','${DATA_PATH}/ducklake_add_files/main/test/my_file_2.parquet'])
+
+query II
+FROM ducklake.test ORDER BY col1;
+----
+100 hello
+200 world
+300 !
\ No newline at end of file
diff --git a/tests/sqllogictests/sql/add_files/add_files_missing_columns.test b/tests/sqllogictests/sql/add_files/add_files_missing_columns.test
new file mode 100644
index 0000000..9d2d8cf
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_files_missing_columns.test
@@ -0,0 +1,47 @@
+# name: test/sql/add_files/add_files_missing_columns.test
+# description: test ducklake adding files with missing columns
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_missing_columns');
+
+statement ok
+CREATE TABLE ducklake.test(i INTEGER, j INTEGER);
+
+statement ok
+COPY (SELECT 42 j) TO '${DATA_PATH}/missing_column.parquet'
+
+# adding a file with missing columns results in an error
+statement error
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/missing_column.parquet')
+----
+Column "i" exists in table "test" but was not found in file
+
+statement ok
+BEGIN
+
+# UNLESS we specify "allow_missing"
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/missing_column.parquet', allow_missing => true)
+
+query II
+FROM ducklake.test
+----
+NULL 42
+
+statement ok
+COMMIT
+
+query II
+FROM ducklake.test
+----
+NULL 42
diff --git a/tests/sqllogictests/sql/add_files/add_files_missing_fields.test b/tests/sqllogictests/sql/add_files/add_files_missing_fields.test
new file mode 100644
index 0000000..a2c4457
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_files_missing_fields.test
@@ -0,0 +1,47 @@
+# name: test/sql/add_files/add_files_missing_fields.test
+# description: test ducklake adding files with missing fields
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_missing_fields');
+
+statement ok
+CREATE TABLE ducklake.test(s STRUCT(i INTEGER, j INTEGER));
+
+statement ok
+COPY (SELECT {'j': 84} s) TO '${DATA_PATH}/missing_field.parquet'
+
+# adding a file with a missing field results in an error
+statement error
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/missing_field.parquet')
+----
+Column "s.i" exists in table "test" but was not found in file
+
+statement ok
+BEGIN
+
+# UNLESS we specify "allow_missing"
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/missing_field.parquet', allow_missing => true)
+
+query I
+FROM ducklake.test
+----
+{'i': NULL, 'j': 84}
+
+statement ok
+COMMIT
+
+query I
+FROM ducklake.test
+----
+{'i': NULL, 'j': 84}
diff --git a/tests/sqllogictests/sql/add_files/add_files_nested.test b/tests/sqllogictests/sql/add_files/add_files_nested.test
new file mode 100644
index 0000000..c726f94
--- /dev/null
+++ b/tests/sqllogictests/sql/add_files/add_files_nested.test
@@ -0,0 +1,53 @@
+# name: test/sql/add_files/add_files_nested.test
+# description: test ducklake adding files with nested types directly
+# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_nested'); + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INTEGER, j INTEGER), col2 STRUCT(col1 INTEGER, col2 INTEGER)); + +statement ok +BEGIN + +statement ok +COPY (SELECT {'i': 1, 'j': 2} col1, {'col1': 100, 'col2': 200} col2) TO '${DATA_PATH}/nested1.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/nested1.parquet') + +query II +FROM ducklake.test +---- +{'i': 1, 'j': 2} {'col1': 100, 'col2': 200} + +statement ok +COMMIT + +query II +FROM ducklake.test +---- +{'i': 1, 'j': 2} {'col1': 100, 'col2': 200} + +# columns can be arbitrarily re-ordered, as can struct fields +statement ok +COPY (SELECT {'col2': 400, 'col1': 200} col2, {'j': 20, 'i': 10} col1) TO '${DATA_PATH}/nested2.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/nested2.parquet') + +query II +FROM ducklake.test +---- +{'i': 1, 'j': 2} {'col1': 100, 'col2': 200} +{'i': 10, 'j': 20} {'col1': 200, 'col2': 400} diff --git a/tests/sqllogictests/sql/add_files/add_files_rename.test b/tests/sqllogictests/sql/add_files/add_files_rename.test new file mode 100644 index 0000000..3de8e11 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_rename.test @@ -0,0 +1,38 @@ +# name: test/sql/add_files/add_files_rename.test +# description: test ducklake adding files with rename +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_rename') + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR); + +# write a parquet file outside of DuckLake +statement ok +COPY (SELECT 100 col1, 'world' col2) TO '${DATA_PATH}/rename_file.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/rename_file.parquet') + +query II +SELECT col1, col2 FROM ducklake.test +---- +100 world + +statement ok +ALTER TABLE ducklake.test RENAME col1 TO new_col + +query II +SELECT new_col, col2 FROM ducklake.test +---- +100 world diff --git a/tests/sqllogictests/sql/add_files/add_files_table_changes.test b/tests/sqllogictests/sql/add_files/add_files_table_changes.test new file mode 100644 index 0000000..0df95dc --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_table_changes.test @@ -0,0 +1,38 @@ +# name: test/sql/add_files/add_files_table_changes.test +# description: test ducklake adding files with table_changes function +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_table_changes'); + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +COPY (SELECT 42 j, 84 i) TO '${DATA_PATH}/table_changes.parquet' + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/table_changes.parquet') + +query IIIII +FROM ducklake.table_changes('test', 2, 2) +---- +2 0 insert 84 42 + +# now with deletions +statement ok +DELETE FROM ducklake.test + +query IIIII +FROM ducklake.table_changes('test', 
3, 3) +---- +3 0 delete 84 42 diff --git a/tests/sqllogictests/sql/add_files/add_files_transaction_local.test b/tests/sqllogictests/sql/add_files/add_files_transaction_local.test new file mode 100644 index 0000000..9d30d29 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_transaction_local.test @@ -0,0 +1,41 @@ +# name: test/sql/add_files/add_files_transaction_local.test +# description: test ducklake adding files to a transaction-local table +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_transaction_local'); + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR); + +# write a parquet file outside of DuckLake +statement ok +COPY (SELECT 1 col1, 'hello world' col2) TO '${DATA_PATH}/my_file.parquet'; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/my_file.parquet') + +query II +FROM ducklake.test +---- +1 hello world + +statement ok +COMMIT + +query II +FROM ducklake.test +---- +1 hello world diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_decimal.test b/tests/sqllogictests/sql/add_files/add_files_type_check_decimal.test new file mode 100644 index 0000000..9aef7b0 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_decimal.test @@ -0,0 +1,89 @@ +# name: test/sql/add_files/add_files_type_check_decimal.test +# description: test ducklake adding files with decimals +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_decimals') + +# create a bunch of parquet files with different decimals +statement ok +COPY (SELECT 999.9::DECIMAL(4,1) col1) TO '${DATA_PATH}/dec4_1.parquet' + +statement ok +COPY (SELECT 999999.999::DECIMAL(9,3) col1) TO '${DATA_PATH}/dec9_3.parquet' + +statement ok +COPY (SELECT 99999999999.9999999::DECIMAL(18,7) col1) TO '${DATA_PATH}/dec18_7.parquet' + +############ +# dec_4_1 # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 DECIMAL(4,1)); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec9_3.parquet') +---- +Incompatible decimal precision/scale + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec4_1.parquet') + +query I +FROM ducklake.test +---- +999.9 + +############ +# dec_9_3 # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 DECIMAL(9,3)); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec18_7.parquet') +---- +Incompatible decimal precision/scale + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec4_1.parquet') + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec9_3.parquet') + +query I +FROM ducklake.test +---- +999.900 +999999.999 + +############ +# dec_18_7 # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 DECIMAL(18,7)); + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec4_1.parquet') + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/dec9_3.parquet') + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', 
'${DATA_PATH}/dec18_7.parquet') + +query I +FROM ducklake.test +---- +999.9000000 +999999.9990000 +99999999999.9999999 diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_float.test b/tests/sqllogictests/sql/add_files/add_files_type_check_float.test new file mode 100644 index 0000000..787f3e1 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_float.test @@ -0,0 +1,72 @@ +# name: test/sql/add_files/add_files_type_check_float.test +# description: test ducklake adding files with floats +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_type_float') + +# create a bunch of parquet files with different types +statement ok +COPY (SELECT true col1) TO '${DATA_PATH}/bool.parquet' + +statement ok +COPY (SELECT 42.0::FLOAT col1) TO '${DATA_PATH}/float.parquet' + +statement ok +COPY (SELECT 42.0::DOUBLE col1) TO '${DATA_PATH}/double.parquet' + +######### +# float # +######### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 FLOAT); + +foreach UNACCEPTED_TYPE bool double + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +FLOAT + +endloop + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/float.parquet') + +query I +FROM ducklake.test +---- +42.0 + +######### +# double # +######### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 DOUBLE); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/bool.parquet') +---- +FLOAT + +foreach ACCEPTED_TYPE float double + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test +---- +42.0 +42.0 diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_integer.test b/tests/sqllogictests/sql/add_files/add_files_type_check_integer.test new file mode 100644 index 0000000..e0203e3 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_integer.test @@ -0,0 +1,273 @@ +# name: test/sql/add_files/add_files_type_check_integer.test +# description: test ducklake adding files with different types +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_type_mismatch') + +# create a bunch of parquet files with different types +statement ok +COPY (SELECT true col1) TO '${DATA_PATH}/bool.parquet' + +foreach SIGNED_TYPE tinyint smallint int bigint + +statement ok +COPY (SELECT -1::${SIGNED_TYPE} col1) TO '${DATA_PATH}/${SIGNED_TYPE}.parquet' + +endloop + +foreach UNSIGNED_TYPE utinyint usmallint uinteger ubigint + +statement ok +COPY (SELECT 42::${UNSIGNED_TYPE} col1) TO '${DATA_PATH}/${UNSIGNED_TYPE}.parquet' + +endloop + +############ +# booleans # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 BOOLEAN); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int.parquet') +---- +BOOLEAN + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/bool.parquet') + +query I +FROM ducklake.test +---- +true + +statement ok +DROP TABLE ducklake.test + +############ +# tinyint # +########### 
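+# as in all integer sections below, only source types whose entire value range
+# fits in the column type are accepted, e.g. an INT file cannot back a TINYINT column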
+statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TINYINT); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int.parquet') +---- +TINYINT + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/tinyint.parquet') + +query I +FROM ducklake.test +---- +-1 + +############ +# smallint # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 SMALLINT); + +foreach UNACCEPTED_TYPE bool int bigint uinteger ubigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +SMALLINT + +endloop + +foreach ACCEPTED_TYPE tinyint smallint utinyint + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +-1 +-1 +42 + +########### +# integer # +########### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 INTEGER); + +foreach UNACCEPTED_TYPE bool bigint uinteger ubigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +Expected + +endloop + +foreach ACCEPTED_TYPE tinyint smallint int utinyint usmallint + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +-1 +-1 +-1 +42 +42 + +########### +# bigint # +########### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 BIGINT); + +foreach UNACCEPTED_TYPE bool ubigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +Expected + +endloop + +foreach ACCEPTED_TYPE tinyint smallint int bigint utinyint usmallint uinteger + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +-1 +-1 +-1 +-1 +42 +42 +42 + +############# +# utinyint # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 UTINYINT); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int.parquet') +---- +UTINYINT + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/utinyint.parquet') + +query I +FROM ducklake.test +---- +42 + +############# +# usmallint # +############# +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 USMALLINT); + +foreach UNACCEPTED_TYPE bool tinyint smallint int bigint uinteger ubigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +USMALLINT + +endloop + +foreach ACCEPTED_TYPE utinyint usmallint + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +42 +42 + +############# +# uinteger # +############# +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 UINTEGER); + +foreach UNACCEPTED_TYPE bool tinyint smallint int bigint ubigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +UINTEGER + +endloop + +foreach ACCEPTED_TYPE utinyint usmallint uinteger + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +42 +42 +42 + +############# +# ubigint # +############# +statement ok +CREATE OR REPLACE TABLE 
ducklake.test(col1 UBIGINT); + +foreach UNACCEPTED_TYPE bool tinyint smallint int bigint + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${UNACCEPTED_TYPE}.parquet') +---- +UBIGINT + +endloop + +foreach ACCEPTED_TYPE utinyint usmallint uinteger ubigint + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${ACCEPTED_TYPE}.parquet') + +endloop + +query I +FROM ducklake.test ORDER BY ALL +---- +42 +42 +42 +42 diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_nested.test b/tests/sqllogictests/sql/add_files/add_files_type_check_nested.test new file mode 100644 index 0000000..811dc68 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_nested.test @@ -0,0 +1,105 @@ +# name: test/sql/add_files/add_files_type_check_nested.test +# description: test ducklake adding files with nested types +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_type_nested') + +statement ok +COPY (SELECT [true]::BOOL[] AS col1) TO '${DATA_PATH}/bool_list.parquet'; + +statement ok +COPY (SELECT [42]::INT[] AS col1) TO '${DATA_PATH}/int_list.parquet'; + +statement ok +COPY (SELECT {'list': true} AS col1) TO '${DATA_PATH}/bool_struct.parquet'; + +statement ok +COPY (SELECT {'list': 42::INTEGER} AS col1) TO '${DATA_PATH}/int_struct.parquet'; + +statement ok +COPY (SELECT MAP([true], [false]) AS col1) TO '${DATA_PATH}/bool_map.parquet'; + +statement ok +COPY (SELECT MAP([42], [84]) AS col1) TO '${DATA_PATH}/int_map.parquet'; + +############ +# int list # +############ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 INT[]); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/bool_list.parquet') +---- +BOOLEAN + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_struct.parquet') +---- +STRUCT + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_list.parquet') + +query I +FROM ducklake.test +---- +[42] + +############## +# int struct # +############## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 STRUCT(list INT)); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/bool_struct.parquet') +---- +BOOLEAN + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_list.parquet') +---- +LIST + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_struct.parquet') + +query I +FROM ducklake.test +---- +{'list': 42} + +########### +# int map # +########### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 MAP(INT, INT)); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/bool_map.parquet') +---- +BOOLEAN + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_list.parquet') +---- +LIST + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/int_map.parquet') + +query I +FROM ducklake.test +---- +{42=84} diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_string_blob.test b/tests/sqllogictests/sql/add_files/add_files_type_check_string_blob.test new file mode 100644 index 0000000..f4212e3 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_string_blob.test @@ -0,0 +1,83 @@ +# name: 
test/sql/add_files/add_files_type_check_string_blob.test +# description: test ducklake adding files with string/blob +# group: [add_files] + +require ducklake + +require parquet + +require json + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_string_blob') + +statement ok +COPY (SELECT 'hello world' AS col1) TO '${DATA_PATH}/string.parquet'; + +statement ok +COPY (SELECT '\x80'::BLOB AS col1) TO '${DATA_PATH}/blob.parquet'; + +statement ok +COPY (SELECT '{"hello": "world"}'::JSON AS col1) TO '${DATA_PATH}/json.parquet'; + +########## +# string # +########## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 VARCHAR); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/blob.parquet') +---- +BLOB + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/string.parquet') + +query I +FROM ducklake.test +---- +hello world + +######## +# blob # +######## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 BLOB); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/string.parquet') +---- +VARCHAR + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/blob.parquet') + +query I +FROM ducklake.test +---- +\x80 + +######## +# json # +######## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 JSON); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/string.parquet') +---- +VARCHAR + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/json.parquet') + +query I +FROM ducklake.test +---- +{"hello": "world"} diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_timestamp.test b/tests/sqllogictests/sql/add_files/add_files_type_check_timestamp.test new file mode 100644 index 0000000..ea0f99b --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_timestamp.test @@ -0,0 +1,230 @@ +# name: test/sql/add_files/add_files_type_check_timestamp.test +# description: test ducklake adding files with various timestamp types +# group: [add_files] + +require ducklake + +require parquet + +require icu + +statement ok +SET TimeZone='UTC'; + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_type_timestamp') + +statement ok +COPY (SELECT DATE '2000-01-01' AS col1) TO '${DATA_PATH}/date.parquet'; + +statement ok +COPY (SELECT TIME '20:12:24' AS col1) TO '${DATA_PATH}/time.parquet'; + +statement ok +COPY (SELECT TIMESTAMP '2020-02-03 12:23:34.123456' AS col1) TO '${DATA_PATH}/timestamp.parquet'; + +statement ok +COPY (SELECT TIMESTAMPTZ '2020-02-03 12:23:34.123456' AS col1) TO '${DATA_PATH}/timestamptz.parquet'; + +statement ok +COPY (SELECT TIMESTAMP_S '2020-02-03 12:23:34' AS col1) TO '${DATA_PATH}/timestamp_s.parquet'; + +statement ok +COPY (SELECT TIMESTAMP_MS '2020-02-03 12:23:34.123' AS col1) TO '${DATA_PATH}/timestamp_ms.parquet'; + +statement ok +COPY (SELECT TIMESTAMP_NS '2020-02-03 12:23:34.123456789' AS col1) TO '${DATA_PATH}/timestamp_ns.parquet'; + +######## +# date # +######## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 DATE); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/timestamp.parquet') +---- +TIMESTAMP + +statement error +CALL 
ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/time.parquet') +---- +TIME + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/date.parquet') + +query I +FROM ducklake.test +---- +2000-01-01 + +######## +# time # +######## +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIME); + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/timestamp.parquet') +---- +TIMESTAMP + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/date.parquet') +---- +DATE + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/time.parquet') + +query I +FROM ducklake.test +---- +20:12:24 + +############### +# timestamp_s # +############### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIMESTAMP_S); + +foreach unaccepted_type date time timestamp_ns timestamptz + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${unaccepted_type}.parquet') +---- +TIMESTAMP + +endloop + +foreach accepted_type timestamp_s timestamp_ms timestamp + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${accepted_type}.parquet') + +endloop + +query I +FROM ducklake.test +---- +2020-02-03 12:23:34 +2020-02-03 12:23:34 +2020-02-03 12:23:34 + +################ +# timestamp_ms # +################ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIMESTAMP_MS); + +foreach unaccepted_type date time timestamp_ns timestamptz + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${unaccepted_type}.parquet') +---- +TIMESTAMP + +endloop + +foreach accepted_type timestamp_s timestamp_ms timestamp + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${accepted_type}.parquet') + +endloop + +query I rowsort +FROM ducklake.test +---- +2020-02-03 12:23:34 +2020-02-03 12:23:34.123 +2020-02-03 12:23:34.123 + +############# +# timestamp # +############# +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIMESTAMP); + +foreach unaccepted_type date time timestamptz + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${unaccepted_type}.parquet') +---- +TIMESTAMP + +endloop + +foreach accepted_type timestamp_s timestamp_ms timestamp timestamp_ns + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${accepted_type}.parquet') + +endloop + +query I rowsort +FROM ducklake.test +---- +2020-02-03 12:23:34 +2020-02-03 12:23:34.123 +2020-02-03 12:23:34.123456 +2020-02-03 12:23:34.123456 + +################ +# timestamp_ns # +################ +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIMESTAMP_NS); + +foreach unaccepted_type date time timestamptz + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${unaccepted_type}.parquet') +---- +TIMESTAMP + +endloop + +foreach accepted_type timestamp_s timestamp_ms timestamp timestamp_ns + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${accepted_type}.parquet') + +endloop + +query I rowsort +FROM ducklake.test +---- +2020-02-03 12:23:34 +2020-02-03 12:23:34.123 +2020-02-03 12:23:34.123456 +2020-02-03 12:23:34.123456789 + +############### +# timestamptz # +############### +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 TIMESTAMPTZ); + +foreach unaccepted_type date time timestamp + +statement error +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/${unaccepted_type}.parquet') +---- +TIMESTAMP + +endloop + 
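+# unlike the naive timestamp columns above, only TIMESTAMPTZ itself is accepted
+# here; timezone-less timestamps are not implicitly promoted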
+statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/timestamptz.parquet') + +query I rowsort +FROM ducklake.test +---- +2020-02-03 12:23:34.123456+00 diff --git a/tests/sqllogictests/sql/add_files/add_files_type_check_uuid.test b/tests/sqllogictests/sql/add_files/add_files_type_check_uuid.test new file mode 100644 index 0000000..008a8a3 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_files_type_check_uuid.test @@ -0,0 +1,28 @@ +# name: test/sql/add_files/add_files_type_check_uuid.test +# description: test ducklake adding files with UUID +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_files_uuid') + +statement ok +COPY (SELECT uuid() AS col1) TO '${DATA_PATH}/uuid.parquet'; + +statement ok +CREATE OR REPLACE TABLE ducklake.test(col1 UUID); + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/uuid.parquet') + +query I +SELECT uuid_extract_version(col1) FROM ducklake.test +---- +4 diff --git a/tests/sqllogictests/sql/add_files/add_old_list.test b/tests/sqllogictests/sql/add_files/add_old_list.test new file mode 100644 index 0000000..e61474f --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_old_list.test @@ -0,0 +1,29 @@ +# name: test/sql/add_files/add_old_list.test +# description: test ducklake adding a parquet file with legacy avro list layout +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/add_old_list'); + +statement ok +USE ducklake + +statement ok +CREATE TABLE test AS SELECT * FROM read_parquet('data/parquet/old_list_structure.parquet') +WITH NO DATA; + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', 'data/parquet/old_list_structure.parquet') + +query I +FROM test; +---- +[[1, 2], [3, 4]] diff --git a/tests/sqllogictests/sql/add_files/add_removed_files.test b/tests/sqllogictests/sql/add_files/add_removed_files.test new file mode 100644 index 0000000..e131974 --- /dev/null +++ b/tests/sqllogictests/sql/add_files/add_removed_files.test @@ -0,0 +1,39 @@ +# name: test/sql/add_files/add_removed_files.test +# description: test ducklake adding files that were previously removed +# group: [add_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_removed_files'); + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR); + +statement ok +INSERT INTO ducklake.test VALUES (100, 'hello'); + +statement ok +SET VARIABLE parquet_files = (SELECT LIST(data_file) FROM ducklake_list_files('ducklake', 'test')) + +statement ok +DROP TABLE ducklake.test + +# Now we create the table again +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 VARCHAR); + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', getvariable('parquet_files')[1]) + +query II +FROM ducklake.test +---- +100 hello \ No newline at end of file diff --git a/tests/sqllogictests/sql/add_files/add_rollback.test b/tests/sqllogictests/sql/add_files/add_rollback.test new file mode 100644 index 0000000..7bf3c84 --- /dev/null +++ 
b/tests/sqllogictests/sql/add_files/add_rollback.test
@@ -0,0 +1,41 @@
+# name: test/sql/add_files/add_rollback.test
+# description: test that ducklake does not delete the original file if ducklake_add_data_files rolls back
+# group: [add_files]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/add_rollback');
+
+statement ok
+USE ducklake
+
+statement ok
+CREATE TABLE test(col1 INTEGER, col2 VARCHAR);
+
+statement ok
+INSERT INTO test VALUES (1,2);
+
+statement ok
+COPY (SELECT 200 col1, 'world' col2 LIMIT 1) TO '${DATA_PATH}/add_rollback/file.parquet';
+
+statement ok
+BEGIN
+
+statement ok
+CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/add_rollback/file.parquet');
+
+statement ok
+ROLLBACK
+
+# our file should still exist on disk
+query I
+SELECT COUNT(*) FROM GLOB('${DATA_PATH}/add_rollback/*.parquet')
+----
+1
diff --git a/tests/sqllogictests/sql/alter/add_column.test b/tests/sqllogictests/sql/alter/add_column.test
new file mode 100644
index 0000000..3dc1b43
--- /dev/null
+++ b/tests/sqllogictests/sql/alter/add_column.test
@@ -0,0 +1,65 @@
+# name: test/sql/alter/add_column.test
+# description: test ducklake adding columns
+# group: [alter]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_col_files')
+
+statement ok
+CREATE TABLE ducklake.test(col1 INTEGER);
+
+statement ok
+ALTER TABLE ducklake.test ADD COLUMN new_col2 INTEGER
+
+statement error
+ALTER TABLE ducklake.test ADD COLUMN new_col2 INTEGER
+----
+already exists
+
+statement ok
+ALTER TABLE ducklake.test ADD COLUMN IF NOT EXISTS new_col2 INTEGER
+
+statement ok
+INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3);
+
+query II
+SELECT col1, new_col2 FROM ducklake.test
+----
+1 2
+NULL 3
+
+statement ok
+ALTER TABLE ducklake.test ADD COLUMN new_col3 VARCHAR
+
+query IIIIII
+DESCRIBE ducklake.test
+----
+col1 INTEGER YES NULL NULL NULL
+new_col2 INTEGER YES NULL NULL NULL
+new_col3 VARCHAR YES NULL NULL NULL
+
+statement ok
+INSERT INTO ducklake.test VALUES (1, 2, 'hello'), (NULL, 3, 'world');
+
+query III rowsort
+SELECT * FROM ducklake.test
+----
+1 2 NULL
+1 2 hello
+NULL 3 NULL
+NULL 3 world
+
+# filter pushdown on a newly added column
+query III rowsort
+SELECT * FROM ducklake.test WHERE new_col3='hello'
+----
+1 2 hello
diff --git a/tests/sqllogictests/sql/alter/add_column_nested.test b/tests/sqllogictests/sql/alter/add_column_nested.test
new file mode 100644
index 0000000..5642bf0
--- /dev/null
+++ b/tests/sqllogictests/sql/alter/add_column_nested.test
@@ -0,0 +1,40 @@
+# name: test/sql/alter/add_column_nested.test
+# description: test ducklake adding nested columns
+# group: [alter]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_nested_col_files', METADATA_CATALOG 'xx')
+
+statement ok
+CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT));
+
+statement ok
+INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2})
+
+statement ok
+ALTER TABLE ducklake.test ADD COLUMN new_col2 INT[]
+
+statement ok
+INSERT INTO ducklake.test VALUES ({'i': 100, 'j': 200}, [])
+
+statement ok
+ALTER TABLE ducklake.test ADD COLUMN 
new_col3 STRUCT(k INT, v INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 42, 'j': NULL}, [1, 2, 3], {'k': 1, 'v': 2}) + +query III +SELECT * FROM ducklake.test +---- +{'i': 1, 'j': 2} NULL NULL +{'i': 100, 'j': 200} [] NULL +{'i': 42, 'j': NULL} [1, 2, 3] {'k': 1, 'v': 2} diff --git a/tests/sqllogictests/sql/alter/add_column_transaction_local.test b/tests/sqllogictests/sql/alter/add_column_transaction_local.test new file mode 100644 index 0000000..8715be1 --- /dev/null +++ b/tests/sqllogictests/sql/alter/add_column_transaction_local.test @@ -0,0 +1,32 @@ +# name: test/sql/alter/add_column_transaction_local.test +# description: test ducklake add columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_col_tl_files') + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER); + +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (42); + +statement ok +ALTER TABLE ducklake.test ADD COLUMN new_col2 INTEGER + +query II +SELECT * FROM ducklake.test +---- +42 NULL diff --git a/tests/sqllogictests/sql/alter/alter_timestamptz_promotion.test b/tests/sqllogictests/sql/alter/alter_timestamptz_promotion.test new file mode 100644 index 0000000..3bf702e --- /dev/null +++ b/tests/sqllogictests/sql/alter/alter_timestamptz_promotion.test @@ -0,0 +1,33 @@ +# name: test/sql/alter/alter_timestamptz_promotion.test +# description: test ducklake timestamp to timestamptz promotion +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/alter_timestamptz_promotion') + +statement ok +CREATE TABLE ducklake.test(col1 TIMESTAMP); + +statement ok +INSERT INTO ducklake.test VALUES ('2025-01-15 12:30:45'::TIMESTAMP) + +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 TYPE TIMESTAMPTZ; + +statement ok +INSERT INTO ducklake.test VALUES ('2025-01-15 12:30:45'::TIMESTAMPTZ) + +query I +FROM ducklake.test +---- +2025-01-15 12:30:45+00 +2025-01-15 12:30:45+00 diff --git a/tests/sqllogictests/sql/alter/drop_column.test b/tests/sqllogictests/sql/alter/drop_column.test new file mode 100644 index 0000000..0f2885b --- /dev/null +++ b/tests/sqllogictests/sql/alter/drop_column.test @@ -0,0 +1,52 @@ +# name: test/sql/alter/drop_column.test +# description: test ducklake drop columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_drop_col_files') + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 INTEGER, col3 INTEGER); + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col3 + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col2 + +statement error +ALTER TABLE ducklake.test DROP COLUMN col2 +---- +does not + +statement ok +ALTER TABLE ducklake.test DROP COLUMN IF EXISTS col2 + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1 +---- +only has one column remaining + +statement ok +INSERT INTO ducklake.test VALUES (1), (2), (3); + +query I +FROM ducklake.test +---- +1 +2 +3 + +statement error +ALTER TABLE ducklake.test DROP COLUMN nonexistent_column +---- +nonexistent_column 
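+
+# sanity check: the single surviving column is all that DESCRIBE reports
+query IIIIII
+DESCRIBE ducklake.test
+----
+col1 INTEGER YES NULL NULL NULL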
diff --git a/tests/sqllogictests/sql/alter/drop_column_nested.test b/tests/sqllogictests/sql/alter/drop_column_nested.test new file mode 100644 index 0000000..35df6a6 --- /dev/null +++ b/tests/sqllogictests/sql/alter/drop_column_nested.test @@ -0,0 +1,38 @@ +# name: test/sql/alter/drop_column_nested.test +# description: test ducklake dropping nested columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_drop_nested_col_files', METADATA_CATALOG 'xx') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT), col2 STRUCT(k INT, v INT), col3 INT[]); + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col2 + +statement ok +ALTER TABLE ducklake.test ADD COLUMN new_col2 INT[] + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col3 + +statement ok +ALTER TABLE ducklake.test ADD COLUMN new_col3 STRUCT(k INT, v INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 42, 'j': NULL}, [1, 2, 3], {'k': 1, 'v': 2}) + +query III +SELECT col1, new_col2, new_col3 FROM ducklake.test +---- +{'i': 42, 'j': NULL} [1, 2, 3] {'k': 1, 'v': 2} diff --git a/tests/sqllogictests/sql/alter/expire_snapshot_bug.test b/tests/sqllogictests/sql/alter/expire_snapshot_bug.test new file mode 100644 index 0000000..e248305 --- /dev/null +++ b/tests/sqllogictests/sql/alter/expire_snapshot_bug.test @@ -0,0 +1,97 @@ +# name: test/sql/alter/expire_snapshot_bug.test +# description: test an issue that would delete files from a renamed table when expiring the original table +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/expire_snapshot_bug', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake; + +statement ok +create table a(i integer); + +statement ok +insert into a values(0); + +statement ok +alter table a rename to b; + +statement ok +insert into b values(1); + + +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +1 {tables_created=[main.a]} +2 {tables_inserted_into=[1]} +3 {tables_created=[main.b]} +4 {tables_inserted_into=[1]} + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [0,1,2]) + +query I +FROM b; +---- +0 +1 + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [3]) + +query I +FROM b; +---- +0 +1 + +statement ok +alter table b rename to c; + +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +4 {tables_inserted_into=[1]} +5 {tables_created=[main.c]} + +query I +FROM c; +---- +0 +1 + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [4,5]) + +query I +FROM c; +---- +0 +1 + +statement ok +DROP TABLE c; + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [5]) + +# all traces of the table are gone +foreach tbl ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats ducklake_data_file ducklake_delete_file + +query I +SELECT COUNT(*) FROM ducklake_meta.${tbl} +---- +0 + +endloop \ No newline at end of file diff --git a/tests/sqllogictests/sql/alter/mixed_alter.test b/tests/sqllogictests/sql/alter/mixed_alter.test new file mode 100644 index 0000000..b3ef286 --- /dev/null +++ b/tests/sqllogictests/sql/alter/mixed_alter.test @@ -0,0 +1,40 @@ +# name:
test/sql/alter/mixed_alter.test +# description: test ducklake mixed alter statements +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_mixed_alter_files') + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 INTEGER, col3 INTEGER); + +statement ok +INSERT INTO ducklake.test (col1, col2, col3) VALUES (1, 2, 3); + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col2 + +statement ok +INSERT INTO ducklake.test (col1, col3) VALUES (10, 20); + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col2 VARCHAR + +statement ok +INSERT INTO ducklake.test (col1, col3, col2) VALUES (100, 300, 'hello world'); + +query III +SELECT col1, col2, col3 FROM ducklake.test +---- +1 NULL 3 +10 NULL 20 +100 hello world 300 diff --git a/tests/sqllogictests/sql/alter/mixed_alter2.test b/tests/sqllogictests/sql/alter/mixed_alter2.test new file mode 100644 index 0000000..d94e5ae --- /dev/null +++ b/tests/sqllogictests/sql/alter/mixed_alter2.test @@ -0,0 +1,45 @@ +# name: test/sql/alter/mixed_alter2.test +# description: test ducklake mixed alter statements with default values and nested columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/mixed_alter2_files'); + +statement ok +USE ducklake; + +statement ok +CREATE TABLE tbl(col1 INTEGER); + +statement ok +INSERT INTO tbl VALUES (42); + +statement ok +ALTER TABLE tbl ADD COLUMN col2 VARCHAR; + +statement ok +ALTER TABLE tbl ADD COLUMN new_column VARCHAR DEFAULT 'my_default'; + +statement ok +ALTER TABLE tbl ADD COLUMN nested_column STRUCT(i INTEGER); + +query IIII +FROM tbl +---- +42 NULL my_default NULL + +statement ok +ALTER TABLE tbl DROP COLUMN new_column; + +query III +FROM tbl +---- +42 NULL NULL diff --git a/tests/sqllogictests/sql/alter/promote_type.test b/tests/sqllogictests/sql/alter/promote_type.test new file mode 100644 index 0000000..8c86200 --- /dev/null +++ b/tests/sqllogictests/sql/alter/promote_type.test @@ -0,0 +1,49 @@ +# name: test/sql/alter/promote_type.test +# description: test ducklake promoting integer columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_promote_type_files', METADATA_CATALOG 'xx') + +statement ok +CREATE TABLE ducklake.test(col1 TINYINT); + +statement ok +INSERT INTO ducklake.test VALUES (25) + +statement error +INSERT INTO ducklake.test VALUES (1000) +---- +out of range + +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE INT; + +statement ok +INSERT INTO ducklake.test VALUES (1000) + +query I +FROM ducklake.test +---- +25 +1000 + +# cannot narrow type (only widening is allowed) +statement error +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE SMALLINT; +---- +only widening + +statement error +ALTER TABLE ducklake.test ALTER COLUMN nonexistent_column SET DATA TYPE SMALLINT; +---- +nonexistent_column diff --git a/tests/sqllogictests/sql/alter/rename_column.test b/tests/sqllogictests/sql/alter/rename_column.test new file mode 100644 index 0000000..96d9aa0 --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_column.test @@ -0,0 +1,50 @@ +# name: test/sql/alter/rename_column.test +# description: test
ducklake renaming columns +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_col_files') + +statement ok +CREATE TABLE ducklake.test(col1 INTEGER, col2 INTEGER); + +statement ok +ALTER TABLE ducklake.test RENAME COLUMN col1 TO new_col1 + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +query II +SELECT new_col1, col2 FROM ducklake.test +---- +1 2 +NULL 3 + +statement ok +ALTER TABLE ducklake.test RENAME COLUMN col2 TO new_col2 + +query IIIIII +DESCRIBE ducklake.test +---- +new_col1 INTEGER YES NULL NULL NULL +new_col2 INTEGER YES NULL NULL NULL + +query II +SELECT new_col1, new_col2 FROM ducklake.test +---- +1 2 +NULL 3 + +statement error +ALTER TABLE ducklake.test RENAME COLUMN blablabla TO k +---- +column blablabla does not exist diff --git a/tests/sqllogictests/sql/alter/rename_entity.test b/tests/sqllogictests/sql/alter/rename_entity.test new file mode 100644 index 0000000..7ed29ae --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_entity.test @@ -0,0 +1,57 @@ +# name: test/sql/alter/rename_entity.test +# description: Make sure that renamed entities do not show up in the list of entities +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.view_base_table AS FROM range(42); + +statement ok +CREATE TABLE ducklake.original_table AS FROM range(84); + +statement ok +CREATE VIEW ducklake.original_view AS FROM ducklake.view_base_table; + +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.original_table RENAME TO new_table_name + +statement ok +ALTER VIEW ducklake.original_view RENAME TO new_view_name + +query I +SELECT table_name FROM duckdb_tables() WHERE database_name='ducklake' ORDER BY table_name; +---- +new_table_name +view_base_table + +query I +SELECT view_name FROM duckdb_views() WHERE database_name='ducklake' ORDER BY view_name +---- +new_view_name + +query I +SELECT count(*) from ducklake.new_view_name; +---- +42 + +query I +SELECT count(*) from ducklake.new_table_name; +---- +84 + +statement ok +COMMIT diff --git a/tests/sqllogictests/sql/alter/rename_table.test b/tests/sqllogictests/sql/alter/rename_table.test new file mode 100644 index 0000000..7b86e4a --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_table.test @@ -0,0 +1,150 @@ +# name: test/sql/alter/rename_table.test +# description: Test renaming tables in DuckLake +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_files', METADATA_CATALOG 'ducklake_meta') + +# test table renames +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (42); + +query I +SELECT * FROM ducklake.test +---- +42 + +statement ok +ALTER TABLE ducklake.test RENAME TO test2 + +query I +SELECT * FROM ducklake.test2 +---- +42 + +# rename a transaction-local table
statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.local_test(i VARCHAR); + +statement ok +INSERT
INTO ducklake.local_test VALUES (42); + +statement ok +ALTER TABLE ducklake.local_test RENAME TO local_test2 + +statement ok +INSERT INTO ducklake.local_test2 VALUES (84); + +query I +SELECT * FROM ducklake.local_test2 ORDER BY ALL +---- +42 +84 + +# and I'll do it again! +statement ok +ALTER TABLE ducklake.local_test2 RENAME TO local_test3 + +query I +SELECT * FROM ducklake.local_test3 +---- +42 +84 + +statement ok +COMMIT + +query I +SELECT * FROM ducklake.local_test3 +---- +42 +84 + +# rename a regular table multiple times in a transaction +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test2 VALUES (1); + +statement ok +ALTER TABLE ducklake.test2 RENAME TO test3 + +statement ok +INSERT INTO ducklake.test3 VALUES (2); + +statement ok +ALTER TABLE ducklake.test3 RENAME TO test4 + +statement ok +INSERT INTO ducklake.test4 VALUES (3); + +statement ok +ALTER TABLE ducklake.test4 RENAME TO test5 + +statement ok +INSERT INTO ducklake.test5 VALUES (4); + +statement ok +COMMIT + +statement error +SELECT * FROM ducklake.test2 +---- +does not exist + +statement error +SELECT * FROM ducklake.test3 +---- +does not exist + +statement error +SELECT * FROM ducklake.test4 +---- +does not exist + +query I +SELECT * FROM ducklake.test5 ORDER BY ALL +---- +1 +2 +3 +4 +42 + +# rename followed by drop +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test5 RENAME TO test6 + +statement ok +DROP TABLE ducklake.test6 + +statement error +SELECT * FROM ducklake.test6 +---- + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.nonexistent_table RENAME TO target +---- +nonexistent_table diff --git a/tests/sqllogictests/sql/alter/rename_table_case.test b/tests/sqllogictests/sql/alter/rename_table_case.test new file mode 100644 index 0000000..09c0bfb --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_table_case.test @@ -0,0 +1,36 @@ +# name: test/sql/alter/rename_table_case.test +# description: Test renaming tables to a different case in DuckLake +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/rename_table_case', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +statement ok +create table MY_TABLE (i integer); + +statement ok +insert into MY_TABLE values(42); + +statement ok +alter table MY_TABLE rename to my_table; + +query I +select * from my_table; +---- +42 + +query I +select * from MY_TABLE; +---- +42 diff --git a/tests/sqllogictests/sql/alter/rename_table_dbt_workload.test b/tests/sqllogictests/sql/alter/rename_table_dbt_workload.test new file mode 100644 index 0000000..a0e6a61 --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_table_dbt_workload.test @@ -0,0 +1,77 @@ +# name: test/sql/alter/rename_table_dbt_workload.test +# description: Test a dbt-style table-swap rename workload in DuckLake +# group: [alter] + +require parquet + +require ducklake + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS my_ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE my_ducklake; + +statement ok +CREATE TABLE my_table AS FROM range(42); + +statement ok +BEGIN TRANSACTION; + +statement ok +CREATE TABLE my_table_tmp AS FROM range(84); + +statement ok +ALTER TABLE my_table RENAME TO my_table_backup; + +statement ok +ALTER TABLE my_table_tmp RENAME TO my_table; +
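+# descriptive note: the two renames above are the classic dbt-style swap -- +# build the replacement under a temporary name, move the live table aside as a +# backup, then rename the replacement into place, all inside one transaction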
+query I +SELECT COUNT(*) AS count FROM my_table; +---- +84 + +query I +SELECT COUNT(*) AS count FROM my_table_backup; +---- +42 + +statement error +SELECT COUNT(*) AS count FROM my_table_tmp; +---- +Catalog Error: Table with name my_table_tmp does not exist! + +query I +SELECT table_name FROM duckdb_tables() WHERE database_name = 'my_ducklake' ORDER BY table_name; +---- +my_table +my_table_backup + +statement ok +COMMIT + +query I +SELECT COUNT(*) AS count FROM my_table; +---- +84 + +query I +SELECT COUNT(*) AS count FROM my_table_backup; +---- +42 + +statement error +SELECT COUNT(*) AS count FROM my_table_tmp; +---- +Catalog Error: Table with name my_table_tmp does not exist! + +query I +SELECT table_name FROM duckdb_tables() WHERE database_name = 'my_ducklake' ORDER BY table_name; +---- +my_table +my_table_backup diff --git a/tests/sqllogictests/sql/alter/rename_table_within_transaction.test b/tests/sqllogictests/sql/alter/rename_table_within_transaction.test new file mode 100644 index 0000000..f465855 --- /dev/null +++ b/tests/sqllogictests/sql/alter/rename_table_within_transaction.test @@ -0,0 +1,43 @@ +# name: test/sql/alter/rename_table_within_transaction.test +# description: Test renaming tables in DuckLake within a transaction +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +use ducklake; + +statement ok +CREATE TABLE tbl(i INTEGER); + +statement ok +INSERT INTO tbl VALUES (999), (100); + +statement ok +BEGIN TRANSACTION + +statement ok +ALTER TABLE tbl RENAME TO tbl2 + +statement error +SELECT * FROM tbl +---- + +query I +SELECT * FROM tbl2 +---- +999 +100 + + +statement ok +COMMIT; \ No newline at end of file diff --git a/tests/sqllogictests/sql/alter/struct_evolution.test b/tests/sqllogictests/sql/alter/struct_evolution.test new file mode 100644 index 0000000..4eaf4af --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution.test @@ -0,0 +1,110 @@ +# name: test/sql/alter/struct_evolution.test +# description: test ducklake struct field evolution +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}) + +# add k TINYINT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(i INT, j INT, k TINYINT); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': 20, 'k': 3}), ({'i': 11, 'j': 21, 'k': 10}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': NULL} +{'i': 10, 'j': 20, 'k': 3} +{'i': 11, 'j': 21, 'k': 10} + +statement error +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': 20, 'k': 1000}) +---- +out of range + +# promote k to INT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(i INT, j INT, k INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 100, 'j': 200, 'k': 1000}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': NULL} +{'i': 10, 'j': 20, 'k': 3} +{'i': 11, 'j': 21, 'k': 10} +{'i': 100, 'j': 200, 'k': 1000} + +# drop i +statement ok +ALTER 
TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j INT, k INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES ({'j': 150, 'k': 1000}), ({'j': 151, 'k': 1001}) + +query I +FROM ducklake.test +---- +{'j': 2, 'k': NULL} +{'j': 20, 'k': 3} +{'j': 21, 'k': 10} +{'j': 200, 'k': 1000} +{'j': 150, 'k': 1000} +{'j': 151, 'k': 1001} + +# drop all original columns +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(k INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES ({'k': 10000}) + +query I +FROM ducklake.test ORDER BY ALL +---- +{'k': 3} +{'k': 10} +{'k': 1000} +{'k': 1000} +{'k': 1001} +{'k': 10000} +{'k': NULL} + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k=1000 +---- +1000 +1000 + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k>3 ORDER BY ALL +---- +10 +1000 +1000 +1001 +10000 + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k IS NULL +---- +NULL diff --git a/tests/sqllogictests/sql/alter/struct_evolution_alter.test b/tests/sqllogictests/sql/alter/struct_evolution_alter.test new file mode 100644 index 0000000..982b095 --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_alter.test @@ -0,0 +1,202 @@ +# name: test/sql/alter/struct_evolution_alter.test +# description: test ducklake struct field evolution via alter +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_alter_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}) + +# add k INTEGER +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col1.k INTEGER + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': NULL} + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.K INT +---- +already exists + +statement ok +ALTER TABLE ducklake.test ADD COLUMN IF NOT EXISTS col1.k INT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.i.s INT +---- +can only be added to structs + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.v.s INT +---- +does not exist + +statement error +ALTER TABLE ducklake.test ADD COLUMN col2.s INT +---- +does not exist + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': 20, 'k': 3}), ({'i': 11, 'j': 21, 'k': 10}), ({'i': 100, 'j': 200, 'k': 1000}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': NULL} +{'i': 10, 'j': 20, 'k': 3} +{'i': 11, 'j': 21, 'k': 10} +{'i': 100, 'j': 200, 'k': 1000} + +# drop i +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.I + +query I +FROM ducklake.test +---- +{'j': 2, 'k': NULL} +{'j': 20, 'k': 3} +{'j': 21, 'k': 10} +{'j': 200, 'k': 1000} + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.i +---- +does not exist + +statement ok +ALTER TABLE ducklake.test DROP COLUMN IF EXISTS col1.i + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.j.x +---- +does not exist + +statement ok +INSERT INTO ducklake.test VALUES ({'j': 150, 'k': 1000}), ({'j': 151, 'k': 1001}) + +query I +FROM ducklake.test +---- +{'j': 2, 'k': NULL} +{'j': 20, 'k': 3} +{'j': 21, 'k': 10} +{'j': 200, 'k': 1000} +{'j': 150, 'k': 1000} +{'j': 151, 'k': 1001} + +# conflict in rename +statement error +ALTER TABLE ducklake.test RENAME col1.J TO K +---- 
+already exists + +# drop all original columns +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.j + +statement ok +INSERT INTO ducklake.test VALUES ({'k': 10000}) + +query I +FROM ducklake.test ORDER BY ALL +---- +{'k': 3} +{'k': 10} +{'k': 1000} +{'k': 1000} +{'k': 1001} +{'k': 10000} +{'k': NULL} + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k=1000 +---- +1000 +1000 + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k>3 ORDER BY ALL +---- +10 +1000 +1000 +1001 +10000 + +query I +SELECT col1.k FROM ducklake.test WHERE col1.k IS NULL +---- +NULL + +statement ok +ALTER TABLE ducklake.test RENAME col1.K TO v1 + +statement error +ALTER TABLE ducklake.test RENAME col1.z TO v2 +---- +does not exist + +statement error +ALTER TABLE ducklake.test RENAME col2.v1 TO v2 +---- +does not exist + +query I +FROM ducklake.test ORDER BY ALL +---- +{'v1': 3} +{'v1': 10} +{'v1': 1000} +{'v1': 1000} +{'v1': 1001} +{'v1': 10000} +{'v1': NULL} + +query I +SELECT col1.v1 FROM ducklake.test WHERE col1.v1=1000 +---- +1000 +1000 + +query I +SELECT col1.v1 FROM ducklake.test WHERE col1.v1>3 ORDER BY ALL +---- +10 +1000 +1000 +1001 +10000 + +query I +SELECT col1.v1 FROM ducklake.test WHERE col1.v1 IS NULL +---- +NULL diff --git a/tests/sqllogictests/sql/alter/struct_evolution_list_alter.test b/tests/sqllogictests/sql/alter/struct_evolution_list_alter.test new file mode 100644 index 0000000..5cde83a --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_list_alter.test @@ -0,0 +1,177 @@ +# name: test/sql/alter/struct_evolution_list_alter.test +# description: test ducklake struct field evolution within a list via alter +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_alter_list_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)[]); + +statement ok +INSERT INTO ducklake.test VALUES ([{'i': 1, 'j': 2}]) + +# add k INTEGER +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col1.element.k INTEGER + +query I +FROM ducklake.test +---- +[{'i': 1, 'j': 2, 'k': NULL}] + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.element.K INT +---- +already exists + +statement ok +ALTER TABLE ducklake.test ADD COLUMN IF NOT EXISTS col1.element.k INT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.s INT +---- +can only be added to structs + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.el.s INT +---- +does not exist + +statement ok +INSERT INTO ducklake.test VALUES ([{'i': 10, 'j': 20, 'k': 3}]), ([{'i': 11, 'j': 21, 'k': 10}]), ([{'i': 100, 'j': 200, 'k': 1000}]) + +query I +FROM ducklake.test +---- +[{'i': 1, 'j': 2, 'k': NULL}] +[{'i': 10, 'j': 20, 'k': 3}] +[{'i': 11, 'j': 21, 'k': 10}] +[{'i': 100, 'j': 200, 'k': 1000}] + +# drop i +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.element.I + +query I +FROM ducklake.test +---- +[{'j': 2, 'k': NULL}] +[{'j': 20, 'k': 3}] +[{'j': 21, 'k': 10}] +[{'j': 200, 'k': 1000}] + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.element.i +---- +does not exist + +statement ok +ALTER TABLE ducklake.test DROP COLUMN IF EXISTS col1.element.i + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.ele.x +---- +does not exist + +statement ok +INSERT INTO 
ducklake.test VALUES ([{'j': 150, 'k': 1000}]), ([{'j': 151, 'k': 1001}]) + +query I +FROM ducklake.test +---- +[{'j': 2, 'k': NULL}] +[{'j': 20, 'k': 3}] +[{'j': 21, 'k': 10}] +[{'j': 200, 'k': 1000}] +[{'j': 150, 'k': 1000}] +[{'j': 151, 'k': 1001}] + +# conflict in rename +statement error +ALTER TABLE ducklake.test RENAME col1.element.J TO K +---- +already exists + +# drop all original columns +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.element.j + +statement ok +INSERT INTO ducklake.test VALUES ([{'k': 10000}]) + +query I +FROM ducklake.test ORDER BY ALL +---- +[{'k': 3}] +[{'k': 10}] +[{'k': 1000}] +[{'k': 1000}] +[{'k': 1001}] +[{'k': 10000}] +[{'k': NULL}] + +statement ok +ALTER TABLE ducklake.test RENAME col1.element.K TO v1 + +statement error +ALTER TABLE ducklake.test RENAME col1.element.z TO v2 +---- +does not exist + +statement error +ALTER TABLE ducklake.test RENAME col2.element.v1 TO v2 +---- +does not exist + +query I +FROM ducklake.test ORDER BY ALL +---- +[{'v1': 3}] +[{'v1': 10}] +[{'v1': 1000}] +[{'v1': 1000}] +[{'v1': 1001}] +[{'v1': 10000}] +[{'v1': NULL}] + +query I +SELECT col1[1].v1 FROM ducklake.test WHERE col1[1].v1=1000 +---- +1000 +1000 + +query I +SELECT col1[1].v1 FROM ducklake.test WHERE col1[1].v1>3 ORDER BY ALL +---- +10 +1000 +1000 +1001 +10000 + +query I +SELECT col1[1].v1 FROM ducklake.test WHERE col1[1].v1 IS NULL +---- +NULL diff --git a/tests/sqllogictests/sql/alter/struct_evolution_map_alter.test b/tests/sqllogictests/sql/alter/struct_evolution_map_alter.test new file mode 100644 index 0000000..5d55b5c --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_map_alter.test @@ -0,0 +1,157 @@ +# name: test/sql/alter/struct_evolution_map_alter.test +# description: test ducklake struct field evolution within a map via alter +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_alter_map_files') + +statement ok +CREATE TABLE ducklake.test(col1 MAP(INT, STRUCT(i INT, j INT))); + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'1': {'i': 1, 'j': 2}}) + +# add k INTEGER +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col1.value.k INTEGER + +query I +FROM ducklake.test +---- +{1={'i': 1, 'j': 2, 'k': NULL}} + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.value.K INT +---- +already exists + +statement ok +ALTER TABLE ducklake.test ADD COLUMN IF NOT EXISTS col1.value.k INT + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.s INT +---- +can only be added to structs + +statement error +ALTER TABLE ducklake.test ADD COLUMN col1.val.s INT +---- +does not exist + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'2': {'i': 10, 'j': 20, 'k': 3}}), (MAP {'3': {'i': 11, 'j': 21, 'k': 10}}), (MAP {'4': {'i': 100, 'j': 200, 'k': 1000}}) + +query I +FROM ducklake.test +---- +{1={'i': 1, 'j': 2, 'k': NULL}} +{2={'i': 10, 'j': 20, 'k': 3}} +{3={'i': 11, 'j': 21, 'k': 10}} +{4={'i': 100, 'j': 200, 'k': 1000}} + +# drop i +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.value.I + +query I +FROM ducklake.test +---- +{1={'j': 2, 'k': NULL}} +{2={'j': 20, 'k': 3}} +{3={'j': 21, 'k': 10}} +{4={'j': 200, 'k': 1000}} + +statement ok +COMMIT + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.value.i +---- +does not 
exist + +statement ok +ALTER TABLE ducklake.test DROP COLUMN IF EXISTS col1.value.i + +statement error +ALTER TABLE ducklake.test DROP COLUMN col1.val.x +---- +does not exist + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'5': {'j': 150, 'k': 1000}}), (MAP {'6': {'j': 151, 'k': 1001}}) + +query I +FROM ducklake.test +---- +{1={'j': 2, 'k': NULL}} +{2={'j': 20, 'k': 3}} +{3={'j': 21, 'k': 10}} +{4={'j': 200, 'k': 1000}} +{5={'j': 150, 'k': 1000}} +{6={'j': 151, 'k': 1001}} + +# conflict in rename +statement error +ALTER TABLE ducklake.test RENAME col1.value.J TO K +---- +already exists + +# drop all original columns +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.value.j + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'7': {'k': 10000}}) + +query I +FROM ducklake.test ORDER BY ALL +---- +{1={'k': NULL}} +{2={'k': 3}} +{3={'k': 10}} +{4={'k': 1000}} +{5={'k': 1000}} +{6={'k': 1001}} +{7={'k': 10000}} + +statement ok +ALTER TABLE ducklake.test RENAME col1.value.K TO v1 + +statement error +ALTER TABLE ducklake.test RENAME col1.value.z TO v2 +---- +does not exist + +statement error +ALTER TABLE ducklake.test RENAME col2.value.v1 TO v2 +---- +does not exist + +query I +FROM ducklake.test ORDER BY ALL +---- +{1={'v1': NULL}} +{2={'v1': 3}} +{3={'v1': 10}} +{4={'v1': 1000}} +{5={'v1': 1000}} +{6={'v1': 1001}} +{7={'v1': 10000}} diff --git a/tests/sqllogictests/sql/alter/struct_evolution_nested.test b/tests/sqllogictests/sql/alter/struct_evolution_nested.test new file mode 100644 index 0000000..1e57e9e --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_nested.test @@ -0,0 +1,87 @@ +# name: test/sql/alter/struct_evolution_nested.test +# description: test ducklake struct nested evolution +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_nested_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j STRUCT(c1 TINYINT, c2 INT[]), k INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': {'c1': 2, 'c2': []}, 'k': 1}) + +# add a column to j and promote c1 +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(i INT, j STRUCT(c1 INT, c2 INT[], c3 TINYINT), k INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': {'c1': 1000, 'c2': [1, 2, 3], 'c3': 25}, 'k': 10}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': {'c1': 2, 'c2': [], 'c3': NULL}, 'k': 1} +{'i': 10, 'j': {'c1': 1000, 'c2': [1, 2, 3], 'c3': 25}, 'k': 10} + +# drop struct fields again +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j STRUCT(c2 INT[]), k INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'j': {'c2': [100]}, 'k': 100}) + +query I +FROM ducklake.test +---- +{'j': {'c2': []}, 'k': 1} +{'j': {'c2': [1, 2, 3]}, 'k': 10} +{'j': {'c2': [100]}, 'k': 100} + +# add a nested column +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j STRUCT(c2 INT[], x STRUCT(a INT, b INT, c INT)), k INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'j': {'c2': NULL, 'x': {'a': 1, 'b': 2, 'c': 3}}, 'k': 1000}) + +query I +FROM ducklake.test +---- +{'j': {'c2': [], 'x': NULL}, 'k': 1} +{'j': {'c2': [1, 2, 3], 'x': NULL}, 'k': 10} +{'j': {'c2': [100], 'x': NULL}, 'k': 100} +{'j': {'c2': NULL, 'x': {'a': 1, 'b': 2, 'c': 3}}, 'k': 1000} + +# drop the 
column entirely +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(k INT); + +query I +FROM ducklake.test +---- +{'k': 1} +{'k': 10} +{'k': 100} +{'k': 1000} + +# now add a new deeply nested column +statement ok +ALTER TABLE ducklake.test ADD COLUMN col2 STRUCT(i INT, j STRUCT(c1 TINYINT, c2 INT[]), k INT) + +query II +FROM ducklake.test +---- +{'k': 1} NULL +{'k': 10} NULL +{'k': 100} NULL +{'k': 1000} NULL diff --git a/tests/sqllogictests/sql/alter/struct_evolution_nested_alter.test b/tests/sqllogictests/sql/alter/struct_evolution_nested_alter.test new file mode 100644 index 0000000..897c346 --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_nested_alter.test @@ -0,0 +1,105 @@ +# name: test/sql/alter/struct_evolution_nested_alter.test +# description: test ducklake struct nested evolution via alter +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_nested_alter_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j STRUCT(c1 INT, c2 INT[]), k INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': {'c1': 2, 'c2': []}, 'k': 1}) + +statement ok +ALTER TABLE ducklake.test ADD COLUMN COL1.J.c3 TINYINT + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': {'c1': 1000, 'c2': [1, 2, 3], 'c3': 25}, 'k': 10}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': {'c1': 2, 'c2': [], 'c3': NULL}, 'k': 1} +{'i': 10, 'j': {'c1': 1000, 'c2': [1, 2, 3], 'c3': 25}, 'k': 10} + +# drop struct fields +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.i + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.j.c1 + +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.j.c3 + +statement ok +INSERT INTO ducklake.test VALUES ({'j': {'c2': [100]}, 'k': 100}) + +query I +FROM ducklake.test +---- +{'j': {'c2': []}, 'k': 1} +{'j': {'c2': [1, 2, 3]}, 'k': 10} +{'j': {'c2': [100]}, 'k': 100} + +statement ok +COMMIT + +query I +FROM ducklake.test +---- +{'j': {'c2': []}, 'k': 1} +{'j': {'c2': [1, 2, 3]}, 'k': 10} +{'j': {'c2': [100]}, 'k': 100} + +# add a nested column +statement ok +ALTER TABLE ducklake.test ADD COLUMN COL1.J.x STRUCT(a INT, b INT, c INT) + +statement ok +INSERT INTO ducklake.test VALUES ({'j': {'c2': NULL, 'x': {'a': 1, 'b': 2, 'c': 3}}, 'k': 1000}) + +query I +FROM ducklake.test +---- +{'j': {'c2': [], 'x': NULL}, 'k': 1} +{'j': {'c2': [1, 2, 3], 'x': NULL}, 'k': 10} +{'j': {'c2': [100], 'x': NULL}, 'k': 100} +{'j': {'c2': NULL, 'x': {'a': 1, 'b': 2, 'c': 3}}, 'k': 1000} + +# drop the column entirely +statement ok +ALTER TABLE ducklake.test DROP COLUMN col1.J + +query I +FROM ducklake.test +---- +{'k': 1} +{'k': 10} +{'k': 100} +{'k': 1000} + +# now add a new deeply nested column +statement ok +ALTER TABLE ducklake.test ADD COLUMN col2 STRUCT(i INT, j STRUCT(c1 TINYINT, c2 INT[]), k INT) + +query II +FROM ducklake.test +---- +{'k': 1} NULL +{'k': 10} NULL +{'k': 100} NULL +{'k': 1000} NULL diff --git a/tests/sqllogictests/sql/alter/struct_evolution_reuse.test b/tests/sqllogictests/sql/alter/struct_evolution_reuse.test new file mode 100644 index 0000000..34e44a6 --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_evolution_reuse.test @@ -0,0 +1,44 @@ +# name: test/sql/alter/struct_evolution_reuse.test +# description: test ducklake struct field evolution 
re-use +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_evolution_reuse_files', METADATA_CATALOG 'xx') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}) + +# drop column i +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j INT); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 10, 'j': 20}) + +query I +FROM ducklake.test +---- +{'j': 2} +{'j': 20} + +# re-add column i +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j INT, i INT); + +query I +FROM ducklake.test +---- +{'j': 2, 'i': NULL} +{'j': 20, 'i': NULL} diff --git a/tests/sqllogictests/sql/alter/struct_in_list_evolution.test b/tests/sqllogictests/sql/alter/struct_in_list_evolution.test new file mode 100644 index 0000000..f54c046 --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_in_list_evolution.test @@ -0,0 +1,115 @@ +# name: test/sql/alter/struct_in_list_evolution.test +# description: test ducklake struct field evolution in a list +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_in_list_evolution_files') + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)[]); + +statement ok +INSERT INTO ducklake.test VALUES ([{'i': 1, 'j': 2}]) + +# add k TINYINT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(i INT, j INT, k TINYINT)[]; + +query I +FROM ducklake.test +---- +[{'i': 1, 'j': 2, 'k': NULL}] + +statement ok +INSERT INTO ducklake.test VALUES ([{'i': 10, 'j': 20, 'k': 3}]), ([{'i': 11, 'j': 21, 'k': 10}]) + +query I +FROM ducklake.test +---- +[{'i': 1, 'j': 2, 'k': NULL}] +[{'i': 10, 'j': 20, 'k': 3}] +[{'i': 11, 'j': 21, 'k': 10}] + +statement error +INSERT INTO ducklake.test VALUES ([{'i': 10, 'j': 20, 'k': 1000}]) +---- +out of range + +# promote k to INT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(i INT, j INT, k INTEGER)[]; + +statement ok +INSERT INTO ducklake.test VALUES ([{'i': 100, 'j': 200, 'k': 1000}]) + +query I +FROM ducklake.test +---- +[{'i': 1, 'j': 2, 'k': NULL}] +[{'i': 10, 'j': 20, 'k': 3}] +[{'i': 11, 'j': 21, 'k': 10}] +[{'i': 100, 'j': 200, 'k': 1000}] + +# drop i +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(j INT, k INTEGER)[]; + +statement ok +INSERT INTO ducklake.test VALUES ([{'j': 150, 'k': 1000}]), ([{'j': 151, 'k': 1001}]) + +query I +FROM ducklake.test +---- +[{'j': 2, 'k': NULL}] +[{'j': 20, 'k': 3}] +[{'j': 21, 'k': 10}] +[{'j': 200, 'k': 1000}] +[{'j': 150, 'k': 1000}] +[{'j': 151, 'k': 1001}] + +# drop all original columns +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE STRUCT(k INTEGER)[]; + +statement ok +INSERT INTO ducklake.test VALUES ([{'k': 10000}]) + +query I +FROM ducklake.test ORDER BY ALL +---- +[{'k': 3}] +[{'k': 10}] +[{'k': 1000}] +[{'k': 1000}] +[{'k': 1001}] +[{'k': 10000}] +[{'k': NULL}] + +query I +SELECT col1[1].k FROM ducklake.test WHERE col1[1].k=1000 +---- +1000 +1000 + +query I +SELECT col1[1].k FROM ducklake.test WHERE col1[1].k>3 ORDER BY ALL +---- +10 
+1000 +1000 +1001 +10000 + +query I +SELECT col1[1].k FROM ducklake.test WHERE col1[1].k IS NULL +---- +NULL diff --git a/tests/sqllogictests/sql/alter/struct_in_map_evolution.test b/tests/sqllogictests/sql/alter/struct_in_map_evolution.test new file mode 100644 index 0000000..20e584e --- /dev/null +++ b/tests/sqllogictests/sql/alter/struct_in_map_evolution.test @@ -0,0 +1,95 @@ +# name: test/sql/alter/struct_in_map_evolution.test +# description: test ducklake struct field evolution in a map +# group: [alter] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_in_map_evolution_files') + +statement ok +CREATE TABLE ducklake.test(col1 MAP(INT, STRUCT(i INT, j INT))); + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'1': {'i': 1, 'j': 2}}) + +# add k TINYINT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE MAP(INT, STRUCT(i INT, j INT, k TINYINT)); + +query I +FROM ducklake.test +---- +{1={'i': 1, 'j': 2, 'k': NULL}} + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'2': {'i': 10, 'j': 20, 'k': 3}}), (MAP {'3': {'i': 11, 'j': 21, 'k': 10}}) + +query I +FROM ducklake.test +---- +{1={'i': 1, 'j': 2, 'k': NULL}} +{2={'i': 10, 'j': 20, 'k': 3}} +{3={'i': 11, 'j': 21, 'k': 10}} + +statement error +INSERT INTO ducklake.test VALUES (MAP {'4': {'i': 10, 'j': 20, 'k': 1000}}) +---- +out of range + +# promote k to INT +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE MAP(INT, STRUCT(i INT, j INT, k INTEGER)); + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'4': {'i': 10, 'j': 20, 'k': 1000}}) + +query I +FROM ducklake.test +---- +{1={'i': 1, 'j': 2, 'k': NULL}} +{2={'i': 10, 'j': 20, 'k': 3}} +{3={'i': 11, 'j': 21, 'k': 10}} +{4={'i': 10, 'j': 20, 'k': 1000}} + +# drop i +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE MAP(INT, STRUCT(j INT, k INTEGER)); + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'5': {'j': 150, 'k': 1000}}), (MAP {'6': {'j': 151, 'k': 1001}}) + +query I +FROM ducklake.test +---- +{1={'j': 2, 'k': NULL}} +{2={'j': 20, 'k': 3}} +{3={'j': 21, 'k': 10}} +{4={'j': 20, 'k': 1000}} +{5={'j': 150, 'k': 1000}} +{6={'j': 151, 'k': 1001}} + +# drop all original columns +statement ok +ALTER TABLE ducklake.test ALTER COLUMN col1 SET DATA TYPE MAP(INT, STRUCT(k INTEGER)); + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'7': {'k': 10000}}) + +query I +FROM ducklake.test ORDER BY ALL +---- +{1={'k': NULL}} +{2={'k': 3}} +{3={'k': 10}} +{4={'k': 1000}} +{5={'k': 1000}} +{6={'k': 1001}} +{7={'k': 10000}} diff --git a/tests/sqllogictests/sql/attach/attach_replace.test b/tests/sqllogictests/sql/attach/attach_replace.test new file mode 100644 index 0000000..7bc98c2 --- /dev/null +++ b/tests/sqllogictests/sql/attach/attach_replace.test @@ -0,0 +1,17 @@ +# name: test/sql/attach/attach_replace.test +# description: test attach replace with ducklake +# group: [attach] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/attach_replace', METADATA_CATALOG 'x') + +statement ok +ATTACH OR REPLACE 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/attach_replace_2', METADATA_CATALOG 'xx') diff --git
a/tests/sqllogictests/sql/attach/different_paths.test b/tests/sqllogictests/sql/attach/different_paths.test new file mode 100644 index 0000000..72f2789 --- /dev/null +++ b/tests/sqllogictests/sql/attach/different_paths.test @@ -0,0 +1,35 @@ +# name: test/sql/attach/different_paths.test +# description: test attach of ducklake when providing different paths +# group: [attach] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/some.db' AS a_ducklake (DATA_PATH '__TEST_DIR__/some/'); + +statement ok +USE a_ducklake; + +statement ok +CREATE TABLE t AS SELECT range a FROM range(10); + +statement ok +USE memory; + +statement ok +DETACH a_ducklake; + +statement error +ATTACH 'ducklake:__TEST_DIR__/some.db' AS other_ducklake (DATA_PATH '__TEST_DIR__/other_path/'); +---- +does not match existing data path in the catalog + +statement ok +ATTACH 'ducklake:__TEST_DIR__/some.db' AS other_ducklake (DATA_PATH '__TEST_DIR__/other_path/', OVERRIDE_DATA_PATH TRUE); + +# This will obviously not work; different OSs have different error messages saying file not found +statement error +FROM other_ducklake.t; +---- diff --git a/tests/sqllogictests/sql/audit/test_base_audit.test b/tests/sqllogictests/sql/audit/test_base_audit.test new file mode 100644 index 0000000..1a15ef0 --- /dev/null +++ b/tests/sqllogictests/sql/audit/test_base_audit.test @@ -0,0 +1,277 @@ +# name: test/sql/audit/test_base_audit.test +# description: test ducklake can keep track of audits on the base table +# group: [audit] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_audit') + +statement ok +CREATE TABLE ducklake.test(a integer, b varchar); + +query II +SELECT * FROM ducklake.test +---- + +# At this point all our authors and messages are NULL +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() +---- +0 NULL NULL +1 NULL NULL + +# If we insert data here, it gets committed without commit information as well +statement ok +INSERT INTO ducklake.test VALUES (0, 'oogie'); + +# At this point all our authors and messages are NULL +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 2 +---- +2 NULL NULL + + +# Now we get a nice message and author +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (1, 'pedro'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'Inserting myself'); + +statement ok +COMMIT; + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 3 +---- +3 Pedro Inserting myself + +# We can also have only an author +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (2, 'Thijs'); + +statement ok +CALL ducklake.set_commit_message('Pedro', ''); + +statement ok +COMMIT; + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 4 +---- +4 Pedro (empty) + +# Only the message +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (3, 'Mark'); + +statement ok +CALL ducklake.set_commit_message(NULL, 'Adding Mark'); + +statement ok +COMMIT; + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 5 +---- +5 NULL Adding Mark + +# NULLS +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (4, 'Gabor'); + +statement ok +CALL
ducklake.set_commit_message(NULL, NULL); + +statement ok +COMMIT; + +# Empty +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (5, 'Tom'); + +statement ok +CALL ducklake.set_commit_message('', ''); + +statement ok +COMMIT; + +# Both NULL and empty strings are accepted; NULL stays NULL and empty strings are stored as (empty) +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id > 5 +---- +6 NULL NULL +7 (empty) (empty) + +# Test that ROLLBACK doesn't mess things up +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (6, 'Hannes'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'Adding Hannes'); + +statement ok +ROLLBACK; + +statement ok +INSERT INTO ducklake.test VALUES (6, 'Hannes'); + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 8 +---- +8 NULL NULL + +statement ok +CALL ducklake.set_option('require_commit_message', True) + +statement error +INSERT INTO ducklake.test VALUES (7, 'Morgana'); +---- +Commit Information for the snapshot is required but has not been provided. + +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (7, 'Morgana'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'Adding Morgana'); + +statement ok +COMMIT; + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 9 +---- +9 Pedro Adding Morgana + + +statement error +INSERT INTO ducklake.test VALUES (8, 'Odara'); +---- +Commit Information for the snapshot is required but has not been provided. + +# We can set it back to false +statement ok +CALL ducklake.set_option('require_commit_message', False) + + +statement ok +INSERT INTO ducklake.test VALUES (8, 'Odara'); + +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 10 +---- +10 NULL NULL + +# Changing commit message afterwards +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (9, 'Teddy'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'bla'); + +statement ok +CALL ducklake.set_commit_message('Mark', 'Inserting Teddy'); + +statement ok +COMMIT; + +# Last set should survive +query III +SELECT snapshot_id, author, commit_message FROM ducklake.snapshots() where snapshot_id = 11 +---- +11 Mark Inserting Teddy + +# Let's check that our previous insertion has a null value for extra info +query IIII +SELECT snapshot_id, author, commit_message, commit_extra_info FROM ducklake.snapshots() where snapshot_id = 11 +---- +11 Mark Inserting Teddy NULL + +# Let's add some extra info to the next commit + +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (10, 'Ollie'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'Inserting Ollie', extra_info=>'{''barks'':7, ''cuteness'':10}'); + +statement ok +COMMIT; + +# Let's check Ollie is inserted with the correct extra_info +query IIII +SELECT snapshot_id, author, commit_message, commit_extra_info FROM ducklake.snapshots() where snapshot_id = 12 +---- +12 Pedro Inserting Ollie {'barks':7, 'cuteness':10} + +# Empty is also valid +statement ok +BEGIN; + +statement ok +INSERT INTO ducklake.test VALUES (11, 'Jack Black'); + +statement ok +CALL ducklake.set_commit_message('Pedro', 'Inserting the greatest musician of the world.', extra_info=>''); + +statement ok +COMMIT; + +# Let's check Jack Black is inserted with the empty extra_info +query IIII +SELECT snapshot_id, author, commit_message, commit_extra_info FROM ducklake.snapshots() where
snapshot_id = 13 +---- +13 Pedro Inserting the greatest musician of the world. (empty) + +# Check all data was inserted correctly at the end +query II +FROM ducklake.test +---- +0 oogie +1 pedro +2 Thijs +3 Mark +4 Gabor +5 Tom +6 Hannes +7 Morgana +8 Odara +9 Teddy +10 Ollie +11 Jack Black diff --git a/tests/sqllogictests/sql/autoloading/autoload_data_path.test b/tests/sqllogictests/sql/autoloading/autoload_data_path.test new file mode 100644 index 0000000..1030fc4 --- /dev/null +++ b/tests/sqllogictests/sql/autoloading/autoload_data_path.test @@ -0,0 +1,51 @@ +# name: test/sql/autoloading/autoload_data_path.test +# description: Tests for autoloading with filesystems +# group: [autoloading] + +require-env LOCAL_EXTENSION_REPO + +require ducklake + +statement ok +set allow_persistent_secrets=false; + +# Ensure we have a clean extension directory without any preinstalled extensions +statement ok +set extension_directory='__TEST_DIR__/autoloading_filesystems' + +### Neither autoloading nor installing: throw an error with an installation hint +statement ok +set autoload_known_extensions=false + +statement ok +set autoinstall_known_extensions=false + +statement error +ATTACH 'ducklake:autoload_problem.ducklake' (DATA_PATH 's3://some-bucket/') +---- +Missing Extension Error: Data path s3://some-bucket/ requires the extension httpfs to be loaded + +### With autoloading, autoinstall, and the correct repo +statement ok +set autoload_known_extensions=true + +statement ok +set autoinstall_known_extensions=true + +statement ok +set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; + +## Note that attaching DuckLake will NOT actually validate the data path +statement ok +ATTACH 'ducklake:autoload_problem.ducklake' (DATA_PATH 's3://some-bucket') + +statement ok +DETACH autoload_problem + +statement ok +ATTACH 'ducklake:autoload_problem.ducklake' + +query I +SELECT value FROM autoload_problem.options() WHERE option_name='data_path' +---- +s3://some-bucket/ diff --git a/tests/sqllogictests/sql/catalog/drop_table.test b/tests/sqllogictests/sql/catalog/drop_table.test new file mode 100644 index 0000000..65274fd --- /dev/null +++ b/tests/sqllogictests/sql/catalog/drop_table.test @@ -0,0 +1,107 @@ +# name: test/sql/catalog/drop_table.test +# description: Test dropping of tables in DuckLake +# group: [catalog] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_drop_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +BEGIN + +statement ok +DROP TABLE ducklake.test + +statement error +SELECT * FROM ducklake.test +---- +does not exist + +query I +SELECT table_name FROM duckdb_tables() WHERE database_name = 'ducklake' +---- + +statement ok +ROLLBACK + +query I +SELECT * FROM ducklake.test +---- + +statement ok +DROP TABLE ducklake.test + +statement error +SELECT * FROM ducklake.test +---- +does not exist + +statement ok +DROP TABLE IF EXISTS ducklake.test + +statement error +DROP TABLE ducklake.test +---- +does not exist + +# test drop of a transaction-local table +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test2(i INTEGER); + +query I +SELECT * FROM ducklake.test2 +---- + +statement ok +DROP TABLE ducklake.test2 + +statement error +SELECT * FROM ducklake.test2 +---- +does not exist + +statement ok +COMMIT + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# drop and create the same table
in the same transaction +statement ok +BEGIN + +statement ok +DROP TABLE ducklake.test; + +statement ok +CREATE TABLE ducklake.test(i VARCHAR, j VARCHAR); + +statement ok +INSERT INTO ducklake.test VALUES ('hello', 'world'); + +query II +SELECT * FROM ducklake.test +---- +hello world + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +hello world diff --git a/tests/sqllogictests/sql/catalog/quoted_identifiers.test b/tests/sqllogictests/sql/catalog/quoted_identifiers.test new file mode 100644 index 0000000..96afd5b --- /dev/null +++ b/tests/sqllogictests/sql/catalog/quoted_identifiers.test @@ -0,0 +1,28 @@ +# name: test/sql/catalog/quoted_identifiers.test +# description: Test quoted identifiers in DuckLake +# group: [catalog] + +require ducklake + +require parquet + +# windows doesn't like these paths +require notwindows + +# SQLite databases do not support creating new schemas +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}''quoted'' "db".db' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake ''quoted'' "path"', METADATA_CATALOG '''quoted'' catalog "name"', METADATA_SCHEMA '''quoted'' catalog "schema"') + +statement ok +CREATE TABLE ducklake."quoted 'table' ""name"""("quoted 'column' ""name""" INTEGER); + +query I +SELECT "quoted 'column' ""name""" FROM ducklake."quoted 'table' ""name""" +---- + +statement ok +DROP TABLE ducklake."quoted 'table' ""name""" diff --git a/tests/sqllogictests/sql/catalog/schema.test b/tests/sqllogictests/sql/catalog/schema.test new file mode 100644 index 0000000..8434d6f --- /dev/null +++ b/tests/sqllogictests/sql/catalog/schema.test @@ -0,0 +1,176 @@ +# name: test/sql/catalog/schema.test +# description: Test schema support in DuckLake +# group: [catalog] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_schema_files') + +# basic multi-schema support +statement ok +CREATE SCHEMA ducklake.s1; + +statement ok +CREATE SCHEMA ducklake.s2; + +query I +SELECT schema_name FROM duckdb_schemas() WHERE database_name='ducklake' ORDER BY ALL +---- +main +s1 +s2 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +CREATE TABLE ducklake.s2.tbl(a VARCHAR, b VARCHAR); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +INSERT INTO ducklake.s2.tbl VALUES ('hello', 'world'); + +query I +SELECT * FROM ducklake.s1.tbl +---- +42 + +query II +SELECT * FROM ducklake.s2.tbl +---- +hello world + +# drop the schemas again +statement error +DROP SCHEMA ducklake.s1 +---- +there are entries that depend on it + +statement ok +DROP TABLE ducklake.s1.tbl + +statement ok +DROP SCHEMA ducklake.s1 + +statement ok +DROP SCHEMA ducklake.s2 CASCADE + +# the schemas are now gone +statement error +CREATE TABLE ducklake.s1.tbl2(i INT) +---- +not found + +foreach commit_query ROLLBACK COMMIT + +# now try all of this transaction-local +statement ok +BEGIN + +statement ok +CREATE SCHEMA ducklake.s1; + +statement ok +CREATE SCHEMA ducklake.s2; + +query I +SELECT schema_name FROM duckdb_schemas() WHERE database_name='ducklake' ORDER BY ALL +---- +main +s1 +s2 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +CREATE TABLE ducklake.s2.tbl(a VARCHAR, b VARCHAR); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +INSERT INTO 
ducklake.s2.tbl VALUES ('hello', 'world'); + +query I +SELECT * FROM ducklake.s1.tbl +---- +42 + +query II +SELECT * FROM ducklake.s2.tbl +---- +hello world + +statement ok +${commit_query} + +endloop + +query I +SELECT * FROM ducklake.s1.tbl +---- +42 + +query II +SELECT * FROM ducklake.s2.tbl +---- +hello world + +# drop and re-create a schema within the same transaction +statement ok +BEGIN + +statement ok +DROP SCHEMA ducklake.s1 CASCADE + +query I +SELECT schema_name FROM duckdb_schemas() WHERE database_name='ducklake' ORDER BY ALL +---- +main +s2 + +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.tbl(a DATE); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (DATE '1992-01-01'); + +query I +SELECT * FROM ducklake.s1.tbl +---- +1992-01-01 + +statement ok +COMMIT + +query I +SELECT * FROM ducklake.s1.tbl +---- +1992-01-01 + +# write multiple schemas in one transaction +statement ok +BEGIN + +statement ok +CREATE SCHEMA ducklake.schema_one; + +statement ok +CREATE SCHEMA ducklake.schema_two + +statement ok +COMMIT diff --git a/tests/sqllogictests/sql/checkpoint/checkpoint_ducklake.test b/tests/sqllogictests/sql/checkpoint/checkpoint_ducklake.test new file mode 100644 index 0000000..a930c14 --- /dev/null +++ b/tests/sqllogictests/sql/checkpoint/checkpoint_ducklake.test @@ -0,0 +1,104 @@ +# name: test/sql/checkpoint/checkpoint_ducklake.test +# description: Test checkpoint in ducklake +# group: [checkpoint] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/checkpoint_ducklake', DATA_INLINING_ROW_LIMIT 5) + +statement ok +use ducklake + +# Create a table whose snapshot is expired, leaving an old file to delete +statement ok +CREATE TABLE test(i INTEGER) + +statement ok +INSERT INTO test FROM range(1000) + +statement ok +DELETE FROM test + +statement ok +DROP TABLE test + +statement ok +CALL ducklake_expire_snapshots('ducklake', dry_run => false, versions => [2]) + +# Create a table with inlined data that can be merged with adjacent files +statement ok +create table t (a integer) + +statement ok +insert into t values (1), (2) + +statement ok +insert into t values (2), (3) + +statement ok +insert into t values (4) + +# Create a table with a data rewrite +statement ok +create table t_2 (a integer) + +statement ok +INSERT INTO t_2 SELECT i FROM range(100) t(i) + +statement ok +DELETE FROM t_2 +WHERE a < 98; + +# Create an orphan file +statement ok +COPY t_2 to '${DATA_PATH}/checkpoint_ducklake/main/t/bla.parquet'; + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +4 3 + +query II +SELECT delete_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_delete_file +---- +5 3 + +# t has the orphan file (its own data is inlined); t_2 has a data file and a delete file; test still has its data file: 4 files in total.
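+# descriptive note: the GLOB below is a plain filesystem listing, so it counts +# every file under the data path -- including the orphan -- regardless of +# whether the metadata catalog still references it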
+query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/checkpoint_ducklake/main/**') +---- +4 + +statement ok +CALL ducklake.set_option('delete_older_than', '1 millisecond'); + +statement ok +CHECKPOINT; + + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file ORDER BY ALL +---- +4 3 +6 2 +7 3 + +query II +SELECT delete_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_delete_file +---- + +query I +SELECT COUNT(*) +FROM GLOB('${DATA_PATH}/checkpoint_ducklake/main/**') +WHERE file LIKE '%bla%'; +---- +0 \ No newline at end of file diff --git a/tests/sqllogictests/sql/checkpoint/checkpoint_updates_interleaved.test b/tests/sqllogictests/sql/checkpoint/checkpoint_updates_interleaved.test new file mode 100644 index 0000000..99f3595 --- /dev/null +++ b/tests/sqllogictests/sql/checkpoint/checkpoint_updates_interleaved.test @@ -0,0 +1,126 @@ +# name: test/sql/checkpoint/checkpoint_updates_interleaved.test +# description: Test checkpoint while interleaving updates in the same transaction +# group: [checkpoint] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/checkpoint_updates_interleaved', DATA_INLINING_ROW_LIMIT 5) + +statement ok +use ducklake + +statement ok +CALL ducklake.set_option('expire_older_than', '1 week'); + +statement ok +CALL ducklake.set_option('delete_older_than', '1 week'); + +statement ok +CREATE TABLE test(i INTEGER) + +statement ok +INSERT INTO test FROM range(10) + +statement ok +BEGIN; + +statement ok +INSERT INTO test FROM range(10,13) + +statement ok +CHECKPOINT; + +statement ok +COMMIT; + +query I +FROM test; +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 + +statement ok +BEGIN; + +statement ok +CHECKPOINT; + +statement ok +INSERT INTO test FROM range(13,16) + +statement ok +CHECKPOINT; + +statement ok +INSERT INTO test FROM range(17,25) + +statement ok +COMMIT; + +query I +FROM test; +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +17 +18 +19 +20 +21 +22 +23 +24 +13 +14 +15 + +statement ok +BEGIN; + +statement ok +delete from test where i < 15; + +statement ok +CHECKPOINT; + +statement ok +INSERT INTO test FROM range(1,3) + +statement ok +CHECKPOINT; + +statement error +COMMIT; +---- + diff --git a/tests/sqllogictests/sql/checkpoint/many_inserts_transaction.test b/tests/sqllogictests/sql/checkpoint/many_inserts_transaction.test new file mode 100644 index 0000000..0fddf15 --- /dev/null +++ b/tests/sqllogictests/sql/checkpoint/many_inserts_transaction.test @@ -0,0 +1,48 @@ +# name: test/sql/checkpoint/many_inserts_transaction.test +# description: Test checkpoint with many inserts in one transaction +# group: [checkpoint] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/many_inserts_transaction/') + +statement ok +use ducklake + +statement ok +BEGIN; + +statement ok +CREATE TABLE integers(i INTEGER); + +statement ok +INSERT INTO integers SELECT 1 FROM range(10); + +statement ok +INSERT INTO integers SELECT 1; + +statement ok +INSERT INTO integers SELECT 2; + +statement ok +INSERT INTO integers SELECT 3; + +statement ok +COMMIT; + +statement ok +CHECKPOINT; + +query IIII +SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM integers; +---- +1 3 13 13 \ No 
newline at end of file diff --git a/tests/sqllogictests/sql/checkpoint/view_checkpoint.test b/tests/sqllogictests/sql/checkpoint/view_checkpoint.test new file mode 100644 index 0000000..8da36a1 --- /dev/null +++ b/tests/sqllogictests/sql/checkpoint/view_checkpoint.test @@ -0,0 +1,28 @@ +# name: test/sql/checkpoint/view_checkpoint.test +# description: Test checkpoint with a view +# group: [checkpoint] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/view_checkpoint/') + +statement ok +use ducklake + +statement ok +create table t (a integer, b integer, c integer) + +statement ok +create view v as select a from t; + +statement ok +CHECKPOINT; \ No newline at end of file diff --git a/tests/sqllogictests/sql/cleanup/cleanup_old_files.test b/tests/sqllogictests/sql/cleanup/cleanup_old_files.test new file mode 100644 index 0000000..9eef5a9 --- /dev/null +++ b/tests/sqllogictests/sql/cleanup/cleanup_old_files.test @@ -0,0 +1,54 @@ +# name: test/sql/cleanup/cleanup_old_files.test +# description: Cleanup files left behind by deletes, rewrites, merges and expired snapshots +# group: [cleanup] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/cleanup_old_files') + +statement ok +USE ducklake; + +statement ok +CREATE TABLE t (x INT); + +statement ok +INSERT INTO t VALUES (1), (2), (3); + +statement ok +INSERT INTO t VALUES (4), (5); + +statement ok +DELETE FROM t WHERE x <= 2; + +statement ok +INSERT INTO t VALUES (6), (7); + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 't'); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok +CALL ducklake_expire_snapshots('ducklake', older_than => now()); + +query I
+SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true, cleanup_all => true); +---- +2 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', dry_run => false, cleanup_all => true); + +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true, cleanup_all => true); +---- +0 \ No newline at end of file diff --git a/tests/sqllogictests/sql/cleanup/create_drop_cleanup.test b/tests/sqllogictests/sql/cleanup/create_drop_cleanup.test new file mode 100644 index 0000000..6a7f1c6 --- /dev/null +++ b/tests/sqllogictests/sql/cleanup/create_drop_cleanup.test @@ -0,0 +1,41 @@ +# name: test/sql/cleanup/create_drop_cleanup.test +# description: Cleanup files when creating and dropping a table +# group: [cleanup] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_create_drop_cleanup') + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.tbl(i INT); + +statement ok +INSERT INTO ducklake.tbl VALUES (42); + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_create_drop_cleanup/main/tbl/*.parquet') +---- +1 + +statement ok +DROP TABLE ducklake.tbl + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_create_drop_cleanup/main/tbl/*.parquet') +---- +0 + +statement ok +COMMIT + diff --git a/tests/sqllogictests/sql/clickbench/clickbench.test_slow
b/tests/sqllogictests/sql/clickbench/clickbench.test_slow new file mode 100644 index 0000000..e54b1f9 --- /dev/null +++ b/tests/sqllogictests/sql/clickbench/clickbench.test_slow @@ -0,0 +1,250 @@ +# name: test/sql/clickbench/clickbench.test_slow +# description: Test running Clickbench on DuckLake +# group: [clickbench] + +require ducklake + +require parquet + +mode skip +# wget https://datasets.clickhouse.com/hits_compatible/hits.parquet + +statement ok +CREATE MACRO toDateTime(t) AS epoch_ms(t * 1000); + +statement ok +CREATE TABLE hits AS +SELECT * + REPLACE (make_date(EventDate) AS EventDate, toDateTime(EventTime) AS EventTime) +FROM read_parquet('/Users/holanda/Downloads/hits.parquet', binary_as_string=True); + +# Not yet supported +statement ok +DROP MACRO toDateTime; + +statement ok +ATTACH 'ducklake:ducklake_clickbench.db' AS ducklake (DATA_PATH '/Users/holanda/Documents/Projects/ducklake/clickbench/') + +statement ok +COPY FROM DATABASE memory TO ducklake + +statement ok +USE ducklake + +query I +SELECT COUNT(*) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q00.csv + +query I +SELECT COUNT(*) FROM ducklake.hits WHERE AdvEngineID <> 0; +---- +:duckdb/benchmark/clickbench/answers/q01.csv + +query I +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q02.csv + +query I +SELECT AVG(UserID) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q03.csv + +query I +SELECT COUNT(DISTINCT UserID) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q04.csv + +query I +SELECT COUNT(DISTINCT SearchPhrase) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q05.csv + +query I +SELECT MIN(EventDate), MAX(EventDate) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q06.csv + +query I +SELECT AdvEngineID, COUNT(*) FROM ducklake.hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +---- +:duckdb/benchmark/clickbench/answers/q07.csv + +query I +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM ducklake.hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q08.csv + +query I +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM ducklake.hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q09.csv + +query I +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM ducklake.hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q10.csv + +query I +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM ducklake.hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q11.csv + +query I +SELECT SearchPhrase, COUNT(*) AS c FROM ducklake.hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q12.csv + +query I +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM ducklake.hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q13.csv + +query I +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM ducklake.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q14.csv + +query I +SELECT UserID, COUNT(*) FROM ducklake.hits GROUP BY UserID 
ORDER BY COUNT(*) DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q15.csv + +query I +SELECT UserID, COALESCE(NULLIF(SearchPhrase, ''), NULL) AS SearchPhrase , COUNT(*) FROM ducklake.hits GROUP BY UserID, COALESCE(NULLIF(SearchPhrase, ''), NULL) ORDER BY COUNT(*) DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q16.csv + +query I +SELECT COUNT(*) AS count FROM (SELECT UserID, SearchPhrase FROM (SELECT UserID, SearchPhrase, COUNT(*) FROM ducklake.hits GROUP BY UserID, SearchPhrase LIMIT 10) GROUP BY UserID, SearchPhrase) t +---- +:duckdb/benchmark/clickbench/answers/q17.csv + +query I +SELECT * FROM (SELECT UserID, extract(minute FROM EventTime) AS m, COALESCE(NULLIF(SearchPhrase, ''), NULL), COUNT(*) FROM ducklake.hits GROUP BY UserID, m, COALESCE(NULLIF(SearchPhrase, ''), NULL) ORDER BY COUNT(*) DESC LIMIT 10) ORDER BY 4 DESC, 1 ; +---- +:duckdb/benchmark/clickbench/answers/q18.csv + +query I +SELECT UserID FROM ducklake.hits WHERE UserID = 435090932899640449; +---- +:duckdb/benchmark/clickbench/answers/q19.csv + +query I +SELECT COUNT(*) FROM ducklake.hits WHERE URL LIKE '%google%'; +---- +:duckdb/benchmark/clickbench/answers/q20.csv + +query I +SELECT * FROM (SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM ducklake.hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10) ORDER BY c DESC, SearchPhrase LIMIT 8 +---- +:duckdb/benchmark/clickbench/answers/q21.csv + +query I +SELECT * FROM (SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM ducklake.hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10) ORDER BY c DESC LIMIT 10 +---- +:duckdb/benchmark/clickbench/answers/q22.csv + +# Invalid Error: mutex lock failed: Invalid argument +# query I +# SELECT * FROM ducklake.hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +# ---- +# :duckdb/benchmark/clickbench/answers/q23.csv + +# Segfault +# query I +# SELECT * FROM (SELECT * FROM (SELECT SearchPhrase FROM ducklake.hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10) LIMIT 4) ORDER BY SearchPhrase +# ---- +# :duckdb/benchmark/clickbench/answers/q24.csv + +query I +SELECT SearchPhrase FROM ducklake.hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q25.csv + +query I +SELECT SearchPhrase FROM ducklake.hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q26.csv + +query I +SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM ducklake.hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +---- +:duckdb/benchmark/clickbench/answers/q27.csv + +query I +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM ducklake.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +---- +:duckdb/benchmark/clickbench/answers/q28.csv + +query I +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), 
SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM ducklake.hits; +---- +:duckdb/benchmark/clickbench/answers/q29.csv + +query I +SELECT * FROM (SELECT * FROM (SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM ducklake.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10) ORDER BY c DESC, ClientIP) ORDER BY c DESC, ClientIP LIMIT 9 +---- +:duckdb/benchmark/clickbench/answers/q30.csv + +query I +SELECT MIN(c), MAX(c), COUNT(*) FROM (SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM ducklake.hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10) +---- +:duckdb/benchmark/clickbench/answers/q31.csv + +# Invalid Error: TProtocolException: Invalid data +# query I +# SELECT MIN(c), MAX(c), COUNT(*) FROM (SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM ducklake.hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10) +# ---- +# :duckdb/benchmark/clickbench/answers/q32.csv + +query I +SELECT URL, COUNT(*) AS c FROM ducklake.hits GROUP BY URL ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q33.csv + +query I +SELECT 1, URL, COUNT(*) AS c FROM ducklake.hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q34.csv + +query I +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM ducklake.hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q35.csv + +query I +SELECT URL, COUNT(*) AS 
PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q36.csv + +query I +SELECT Title, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +---- +:duckdb/benchmark/clickbench/answers/q37.csv + +query I +SELECT MIN(PageViews), MAX(PageViews), COUNT(*) FROM (SELECT URL, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY ALL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000) +---- +:duckdb/benchmark/clickbench/answers/q38.csv + +query I +SELECT MIN(PageViews), MAX(PageViews), COUNT(*) FROM (SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000) +---- +:duckdb/benchmark/clickbench/answers/q39.csv + +query I +SELECT * FROM (SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100) ORDER BY PageViews DESC, URLHash OFFSET 2 LIMIT 5 +---- +:duckdb/benchmark/clickbench/answers/q40.csv + +query I +SELECT MIN(PageViews), MAX(PageViews), COUNT(*) FROM (SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000) +---- +:duckdb/benchmark/clickbench/answers/q41.csv + +query I +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM ducklake.hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; +---- +:duckdb/benchmark/clickbench/answers/q42.csv diff --git a/tests/sqllogictests/sql/cloud/test_cloud_cases.test b/tests/sqllogictests/sql/cloud/test_cloud_cases.test new file mode 100644 index 0000000..7b0e0dc --- /dev/null +++ b/tests/sqllogictests/sql/cloud/test_cloud_cases.test @@ -0,0 +1,44 @@ +# name: test/sql/cloud/test_cloud_cases.test +# description: test ducklake extension +# group: [cloud] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS lake (DATA_PATH '${DATA_PATH}/test_cloud_cases/', METADATA_CATALOG 'metadata'); + +statement ok +use lake; + +statement ok +CREATE TABLE test AS FROM range (10) t(i); + +statement ok +START; + +statement ok +DELETE FROM test where i = 1; + +statement ok +DELETE FROM test where i = 2; + 
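+# (illustrative aside: both deletes above remove rows from the same data file inside one transaction, so at COMMIT they must be folded into a single consistent delete file; assuming the METADATA_CATALOG named above, the resulting tombstones could be inspected with: +# SELECT data_file_id, delete_file_id FROM metadata.ducklake_delete_file;)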
+statement ok +COMMIT; + +query I +select * from test order by all; +---- +0 +3 +4 +5 +6 +7 +8 +9 \ No newline at end of file diff --git a/tests/sqllogictests/sql/comments/comment_on_column.test b/tests/sqllogictests/sql/comments/comment_on_column.test new file mode 100644 index 0000000..5597dc4 --- /dev/null +++ b/tests/sqllogictests/sql/comments/comment_on_column.test @@ -0,0 +1,52 @@ +# name: test/sql/comments/comment_on_column.test +# description: test ducklake comments on a column +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_column_comments_files') + +statement ok +CREATE TABLE ducklake.test_table as SELECT 1 as test_table_column + +### Comment on column from table +query I +select comment from duckdb_columns() where column_name='test_table_column'; +---- +NULL + +statement ok +COMMENT ON COLUMN ducklake.test_table.test_table_column IS 'very gezellige column' + +query I +select comment from duckdb_columns() where column_name='test_table_column'; +---- +very gezellige column + +statement ok +BEGIN + +statement ok +COMMENT ON COLUMN ducklake.test_table.test_table_column IS 'toch niet zo gezellig' + +query I +select comment from duckdb_columns() where column_name='test_table_column'; +---- +toch niet zo gezellig + +# take that back! +statement ok +ROLLBACK + +query I +select comment from duckdb_columns() where column_name='test_table_column'; +---- +very gezellige column diff --git a/tests/sqllogictests/sql/comments/comments.test b/tests/sqllogictests/sql/comments/comments.test new file mode 100644 index 0000000..b7d4f0b --- /dev/null +++ b/tests/sqllogictests/sql/comments/comments.test @@ -0,0 +1,83 @@ +# name: test/sql/comments/comments.test +# description: test ducklake comments +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_comments_files') + +### Comment on Tables +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +COMMENT ON TABLE ducklake.test IS 'very gezellige table' + +query I +select comment from duckdb_tables() where table_name='test'; +---- +very gezellige table + +# Reverting to null goes like this +statement ok +COMMENT ON TABLE ducklake.test IS NULL + +query I +select comment from duckdb_tables() where table_name='test'; +---- +NULL + +# rollback +statement ok +BEGIN + +statement ok +COMMENT ON TABLE ducklake.test IS 'rolled back comment' + +query I +select comment from duckdb_tables() where table_name='test'; +---- +rolled back comment + +statement ok +ROLLBACK + +query I +select comment from duckdb_tables() where table_name='test'; +---- +NULL + +### Comment on view +statement ok +CREATE VIEW ducklake.test_view as SELECT 1 as test_view_column + +query I +select comment from duckdb_views() where view_name='test_view'; +---- +NULL + +statement ok +COMMENT ON VIEW ducklake.test_view IS 'very gezellige view' + +query I +select comment from duckdb_views() where view_name='test_view'; +---- +very gezellige view + +statement error +COMMENT ON VIEW ducklake.test IS '123' +---- +not a view + +statement error +COMMENT ON TABLE ducklake.test_view IS '123' +---- +not a table diff --git
a/tests/sqllogictests/sql/compaction/cleanup_old_files_global_option.test b/tests/sqllogictests/sql/compaction/cleanup_old_files_global_option.test new file mode 100644 index 0000000..cab6ac5 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/cleanup_old_files_global_option.test @@ -0,0 +1,57 @@ +# name: test/sql/compaction/cleanup_old_files_global_option.test +# description: Test global options for the cleanup old files function ducklake_cleanup_old_files +# group: [compaction] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/cleanup_old_files_global_option') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(1000) + +# snapshot 3 +statement ok +DELETE FROM ducklake.test + +# snapshot 4 +statement ok +DROP TABLE ducklake.test + +statement ok +CALL ducklake_expire_snapshots('ducklake', dry_run => false, versions => [2]) + +statement ok +CALL ducklake.set_option('delete_older_than', '1 millisecond') + +query I +SELECT count(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true) +---- +1 + +statement ok +CALL ducklake.set_option('delete_older_than', '1 week') + +query I +SELECT count(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true) +---- +0 + +query I +SELECT count(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true, cleanup_all => true) +---- +1 + diff --git a/tests/sqllogictests/sql/compaction/compaction_alter_table.test b/tests/sqllogictests/sql/compaction/compaction_alter_table.test new file mode 100644 index 0000000..c6d69a5 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_alter_table.test @@ -0,0 +1,136 @@ +# name: test/sql/compaction/compaction_alter_table.test +# description: test compaction on a database that is altered +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_alter_files') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(id INTEGER, i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test VALUES (1, 10); + +# snapshot 3 +statement ok +INSERT INTO ducklake.test VALUES (2, 20); + +# snapshot 4 +statement ok +ALTER TABLE ducklake.test ADD COLUMN j INTEGER + +# snapshot 5 +statement ok +INSERT INTO ducklake.test VALUES (3, 30, 300); + +# snapshot 6 +statement ok +INSERT INTO ducklake.test VALUES (4, 40, 400); + +# snapshot 7 +statement ok +ALTER TABLE ducklake.test DROP COLUMN i + +# snapshot 8 +statement ok +INSERT INTO ducklake.test VALUES (5, 500); + +# snapshot 9 +statement ok +INSERT INTO ducklake.test VALUES (6, 600); + +# snapshot 10 +statement ok +ALTER TABLE ducklake.test ADD COLUMN i VARCHAR + +# snapshot 11 +statement ok +INSERT INTO ducklake.test VALUES (7, 700, 'hello'); + +# snapshot 12 +statement ok +INSERT INTO ducklake.test VALUES (8, 800, 'world'); + +query IIIII +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +2 0 1 NULL NULL +3 1 2 NULL NULL +5 2 3 300 NULL +6 3 4 400 NULL +8 4 5 500 NULL +9 5 6 600 NULL +11 6 7 700 hello +12 7 8 800 world + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_alter_files/**/*') +---- +8 + +statement ok +CALL 
ducklake_merge_adjacent_files('ducklake'); + +# we cannot merge across alter statements - so this results in 4 separate merges +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_alter_files/**/*') +---- +12 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# four files remain +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_alter_files/**/*') +---- +4 + +# verify correct behavior when operating on the compacted file +# time travel +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.test AT (VERSION => 3) ORDER BY ALL +---- +2 0 1 10 +3 1 2 20 + +query IIIII +SELECT snapshot_id, rowid, * FROM ducklake.test AT (VERSION => 6) ORDER BY ALL +---- +2 0 1 10 NULL +3 1 2 20 NULL +5 2 3 30 300 +6 3 4 40 400 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.test AT (VERSION => 9) ORDER BY ALL +---- +2 0 1 NULL +3 1 2 NULL +5 2 3 300 +6 3 4 400 +8 4 5 500 +9 5 6 600 + +query IIIII +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +2 0 1 NULL NULL +3 1 2 NULL NULL +5 2 3 300 NULL +6 3 4 400 NULL +8 4 5 500 NULL +9 5 6 600 NULL +11 6 7 700 hello +12 7 8 800 world diff --git a/tests/sqllogictests/sql/compaction/compaction_cleanup_global.test b/tests/sqllogictests/sql/compaction/compaction_cleanup_global.test new file mode 100644 index 0000000..5792fa1 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_cleanup_global.test @@ -0,0 +1,49 @@ +# name: test/sql/compaction/compaction_cleanup_global.test +# description: test ducklake cleanup using global setup +# group: [compaction] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/compaction_cleanup_global', METADATA_CATALOG 'metadata') + +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +statement ok +INSERT INTO ducklake.test FROM range(10) + +statement ok +DELETE FROM ducklake.test + +statement ok +DROP TABLE ducklake.test + +statement ok +CALL ducklake_expire_snapshots('ducklake', dry_run => false, versions => [2]) + +statement ok +CALL ducklake.set_option('delete_older_than', '1 week') + +# This should return 0 since the file to be deleted is not that old +query I +SELECT count(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true); +---- +0 + +statement ok +CALL ducklake.set_option('delete_older_than', '1 millisecond') + +# This should return 1 since the file to be deleted is older than 1 millisecond +query I +SELECT count(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true); +---- +1 diff --git a/tests/sqllogictests/sql/compaction/compaction_delete_conflict.test b/tests/sqllogictests/sql/compaction/compaction_delete_conflict.test new file mode 100644 index 0000000..b969bf4 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_delete_conflict.test @@ -0,0 +1,137 @@ +# name: test/sql/compaction/compaction_delete_conflict.test +# description: Test transaction conflicts with compaction +# group: [compaction] + +require ducklake + +require parquet + + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflicts_compaction_files') + +statement ok +SET immediate_transaction_mode=true + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES
(1); + +statement ok +INSERT INTO ducklake.test VALUES (2); + +# try to commit a delete after a compaction: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok con2 +DELETE FROM ducklake.test + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +statement ok +INSERT INTO ducklake.test VALUES (3); + +statement ok +INSERT INTO ducklake.test VALUES (4); + +# try to commit a compaction after a deletion: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DELETE FROM ducklake.test + +statement ok con2 +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +statement ok +INSERT INTO ducklake.test VALUES (5); + +statement ok +INSERT INTO ducklake.test VALUES (6); + +# two transactions both try to compact: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok con2 +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +statement ok +INSERT INTO ducklake.test VALUES (7); + +statement ok +INSERT INTO ducklake.test VALUES (8); + +# compaction and insert: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok con2 +INSERT INTO ducklake.test VALUES (9); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +query I +SELECT * FROM ducklake.test ORDER BY ALL +---- +5 +6 +7 +8 +9 diff --git a/tests/sqllogictests/sql/compaction/compaction_encrypted.test b/tests/sqllogictests/sql/compaction/compaction_encrypted.test new file mode 100644 index 0000000..c9cc623 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_encrypted.test @@ -0,0 +1,61 @@ +# name: test/sql/compaction/compaction_encrypted.test +# description: test compaction on an encrypted database +# group: [compaction] + +require ducklake + +require parquet + +require httpfs + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_encrypted_files', ENCRYPTED) + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1); + +statement ok +INSERT INTO ducklake.test VALUES (2); + +statement ok +INSERT INTO ducklake.test VALUES (3); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# delete the old files +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# all files have been compacted into one file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_encrypted_files/**/*') +---- +1 + +# verify the file is encrypted +statement error +SELECT * FROM '${DATA_PATH}/ducklake_compaction_encrypted_files/**/*.parquet' +---- +encrypted + +# all reading still works +query I +SELECT * FROM ducklake.test AT (VERSION => 2) +---- +1 + +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +2 0 1 +3 1 2 +4 2 3 diff --git a/tests/sqllogictests/sql/compaction/compaction_full_file_delete.test b/tests/sqllogictests/sql/compaction/compaction_full_file_delete.test new file mode 100644 index 0000000..90a7dbf --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_full_file_delete.test @@ -0,0 +1,42 @@ 
+# name: test/sql/compaction/compaction_full_file_delete.test +# description: test compaction on a table with full-file deletes +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_full_file_delete') + +statement ok +use ducklake; + +statement ok +create table test (id int); + +statement ok +insert into test values (1); + +statement ok +delete from test where id = 1; + +statement ok +insert into test values (2); + +statement ok +insert into test values (3); + +statement ok +CALL merge_adjacent_files(); + +query I +from test; +---- +2 +3 diff --git a/tests/sqllogictests/sql/compaction/compaction_hive_structure.test b/tests/sqllogictests/sql/compaction/compaction_hive_structure.test new file mode 100644 index 0000000..ecc3036 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_hive_structure.test @@ -0,0 +1,96 @@ +# name: test/sql/compaction/compaction_hive_structure.test +# description: test compaction keeps hive structure +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_hive') + +statement ok +use ducklake + +statement ok +CREATE TABLE sales ( + id INTEGER, + product STRING, + amount DOUBLE, + sale_date DATE + ); + +statement ok +ALTER TABLE sales SET PARTITIONED BY (sale_date); + +statement ok +INSERT INTO sales + VALUES + (1, 'Product A', 100.0, '2023-07-01'), + (2, 'Product B', 150.0, '2023-07-01'); + +statement ok +INSERT INTO sales + VALUES + (3, 'Product A', 100.0, '2023-07-02'), + (4, 'Product B', 150.0, '2023-07-02'); + +statement ok +INSERT INTO sales + VALUES + (5, 'Product C', 300.0, '2023-07-02'), + (6, 'Product B', 150.0, '2023-07-02'); + +statement ok +CALL merge_adjacent_files(); + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'sales') where data_file like '%sale_date%' +---- +2 + +query IIII +FROM sales +---- +1 Product A 100.0 2023-07-01 +2 Product B 150.0 2023-07-01 +3 Product A 100.0 2023-07-02 +4 Product B 150.0 2023-07-02 +5 Product C 300.0 2023-07-02 +6 Product B 150.0 2023-07-02 + +statement ok +CREATE TABLE test_table AS SELECT 1 as partition_col, random() as value LIMIT 0; + +statement ok +ALTER TABLE test_table SET PARTITIONED BY (partition_col); + +statement ok +INSERT INTO test_table SELECT 1 as partition_col, random() as value; + +statement ok +INSERT INTO test_table SELECT 2 as partition_col, random() as value; + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'test_table') where data_file like '%partition_col%' +---- +2 + +statement ok +ALTER TABLE test_table RENAME TO test_table_renamed; + +statement ok +INSERT INTO test_table_renamed SELECT 2 as partition_col, random() as value; + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'test_table_renamed') where data_file like '%partition_col%'; +---- +3 \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/compaction_partitioned_non_adjacent.test b/tests/sqllogictests/sql/compaction/compaction_partitioned_non_adjacent.test new file mode 100644 index 0000000..cbe908f --- /dev/null +++ 
b/tests/sqllogictests/sql/compaction/compaction_partitioned_non_adjacent.test @@ -0,0 +1,151 @@ +# name: test/sql/compaction/compaction_partitioned_non_adjacent.test +# description: test compaction of partitioned tables +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files', METADATA_CATALOG 'ducklake_metadata') + +statement ok +CREATE TABLE ducklake.partitioned(part_key INTEGER, value INTEGER); + +statement ok +ALTER TABLE ducklake.partitioned SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 10); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 100); + +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 20); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 200); + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files/**/*.parquet') +---- +4 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 2 100 +5 2 1 20 +6 3 2 200 + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# we gain two files here +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files/**/*.parquet') +---- +6 + +# cleanup +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# two files left +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files/**/*.parquet') +---- +2 + +# verify we have written partition info correctly +query II +SELECT partition_id, partition_value +FROM ducklake_metadata.ducklake_data_file +JOIN ducklake_metadata.ducklake_file_partition_value USING (data_file_id) +ORDER BY ALL +---- +2 1 +2 2 + +query II +SELECT * FROM ducklake.partitioned AT (VERSION => 3) +---- +1 10 + +query II rowsort +SELECT * FROM ducklake.partitioned AT (VERSION => 4) +---- +1 10 +2 100 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 2 100 +5 2 1 20 +6 3 2 200 + +# insert and compact again +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 30); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 300); + +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 40); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 400); + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 2 100 +5 2 1 20 +6 3 2 200 +8 4 1 30 +9 5 2 300 +10 6 1 40 +11 7 2 400 + +# we're back up to 6 files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files/**/*.parquet') +---- +6 + +# compact + cleanup +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# cleanup +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# back down to 2 files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_non_adjacent_files/**/*.parquet') +---- +2 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 2 100 +5 2 1 20 +6 3 2 200 +8 4 1 30 +9 5 2 300 +10 6 1 40 +11 7 2 400 diff --git a/tests/sqllogictests/sql/compaction/compaction_partitioned_table.test b/tests/sqllogictests/sql/compaction/compaction_partitioned_table.test new file mode 100644 index 
0000000..0c9bbc3 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_partitioned_table.test @@ -0,0 +1,94 @@ +# name: test/sql/compaction/compaction_partitioned_table.test +# description: test compaction of partitioned tables +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partitioned_compact_files', METADATA_CATALOG 'ducklake_metadata') + +statement ok +CREATE TABLE ducklake.partitioned(part_key INTEGER, value INTEGER); + +statement ok +ALTER TABLE ducklake.partitioned SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 10); + +statement ok +INSERT INTO ducklake.partitioned VALUES (1, 20); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 100); + +statement ok +INSERT INTO ducklake.partitioned VALUES (2, 200); + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_files/**/*.parquet') +---- +4 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 1 20 +5 2 2 100 +6 3 2 200 + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# we gain two files here +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_files/**/*.parquet') +---- +6 + +# cleanup +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# two files left +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_partitioned_compact_files/**/*.parquet') +---- +2 + +# verify we have written partition info correctly +query II +SELECT partition_id, partition_value +FROM ducklake_metadata.ducklake_data_file +JOIN ducklake_metadata.ducklake_file_partition_value USING (data_file_id) +ORDER BY ALL +---- +2 1 +2 2 + +query II +SELECT * FROM ducklake.partitioned AT (VERSION => 3) +---- +1 10 + +query II +SELECT * FROM ducklake.partitioned AT (VERSION => 4) +---- +1 10 +1 20 + +query IIII +SELECT snapshot_id, rowid, * FROM ducklake.partitioned ORDER BY ALL +---- +3 0 1 10 +4 1 1 20 +5 2 2 100 +6 3 2 200 diff --git a/tests/sqllogictests/sql/compaction/compaction_size_limit.test b/tests/sqllogictests/sql/compaction/compaction_size_limit.test new file mode 100644 index 0000000..6c5669f --- /dev/null +++ b/tests/sqllogictests/sql/compaction/compaction_size_limit.test @@ -0,0 +1,64 @@ +# name: test/sql/compaction/compaction_size_limit.test +# description: Test compaction over file size limits +# group: [compaction] + +require ducklake + +require parquet + +require tpch + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/compaction_size_limit/') + +statement ok +CALL dbgen(sf = 0.01); + +statement ok +use ducklake + +# The target size should be enough to fit 3 of the ~1.7 MB files generated below +statement ok +CALL ducklake.set_option('target_file_size', '5.4MB'); + +statement ok +CREATE TABLE test AS (SELECT * FROM memory.main.lineitem WHERE 0=1); + +loop i 0 5 + +# Each iteration generates a ~1.7 MB file +statement ok +INSERT INTO test SELECT * FROM memory.main.lineitem + +endloop + +# verify the correct number of files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/compaction_size_limit/main/test/*') +---- +5 + + +statement ok +CALL ducklake_merge_adjacent_files('ducklake') + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all
=> true); + +# verify we are down to two files now +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/compaction_size_limit/main/test/*') +---- +2 + + +# Verify the row count is still correct +query I +select count(*) FROM test; +---- +300875 \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/expire_snapshot_global_option.test b/tests/sqllogictests/sql/compaction/expire_snapshot_global_option.test new file mode 100644 index 0000000..64e9e98 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/expire_snapshot_global_option.test @@ -0,0 +1,46 @@ +# name: test/sql/compaction/expire_snapshot_global_option.test +# description: Test global options for the snapshot expiration function ducklake_expire_snapshots +# group: [compaction] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/expire_snapshot_global_option') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(10) + +statement ok +CALL ducklake.set_option('expire_older_than', '1 millisecond') + +query I +SELECT count(*) FROM ducklake_expire_snapshots('ducklake', dry_run => true) +---- +2 + +statement ok +CALL ducklake.set_option('expire_older_than', '1 week') + +query I +SELECT count(*) FROM ducklake_expire_snapshots('ducklake', dry_run => true) +---- +0 + +query I +SELECT count(*) FROM ducklake_expire_snapshots('ducklake', dry_run => true, versions => [1]) +---- +1 \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/expire_snapshots.test b/tests/sqllogictests/sql/compaction/expire_snapshots.test new file mode 100644 index 0000000..c9030d6 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/expire_snapshots.test @@ -0,0 +1,91 @@ +# name: test/sql/compaction/expire_snapshots.test +# description: test ducklake expiration of snapshots +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_expire_snapshots_files', METADATA_CATALOG 'metadata') + +# scenario: create a table, fill it, delete all values, then drop it +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(1000) + +# snapshot 3 +statement ok +DELETE FROM ducklake.test + +# snapshot 4 +statement ok +DROP TABLE ducklake.test + +# explicitly expire snapshot 2 +# dry run +query I +SELECT snapshot_id FROM ducklake_expire_snapshots('ducklake', dry_run => true, versions => [2]) +---- +2 + +# actually expire +statement ok +CALL ducklake_expire_snapshots('ducklake', dry_run => false, versions => [2]) + +# the snapshot is no longer available +statement error +FROM ducklake.test AT (VERSION => 2) +---- +No snapshot found at version 2 + +# we can query around that though +query I +FROM ducklake.test AT (VERSION => 1) +---- + +query I +FROM ducklake.test AT (VERSION => 3) +---- + +# the data file is no longer required (since only snapshot 2 references it) -> we can delete it now +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', dry_run => true, cleanup_all => true); +---- +1 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# verify that it is actually gone
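+# (illustrative aside: only snapshots 0, 1, 3 and 4 should remain at this point, which could be cross-checked with: +# SELECT snapshot_id FROM ducklake_snapshots('ducklake') ORDER BY ALL;)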
+query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_expire_snapshots_files/*') +---- +0 + +# let's delete all remaining snapshots (except for the last one) +query I +SELECT snapshot_id FROM ducklake_expire_snapshots('ducklake', older_than => NOW()) ORDER BY ALL +---- +0 +1 +3 + +# all traces of the table are gone +foreach tbl ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats + +query I +SELECT COUNT(*) FROM metadata.${tbl} +---- +0 + +endloop diff --git a/tests/sqllogictests/sql/compaction/expire_snapshots_drop_table.test b/tests/sqllogictests/sql/compaction/expire_snapshots_drop_table.test new file mode 100644 index 0000000..93d10f2 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/expire_snapshots_drop_table.test @@ -0,0 +1,78 @@ +# name: test/sql/compaction/expire_snapshots_drop_table.test +# description: test ducklake expiration of snapshots +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_expire_snapshots_drop_table_files', METADATA_CATALOG 'metadata') + +# delete all values in a table +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(1000) + +# snapshot 3 +statement ok +DELETE FROM ducklake.test WHERE i%4=0 + +# snapshot 4 +statement ok +DELETE FROM ducklake.test WHERE i%2=0 + +# snapshot 5 +statement ok +DROP TABLE ducklake.test + +# we have 3 files now (insert from snapshot 2, delete from snapshot 3, delete from snapshot 4) +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_expire_snapshots_drop_table_files/main/test/*') +---- +3 + +# explicitly expire snapshot 3 and cleanup files +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [3]) + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# the deletes from snapshot 3 should now be gone +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_expire_snapshots_drop_table_files/main/test/*') +---- +2 + +# now expire snapshots 1, 2 and 4 - this should fully remove all traces of the table +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [1, 2, 4]) + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# verify that all files are actually gone +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_expire_snapshots_drop_table_files/main/test/*') +---- +0 + +# all traces of the table are gone +foreach tbl ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats ducklake_data_file ducklake_delete_file + +query I +SELECT COUNT(*) FROM metadata.${tbl} +---- +0 + +endloop diff --git a/tests/sqllogictests/sql/compaction/expire_snapshots_schema.test b/tests/sqllogictests/sql/compaction/expire_snapshots_schema.test new file mode 100644 index 0000000..67a5110 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/expire_snapshots_schema.test @@ -0,0 +1,59 @@ +# name: test/sql/compaction/expire_snapshots_schema.test +# description: test ducklake expiration of snapshots with schema changes +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_expire_snapshots_schema_files', 
METADATA_CATALOG 'metadata') + +# snapshot 1 +statement ok +CREATE SCHEMA s1; + +# snapshot 2 +statement ok +CREATE VIEW s1.vw AS SELECT 42 + +# snapshot 3 +statement ok +CREATE TABLE s1.tbl(i INTEGER) + +# snapshot 4 +statement ok +DROP TABLE s1.tbl + +# snapshot 5 +statement ok +DROP VIEW s1.vw + +# snapshot 6 +statement ok +DROP SCHEMA s1 + +# expire all snapshots +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [1, 2, 3, 4, 5]) + +# we have one schema remaining (`main`) +query I +SELECT COUNT(*) FROM metadata.ducklake_schema +---- +1 + +# all traces of the schema are gone +foreach tbl ducklake_view ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats ducklake_data_file ducklake_delete_file + +query I +SELECT COUNT(*) FROM metadata.${tbl} +---- +0 + +endloop diff --git a/tests/sqllogictests/sql/compaction/merge_adjacent_global_option.test b/tests/sqllogictests/sql/compaction/merge_adjacent_global_option.test new file mode 100644 index 0000000..cac08ff --- /dev/null +++ b/tests/sqllogictests/sql/compaction/merge_adjacent_global_option.test @@ -0,0 +1,128 @@ +# name: test/sql/compaction/merge_adjacent_global_option.test +# description: Test global options for the merge adjacent function ducklake_merge_adjacent_files +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/merge_adjacent_global_option') + +statement ok +use ducklake + +# Create one table with two files +statement ok +CREATE TABLE example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example (key, value) VALUES ('baz', 'qux'); + +# Create another table with two files +statement ok +CREATE TABLE example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('baz', 'qux'); + +# Different schema with another table with two files +statement ok +CREATE SCHEMA s1; + +statement ok +CREATE TABLE s1.example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example (key, value) VALUES ('baz', 'qux'); + +statement ok +CREATE TABLE s1.example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('baz', 'qux'); + +statement ok +CREATE TABLE s1.example_3 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example_3 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example_3 (key, value) VALUES ('baz', 'qux'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +2 2 +3 2 +4 4 +5 4 +6 5 +7 5 +8 6 +9 6 + +# Disable auto_compact for both schemas, then re-enable it only for the example_2 tables: only those should be merged +statement ok +CALL ducklake.set_option('auto_compact', false, schema => 's1') + +statement ok +CALL ducklake.set_option('auto_compact', false, schema => 'main') + +statement ok +CALL ducklake.set_option('auto_compact', true, schema => 's1', table_name => 'example_2') + +statement ok +CALL ducklake.set_option('auto_compact', true, schema => 'main', table_name => 'example_2') + +statement ok +CALL
ducklake_merge_adjacent_files('ducklake'); + +# Only the example_2 tables, which have auto_compact enabled, were merged +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +4 4 +5 4 +8 6 +9 6 +10 2 +11 5 + +# We can also enable auto_compact for a whole schema +statement ok +CALL ducklake.set_option('auto_compact', true, schema => 's1') + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +10 2 +11 5 +12 4 +13 6 \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/merge_adjacent_max_files.test b/tests/sqllogictests/sql/compaction/merge_adjacent_max_files.test new file mode 100644 index 0000000..ceca573 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/merge_adjacent_max_files.test @@ -0,0 +1,129 @@ +# name: test/sql/compaction/merge_adjacent_max_files.test +# description: test the max_compacted_files option of ducklake_merge_adjacent_files +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/merge_adjacent_max_files/') + +statement ok +USE ducklake; + +statement ok +CREATE TABLE example (key integer); + +# Generate 20 files +loop i 0 20 + +statement ok +INSERT INTO example VALUES (${i}); + +endloop + +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>1); + +# We create at most one new file; with the default size limits, one file fits all the data +query II +SELECT max(data_file_id), min(data_file_id) FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +20 20 + +statement ok +DROP TABLE example; + +statement ok +CALL ducklake_expire_snapshots('ducklake', older_than => now()); + +statement ok +CALL ducklake.set_option('target_file_size', '1KB'); + +statement ok +CREATE TABLE example (key integer); + +# Generate 20 files +loop i 0 20 + +statement ok +INSERT INTO example VALUES (${i}); + +endloop + +query I +SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +20 + +statement error +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>-1); +---- +Type INT32 with value -1 can't be cast + +statement error +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>NULL); +---- +The max_compacted_files option must be a non-null integer + +statement error +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>0); +---- +The max_compacted_files option must be greater than zero.
+ +query I +SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +20 + +query II +SELECT max(data_file_id), min(data_file_id) FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +40 21 + +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>2); + +# We create at most two new files, removing as many old files as needed to produce them +query II +SELECT max(data_file_id), min(data_file_id) > 30 FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +42 True + +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>1); + +# We create at most one file +query II +SELECT max(data_file_id), min(data_file_id) > 31 FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +43 True + +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>1000); + +# With a high limit this is effectively full compaction, bounded only by the target file size +query II +SELECT max(data_file_id) > 43, min(data_file_id) > 32 FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +True True + +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example', max_compacted_files=>1000); + +# Calling it again produces no changes. +query I +SELECT max(data_file_id) BETWEEN 43 AND 45 FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +True + +# Check data is still correct +query I +select count(*) FROM example; +---- +20 diff --git a/tests/sqllogictests/sql/compaction/merge_adjacent_options.test b/tests/sqllogictests/sql/compaction/merge_adjacent_options.test new file mode 100644 index 0000000..68b3e9c --- /dev/null +++ b/tests/sqllogictests/sql/compaction/merge_adjacent_options.test @@ -0,0 +1,119 @@ +# name: test/sql/compaction/merge_adjacent_options.test +# description: test ducklake merge adjacent files options +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/merge_adjacent_options/') + +statement ok +USE ducklake; + +# Create one table with two files +statement ok +CREATE TABLE example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example (key, value) VALUES ('baz', 'qux'); + +# Create another table with two files +statement ok +CREATE TABLE example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('baz', 'qux'); + +# Different schema with another table with two files +statement ok +CREATE SCHEMA s1; + +statement ok +CREATE TABLE s1.example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example (key, value) VALUES ('baz', 'qux'); + +statement ok +CREATE TABLE s1.example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('baz', 'qux'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +2 2 +3 2 +4 4 +5 4 +6 5 +7 5 + +# We specify a table_name but no schema name, so example_2 in the default schema is merged +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example_2'); + +# We write a new file for table id 2 +query II +SELECT 
data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +4 4 +5 4 +6 5 +7 5 +8 2 + + +# If we also specify the schema, we merge table id 5 instead +statement ok +CALL ducklake_merge_adjacent_files('ducklake', 'example_2', schema=>'s1'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +0 1 +1 1 +4 4 +5 4 +8 2 +9 5 + +# If we call it without arguments, all tables across all schemas are included, so we compact both remaining tables +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +8 2 +9 5 +10 1 +11 4 + +statement error +CALL ducklake_merge_adjacent_files('ducklake', 'example_2', schema=>'bogus'); +---- +Did you mean "main.example_2 or s1.example_2"? \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/merge_files_expired_snapshots.test b/tests/sqllogictests/sql/compaction/merge_files_expired_snapshots.test new file mode 100644 index 0000000..24e3ddc --- /dev/null +++ b/tests/sqllogictests/sql/compaction/merge_files_expired_snapshots.test @@ -0,0 +1,132 @@ +# name: test/sql/compaction/merge_files_expired_snapshots.test +# description: test ducklake merges files from expired snapshots if they also belong to the current snapshot +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_merge_expire_snapshots_schema') + +statement ok +USE ducklake; + +statement ok +CREATE TABLE example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example (key, value) VALUES ('baz', 'qux'); + +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +1 {tables_created=[main.example]} +2 {tables_inserted_into=[1]} +3 {tables_inserted_into=[1]} + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +2 + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [1,2]); + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +2 + +statement ok +CALL ducklake.merge_adjacent_files(); + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +1 + +query II +FROM example +---- +foo bar +baz qux + +# Add a few schema changes +statement ok +INSERT INTO example (key, value) VALUES ('a', 'b'); + +statement ok +ALTER TABLE example ADD COLUMN j INTEGER + +statement ok +INSERT INTO example VALUES ('c', 'd', 1); + +statement ok +INSERT INTO example VALUES ('e', 'f', 2); + +statement ok +ALTER TABLE example DROP COLUMN key + +statement ok +INSERT INTO example VALUES ('k', 3); + +statement ok +INSERT INTO example VALUES ('f', 9); + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +6 + +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +3 {tables_inserted_into=[1]} +4 {} +5 {tables_inserted_into=[1]} +6 {tables_altered=[1]} +7 {tables_inserted_into=[1]} +8 {tables_inserted_into=[1]} +9 {tables_altered=[1]} +10 {tables_inserted_into=[1]} +11 {tables_inserted_into=[1]} + + +statement ok +CALL ducklake_expire_snapshots('ducklake', versions => [5,6,7,8]); + +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +6 + +statement ok +CALL 
ducklake.merge_adjacent_files(); + +# We altered the schema twice, hence we can have 3 files, as there are only 3 different schemas. +query I +SELECT count(*) FROM ducklake_list_files('ducklake', 'example'); +---- +3 + +query II +FROM example ORDER BY ALL +---- +b NULL +bar NULL +d 1 +f 2 +f 9 +k 3 +qux NULL \ No newline at end of file diff --git a/tests/sqllogictests/sql/compaction/mix_large_small_insertions.test b/tests/sqllogictests/sql/compaction/mix_large_small_insertions.test new file mode 100644 index 0000000..fd07ac6 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/mix_large_small_insertions.test @@ -0,0 +1,91 @@ +# name: test/sql/compaction/mix_large_small_insertions.test +# description: test ducklake mix of small and large insertions +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_mix_files', METADATA_CATALOG 'metadata') + +# set target file size very small +statement ok +CALL ducklake.set_option('target_file_size', '2KB'); + +# snapshot 1 +statement ok +CREATE TABLE ducklake.tbl(id INTEGER, str VARCHAR); + +# perform a mix of large and small insertions + +# snapshot 2 +statement ok +INSERT INTO ducklake.tbl VALUES (1, 'hello'); + +# snapshot 3 +statement ok +INSERT INTO ducklake.tbl SELECT 100000 + i, concat('thisisastring', i) FROM range(10000) t(i) + +# snapshot 4 +statement ok +INSERT INTO ducklake.tbl VALUES (2, 'world'); + +# snapshot 5 +statement ok +INSERT INTO ducklake.tbl VALUES (3, 'and'); + +# snapshot 6 +statement ok +INSERT INTO ducklake.tbl SELECT 200000 + i, concat('thisisalsoastring', i) FROM range(10000) t(i) + +# snapshot 7 +statement ok +INSERT INTO ducklake.tbl VALUES (3, 'my'); + +# snapshot 8 +statement ok +INSERT INTO ducklake.tbl VALUES (3, 'friends'); + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.tbl WHERE snapshot_id NOT IN (3, 6) ORDER BY ALL +---- +0 2 1 hello +10001 4 2 world +10002 5 3 and +20003 7 3 my +20004 8 3 friends + +# we should have 7 files now +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_mix_files/**/*.parquet') +---- +7 + +# now compact and cleanup +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# now we should have 3 files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_mix_files/**/*.parquet') +---- +3 + +# all row-ids and snapshots are still intact +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.tbl WHERE snapshot_id NOT IN (3, 6) ORDER BY ALL +---- +0 2 1 hello +10001 4 2 world +10002 5 3 and +20003 7 3 my +20004 8 3 friends diff --git a/tests/sqllogictests/sql/compaction/multi_compaction.test b/tests/sqllogictests/sql/compaction/multi_compaction.test new file mode 100644 index 0000000..b223697 --- /dev/null +++ b/tests/sqllogictests/sql/compaction/multi_compaction.test @@ -0,0 +1,61 @@ +# name: test/sql/compaction/multi_compaction.test +# description: test chain of compaction statements +# group: [compaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_multi_compaction_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +foreach BASE 0 3 + 
+statement ok +INSERT INTO ducklake.test VALUES (${BASE} + 1); + +statement ok +INSERT INTO ducklake.test VALUES (${BASE} + 2); + +statement ok +INSERT INTO ducklake.test VALUES (${BASE} + 3); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +endloop + +# delete the old files +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# all files have been compacted into one file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_multi_compaction_files/**/*') +---- +1 + +# verify that after multi-compaction time travel still works +query I +SELECT * FROM ducklake.test AT (VERSION => 2) +---- +1 + +# row-ids/snapshot-ids are kept also across multiple compaction runs +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +2 0 1 +3 1 2 +4 2 3 +6 3 4 +7 4 5 +8 5 6 diff --git a/tests/sqllogictests/sql/compaction/small_insert_compaction.test b/tests/sqllogictests/sql/compaction/small_insert_compaction.test new file mode 100644 index 0000000..3e331bf --- /dev/null +++ b/tests/sqllogictests/sql/compaction/small_insert_compaction.test @@ -0,0 +1,155 @@ +# name: test/sql/compaction/small_insert_compaction.test +# description: test ducklake compaction of consecutive small inserts +# group: [compaction] + +require ducklake + +require parquet + +statement ok +SET preserve_insertion_order=false; + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_compaction_files', METADATA_CATALOG 'xx') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +CREATE TABLE ducklake.test2(i INTEGER); + +# snapshot 3 +statement ok +INSERT INTO ducklake.test VALUES (1); + +# snapshot 4: random change that does not modify the "test" table +statement ok +INSERT INTO ducklake.test2 VALUES (42); + +# snapshot 5 +statement ok +INSERT INTO ducklake.test VALUES (2); + +# snapshot 6 +statement ok +INSERT INTO ducklake.test VALUES (3); + +# snapshot 7 +statement ok +INSERT INTO ducklake.test VALUES (4); + +# snapshot 8 +statement ok +INSERT INTO ducklake.test VALUES (5); + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_files/**/*') +---- +6 + +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +3 0 1 +5 1 2 +6 2 3 +7 3 4 +8 4 5 + +statement ok +CALL ducklake.merge_adjacent_files(); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# files are not immediately deleted - but are added to the deletion queue +# we actually gain a file here +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_files/**/*') +---- +7 + +# force clean-up of the files +# test dry run - this just lists the files to be cleaned up +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', cleanup_all => true, dry_run => true); +---- +5 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# no more files to be deleted +query I +SELECT COUNT(*) FROM ducklake_cleanup_old_files('ducklake', cleanup_all => true, dry_run => true); +---- +0 + +# now the files are gone +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_compaction_files/**/*') +---- +2 + +# verify correct behavior when operating on the compacted file +# time travel +query I +SELECT * FROM ducklake.test AT (VERSION => 3) +---- +1 + +query I +SELECT * FROM ducklake.test AT (VERSION => 4) +---- +1 + + +query I 
+SELECT * FROM ducklake.test AT (VERSION => 5) ORDER BY ALL +---- +1 +2 + +# reading snapshot id and row id +query III +SELECT snapshot_id, rowid, * FROM ducklake.test ORDER BY ALL +---- +3 0 1 +5 1 2 +6 2 3 +7 3 4 +8 4 5 + +# table insertions function +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 3) ORDER BY ALL +---- +0 1 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 4) ORDER BY ALL +---- +0 1 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 6) ORDER BY ALL +---- +0 1 +1 2 +2 3 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 9) ORDER BY ALL +---- +0 1 +1 2 +2 3 +3 4 +4 5 diff --git a/tests/sqllogictests/sql/concurrent/concurrent_insert_conflict.test b/tests/sqllogictests/sql/concurrent/concurrent_insert_conflict.test new file mode 100644 index 0000000..1a7fba0 --- /dev/null +++ b/tests/sqllogictests/sql/concurrent/concurrent_insert_conflict.test @@ -0,0 +1,44 @@ +# name: test/sql/concurrent/concurrent_insert_conflict.test +# description: test concurrent inserts +# group: [concurrent] + +# There is a file-system problem on Windows +require notwindows + +require ducklake + +require parquet + + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_concurrent_insert_files') + +statement ok +SET ducklake_retry_wait_ms=100 + +statement ok +SET ducklake_retry_backoff=2.0 + +statement ok +CREATE TABLE ducklake.tbl(key INTEGER); + +concurrentloop i 0 2 + +query I +INSERT INTO ducklake.tbl VALUES (${i}) +---- +1 + +endloop + +query II +SELECT COUNT(*), SUM(key) FROM ducklake.tbl +---- +2 1 diff --git a/tests/sqllogictests/sql/concurrent/concurrent_insert_data_inlining.test b/tests/sqllogictests/sql/concurrent/concurrent_insert_data_inlining.test new file mode 100644 index 0000000..edd73b8 --- /dev/null +++ b/tests/sqllogictests/sql/concurrent/concurrent_insert_data_inlining.test @@ -0,0 +1,47 @@ +# name: test/sql/concurrent/concurrent_insert_data_inlining.test +# description: test concurrent insert with data inlining +# group: [concurrent] + +require notwindows + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_concurrent_insert_inline_files') + +statement ok +CREATE TABLE ducklake.tbl(key INTEGER); + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 10); + +concurrentloop i 0 2 + +query I +INSERT INTO ducklake.tbl VALUES (${i}) +---- +1 + +endloop + +query II +SELECT COUNT(*), SUM(key) FROM ducklake.tbl +---- +2 1 + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_concurrent_insert_inline_files/**') +---- +0 + +query I +SELECT stats(key) FROM ducklake.tbl LIMIT 1 +---- +:.*Min: 0, Max: 1.* diff --git a/tests/sqllogictests/sql/concurrent/file_level_conflict.test b/tests/sqllogictests/sql/concurrent/file_level_conflict.test new file mode 100644 index 0000000..ddb1df3 --- /dev/null +++ b/tests/sqllogictests/sql/concurrent/file_level_conflict.test @@ -0,0 +1,55 @@ +# name: test/sql/concurrent/file_level_conflict.test +# description: test file-level conflicts between concurrent deletes +# group: [concurrent] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION 
__TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/concurrent_insert_conflict', BUSY_TIMEOUT 10000) + +statement ok +use ducklake; + +statement ok +CREATE TABLE tbl (key INTEGER, grouping INTEGER); + +statement ok +ALTER TABLE tbl SET PARTITIONED BY (grouping); + +statement ok +insert into tbl select i as key, i%20 as grouping from range (0,1000) t(i); + +statement ok +SET ducklake_max_retry_count = 100; + +# This will not cause any conflicts, since we are deleting once from each file. +concurrentloop i 0 20 + +statement ok +DELETE FROM ducklake.tbl WHERE key = ${i} + +endloop + +query II +SELECT COUNT(*), SUM(key) FROM ducklake.tbl +---- +980 499310 + +# Let's delete from multiple files in the same transaction +concurrentloop i 20 30 + +statement ok +DELETE FROM ducklake.tbl WHERE key = ${i} OR KEY = ${i} + 10 + +endloop + +query II +SELECT COUNT(*), SUM(key) FROM ducklake.tbl +---- +960 498720 diff --git a/tests/sqllogictests/sql/constraints/not_null.test b/tests/sqllogictests/sql/constraints/not_null.test new file mode 100644 index 0000000..ee8bf35 --- /dev/null +++ b/tests/sqllogictests/sql/constraints/not_null.test @@ -0,0 +1,110 @@ +# name: test/sql/constraints/not_null.test +# description: test NOT NULL constraint +# group: [constraints] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_not_null_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER NOT NULL, j INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (42, NULL); + +statement error +INSERT INTO ducklake.test VALUES (NULL, 84) +---- +NOT NULL constraint + +# check that NOT NULL shows up in DESCRIBE +query IIIIII +DESCRIBE ducklake.test +---- +i INTEGER NO NULL NULL NULL +j INTEGER YES NULL NULL NULL + +# we cannot add a NOT NULL constraint to a column that already has one +statement error
ALTER TABLE ducklake.test ALTER i SET NOT NULL; +---- +already + +# we cannot drop the NOT NULL constraint from a column that does not have one +statement error +ALTER TABLE ducklake.test ALTER j DROP NOT NULL; +---- +no NOT NULL constraint + +# column does not exist +statement error +ALTER TABLE ducklake.test ALTER nonexistent_column SET NOT NULL; +---- +nonexistent_column + +statement error +ALTER TABLE ducklake.test ALTER nonexistent_column DROP NOT NULL; +---- +nonexistent_column + +# we can drop the NOT NULL constraint +statement ok +ALTER TABLE ducklake.test ALTER i DROP NOT NULL; + +# we can then re-add it - and roll it back +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ALTER i SET NOT NULL; + +statement ok +ROLLBACK + +# if we re-add, we cannot add NULL values in the same transaction +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ALTER i SET NOT NULL; + +statement error +INSERT INTO ducklake.test VALUES (NULL, 42) +---- +NOT NULL constraint failed + +statement ok +ROLLBACK + +# conversely, we cannot SET NOT NULL after inserting NULL values in the same transaction +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (NULL, 84) + +statement error +ALTER TABLE ducklake.test ALTER i SET NOT NULL; +---- +transaction-local + +statement ok +ROLLBACK + +# we cannot SET NOT NULL on a table that has NULL values +statement error +ALTER TABLE ducklake.test ALTER j SET NOT NULL; +---- +has NULL values + +# after dropping the constraint 
- we can add NULL rows +statement ok +INSERT INTO ducklake.test VALUES (NULL, 84) diff --git a/tests/sqllogictests/sql/constraints/not_null_drop_column.test b/tests/sqllogictests/sql/constraints/not_null_drop_column.test new file mode 100644 index 0000000..747bc40 --- /dev/null +++ b/tests/sqllogictests/sql/constraints/not_null_drop_column.test @@ -0,0 +1,42 @@ +# name: test/sql/constraints/not_null_drop_column.test +# description: test dropping NOT NULL columns +# group: [constraints] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_not_null_drop_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER NOT NULL, j INTEGER, k INTEGER NOT NULL); + +statement ok +INSERT INTO ducklake.test VALUES (42, NULL, 3); + +statement error +INSERT INTO ducklake.test VALUES (NULL, 84, 3) +---- +NOT NULL constraint + +statement ok +ALTER TABLE ducklake.test DROP COLUMN j + +statement error +INSERT INTO ducklake.test VALUES (42, NULL) +---- +NOT NULL constraint + +statement ok +ALTER TABLE ducklake.test DROP COLUMN k + +statement error +INSERT INTO ducklake.test VALUES (NULL) +---- +NOT NULL constraint diff --git a/tests/sqllogictests/sql/constraints/unsupported.test b/tests/sqllogictests/sql/constraints/unsupported.test new file mode 100644 index 0000000..33d5581 --- /dev/null +++ b/tests/sqllogictests/sql/constraints/unsupported.test @@ -0,0 +1,25 @@ +# name: test/sql/constraints/unsupported.test +# description: test unsupported constraints +# group: [constraints] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_unsupported_files') + +statement error +CREATE TABLE ducklake.test(i INTEGER PRIMARY KEY, j INTEGER); +---- +not supported + +statement error +CREATE TABLE ducklake.test(i INTEGER, j INTEGER, CHECK (i > j)); +---- +not supported diff --git a/tests/sqllogictests/sql/data_inlining/basic_data_inlining.test b/tests/sqllogictests/sql/data_inlining/basic_data_inlining.test new file mode 100644 index 0000000..806ef22 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/basic_data_inlining.test @@ -0,0 +1,114 @@ +# name: test/sql/data_inlining/basic_data_inlining.test +# description: test ducklake extension +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/basic_ducklake_inlining_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +query II +SELECT * FROM ducklake.test +---- + +statement ok +BEGIN + +query I +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); +---- +2 + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 + +query I +SELECT COUNT(*) FROM ducklake.test +---- +2 + +query II +SELECT j, i FROM ducklake.test +---- +2 1 +3 NULL + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test +---- +1000000000000000000 NULL 1 2 +1000000000000000001 NULL NULL 3 + +query III +SELECT filename, file_row_number, file_index FROM ducklake.test +---- +__ducklake_inlined_transaction_local_data 0 1 +__ducklake_inlined_transaction_local_data 
1 1 + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test +---- +0 2 1 2 +1 2 NULL 3 + +# virtual columns +query III +SELECT filename, file_row_number, file_index FROM ducklake.test +---- +ducklake_inlined_data_1_1 0 0 +ducklake_inlined_data_1_1 1 0 + +query II +SELECT * FROM ducklake.test WHERE i IS NULL +---- +NULL 3 + +query II +SELECT * FROM ducklake.test WHERE i=1 +---- +1 2 + +# all data is inlined - so we have no files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/basic_ducklake_inlining_files/**') +---- +0 + +# insert more rows than the inlining limit allows +query I +INSERT INTO ducklake.test SELECT i, 100 + i FROM range(11) t(i); +---- +11 + +query III +SELECT COUNT(*), SUM(i), SUM(j) FROM ducklake.test +---- +13 56 1160 + +# now we have a file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/basic_ducklake_inlining_files/**') +---- +1 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_alter.test b/tests/sqllogictests/sql/data_inlining/data_inlining_alter.test new file mode 100644 index 0000000..833f114 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_alter.test @@ -0,0 +1,57 @@ +# name: test/sql/data_inlining/data_inlining_alter.test +# description: test data inlining with ALTER statements +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_alter_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test AS SELECT 1 i, 2 j + +query II +FROM ducklake.test +---- +1 2 + +statement ok +ALTER TABLE ducklake.test ADD COLUMN k INTEGER + +statement ok +INSERT INTO ducklake.test VALUES (10, 20, 30); + +query III +FROM ducklake.test +---- +1 2 NULL +10 20 30 + +statement ok +ALTER TABLE ducklake.test DROP COLUMN i + +query II +FROM ducklake.test +---- +2 NULL +20 30 + +statement ok +ALTER TABLE ducklake.test ALTER j SET TYPE BIGINT + +statement ok +INSERT INTO ducklake.test VALUES (1000000000000, 0) + +query II +FROM ducklake.test +---- +2 NULL +20 30 +1000000000000 0 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_constraints.test b/tests/sqllogictests/sql/data_inlining/data_inlining_constraints.test new file mode 100644 index 0000000..34d759a --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_constraints.test @@ -0,0 +1,23 @@ +# name: test/sql/data_inlining/data_inlining_constraints.test +# description: test data inlining with NOT NULL constraints +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_constraint_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER NOT NULL); + +statement error +INSERT INTO ducklake.test VALUES (42, NULL) +---- +NOT NULL constraint failed diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_delete.test b/tests/sqllogictests/sql/data_inlining/data_inlining_delete.test new file mode 100644 index 0000000..3ac9194 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_delete.test @@ -0,0 +1,86 @@ +# 
name: test/sql/data_inlining/data_inlining_delete.test +# description: test ducklake extension +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_delete_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test AS SELECT 1 i, 2 j UNION ALL SELECT NULL, 3 UNION ALL SELECT 10, 20 + +statement ok +BEGIN + +# we can delete inlined data +query I +DELETE FROM ducklake.test WHERE i=1 +---- +1 + +query II +SELECT * FROM ducklake.test +---- +NULL 3 +10 20 + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +NULL 3 +10 20 + +# no files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_delete_files/**') +---- +0 + +# delete the other row +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE i=10 +---- +1 + +query II +SELECT * FROM ducklake.test +---- +NULL 3 + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +NULL 3 + +# delete all remaining rows in the table +query I +DELETE FROM ducklake.test +---- +1 + +query II +SELECT * FROM ducklake.test +---- + +# no files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_delete_files/**') +---- +0 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_encryption.test b/tests/sqllogictests/sql/data_inlining/data_inlining_encryption.test new file mode 100644 index 0000000..a1334eb --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_encryption.test @@ -0,0 +1,52 @@ +# name: test/sql/data_inlining/data_inlining_encryption.test +# description: Test ducklake data inlining with encryption +# group: [data_inlining] + +require ducklake + +require parquet + +require httpfs + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_encryption_files', ENCRYPTED, DATA_INLINING_ROW_LIMIT 10000) + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +1000 499500 + +# data is inlined +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_encryption_files/main/test/**') +---- +0 + +# we can flush the data +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +# now we have a file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_encryption_files/main/test/**') +---- +1 + +# that file is encrypted +statement error +SELECT * FROM '${DATA_PATH}/ducklake_inlining_encryption_files/main/test/*.parquet' +---- +encrypted + +# but we can read it +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +1000 499500 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_flush.test b/tests/sqllogictests/sql/data_inlining/data_inlining_flush.test new file mode 100644 index 0000000..814ecd0 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_flush.test @@ -0,0 +1,164 @@ +# name: test/sql/data_inlining/data_inlining_flush.test +# description: test flushing inlined data to disk +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake 
(DATA_PATH '${DATA_PATH}/ducklake_inlining_flush_data', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +query I +SELECT COUNT(*) FROM ducklake.test +---- +0 + +loop i 0 10 + +statement ok +INSERT INTO ducklake.test VALUES (${i}) + +endloop + +# all data is inlined +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_data/**') +---- +0 + +query I +SELECT COUNT(*) FROM ducklake.test +---- +10 + +query III +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY ALL +---- +0 2 0 +1 3 1 +2 4 2 +3 5 3 +4 6 4 +5 7 5 +6 8 6 +7 9 7 +8 10 8 +9 11 9 + +query III +SELECT rowid, snapshot_id, * FROM ducklake.test AT (version => 4) ORDER BY ALL +---- +0 2 0 +1 3 1 +2 4 2 + +query IIII +FROM ducklake.table_changes('test', 3, 5) ORDER BY ALL +---- +3 1 insert 1 +4 2 insert 2 +5 3 insert 3 + +statement ok +BEGIN + +# flush inlined data +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +query I +SELECT COUNT(*) FROM ducklake.test +---- +10 + +query III +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY ALL +---- +0 2 0 +1 3 1 +2 4 2 +3 5 3 +4 6 4 +5 7 5 +6 8 6 +7 9 7 +8 10 8 +9 11 9 + +statement ok +COMMIT + +query II +SELECT snapshot_id, changes FROM ducklake.snapshots() WHERE snapshot_id IN (2, 12) ORDER BY snapshot_id +---- +2 {inlined_insert=[1]} +12 {flushed_inlined=[1]} + +# we now have one file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_data/**') +---- +1 + +# flushing inlined data introduces no data changes +query IIII +FROM ducklake.table_changes('test', 11, 12) ORDER BY ALL +---- + +query III +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY ALL +---- +0 2 0 +1 3 1 +2 4 2 +3 5 3 +4 6 4 +5 7 5 +6 8 6 +7 9 7 +8 10 8 +9 11 9 + +query I +SELECT * FROM ducklake.test AT (version => 4) ORDER BY ALL +---- +0 +1 +2 + +# we can still access other change feeds +query IIII +FROM ducklake.table_changes('test', 2, 5) ORDER BY ALL +---- +2 0 insert 0 +3 1 insert 1 +4 2 insert 2 +5 3 insert 3 + +# FIXME: this does not work correctly +mode skip + +query IIII +FROM ducklake.table_changes('test', 3, 5) ORDER BY ALL +---- +3 1 insert 1 +4 2 insert 2 +5 3 insert 3 + +mode unskip + +query III +SELECT rowid, snapshot_id, * FROM ducklake.test AT (version => 4) ORDER BY ALL +---- +0 2 0 +1 3 1 +2 4 2 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_flush_schema.test b/tests/sqllogictests/sql/data_inlining/data_inlining_flush_schema.test new file mode 100644 index 0000000..e185f52 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_flush_schema.test @@ -0,0 +1,111 @@ +# name: test/sql/data_inlining/data_inlining_flush_schema.test +# description: test flushing inlined data to disk with the schema/table parameters +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_flush_schema_data', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE SCHEMA ducklake.s2 + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +CREATE TABLE ducklake.s1.test(j VARCHAR); + +statement ok +CREATE TABLE ducklake.s1.test2(d DATE); + +statement ok +CREATE TABLE ducklake.s2.test(i VARCHAR, j INT); + +statement ok +INSERT INTO ducklake.test VALUES (42); + +statement ok +INSERT INTO ducklake.s1.test VALUES ('hello world') + +statement ok +INSERT 
INTO ducklake.s1.test2 VALUES (DATE '1992-01-01') + +statement ok +INSERT INTO ducklake.s2.test VALUES (42, 84) + +# all data is inlined +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_schema_data/**') +---- +0 + +# flush the "test" table in the main schema +statement ok +CALL ducklake_flush_inlined_data('ducklake', table_name => 'test') + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_schema_data/**') +---- +1 + +# flush the full "s1" schema +statement ok +CALL ducklake_flush_inlined_data('ducklake', schema_name => 's1') + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_schema_data/**') +---- +3 + +# flush a single table in a specific schema +statement ok +CALL ducklake_flush_inlined_data('ducklake', schema_name => 's2', table_name => 'test') + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_flush_schema_data/**') +---- +4 + +# verify all data is in its place + +query I +FROM ducklake.test +---- +42 + +query I +FROM ducklake.s1.test +---- +hello world + +query I +FROM ducklake.s1.test2 +---- +1992-01-01 + +query II +FROM ducklake.s2.test +---- +42 84 + +# flush non-existent tables/schemas +statement error +CALL ducklake_flush_inlined_data('ducklake', table_name => 'non_existent_table') +---- +does not exist + +statement error +CALL ducklake_flush_inlined_data('ducklake', schema_name => 'non_existent_schema') +---- +not found diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_issue504.test b/tests/sqllogictests/sql/data_inlining/data_inlining_issue504.test new file mode 100644 index 0000000..2fe8e81 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_issue504.test @@ -0,0 +1,26 @@ +# name: test/sql/data_inlining/data_inlining_issue504.test +# description: test inlining with ORDER BY prior to flush +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_flush_data', DATA_INLINING_ROW_LIMIT 10000) + +statement ok +CREATE TABLE IF NOT EXISTS ducklake.example_table (created_at TIMESTAMPTZ, version INTEGER); + +statement ok +INSERT INTO ducklake.example_table (created_at, version) VALUES (now(), 1); + +query I +SELECT version FROM ducklake.example_table ORDER BY created_at DESC LIMIT 1 +---- +1 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_large.test b/tests/sqllogictests/sql/data_inlining/data_inlining_large.test new file mode 100644 index 0000000..7a1b56b --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_large.test @@ -0,0 +1,37 @@ +# name: test/sql/data_inlining/data_inlining_large.test +# description: test data inlining with many rows +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_large_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 999999) + +statement ok +CREATE TABLE bigtbl AS FROM range(1000000) t(i); + +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +query I +INSERT INTO ducklake.test FROM bigtbl +---- +1000000 + +query IIII +SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM bigtbl +---- +1000000 499999500000 0 999999 + +# this exceeds the inline limit so we have a 
file +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_large_files/**') +---- +1 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_option.test b/tests/sqllogictests/sql/data_inlining/data_inlining_option.test new file mode 100644 index 0000000..bacde40 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_option.test @@ -0,0 +1,58 @@ +# name: test/sql/data_inlining/data_inlining_option.test +# description: test setting data inlining as an option +# group: [data_inlining] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_inlining_setting.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_inlining_setting_files', METADATA_CATALOG 'ducklake_metadata') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 10); + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 + +# all data is inlined - so we have no files +query I +SELECT COUNT(*) FROM GLOB('__TEST_DIR__/ducklake_inlining_setting_files/**') +---- +0 + +statement ok +DETACH ducklake; + +# the option is persisted across restarts +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_inlining_setting.db' AS ducklake + +statement ok +INSERT INTO ducklake.test VALUES (5, 5); + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 +5 5 + +query I +SELECT COUNT(*) FROM GLOB('__TEST_DIR__/ducklake_inlining_setting_files/**') +---- +0 + +query III +SELECT option_name, description IS NOT NULL, value FROM ducklake.options() WHERE option_name = 'data_inlining_row_limit' +---- +data_inlining_row_limit true 10 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_option_transaction_local.test b/tests/sqllogictests/sql/data_inlining/data_inlining_option_transaction_local.test new file mode 100644 index 0000000..e130118 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_option_transaction_local.test @@ -0,0 +1,69 @@ +# name: test/sql/data_inlining/data_inlining_option_transaction_local.test +# description: test setting data inlining as an option when we have transaction local changes +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_setting_tl_files', METADATA_CATALOG 'ducklake_metadata') + +# transaction-local create table +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 10); + +statement ok +INSERT INTO ducklake.test VALUES (42); + +statement ok +COMMIT + +query I +SELECT * FROM ducklake.test +---- +42 + +# disable inlining again +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 0); + +# now perform a transaction-local alter +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN j INTEGER + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 10); + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +42 NULL +1 2 +NULL 3 + +# all data is inlined - so we have no files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_setting_tl_files/**') +---- +0 diff --git 
a/tests/sqllogictests/sql/data_inlining/data_inlining_partitions.test b/tests/sqllogictests/sql/data_inlining/data_inlining_partitions.test new file mode 100644 index 0000000..e60e35f --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_partitions.test @@ -0,0 +1,100 @@ +# name: test/sql/data_inlining/data_inlining_partitions.test +# description: Test data inlining with partitions +# group: [data_inlining] + +require ducklake + +require parquet + +# partitioning based on a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_partitions', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 1000) + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(id INTEGER, ts TIMESTAMP, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (year(ts), month(ts)); + +loop i 0 10 + +statement ok +INSERT INTO partitioned_tbl SELECT i, TIMESTAMP '2020-01-01' + interval (i) hours, concat('thisisastring_', i) FROM range(1000 * ${i}, 1000 * (${i} + 1)) t(i) + +endloop + +# all data is inlined +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_partitions/main/partitioned_tbl/**') +---- +0 + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl GROUP BY year(ts) +---- +2020 8784 thisisastring_0 thisisastring_999 +2021 1216 thisisastring_8784 thisisastring_9999 + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl AT (version => 4) GROUP BY year(ts) +---- +2020 2000 thisisastring_0 thisisastring_999 + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE year(ts) = 2021 +---- +1216 + +# flush the inlined data +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +# we now have 14 files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_inlining_partitions/main/partitioned_tbl/**') +---- +14 + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl GROUP BY year(ts) +---- +2020 8784 thisisastring_0 thisisastring_999 +2021 1216 thisisastring_8784 thisisastring_9999 + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl AT (version => 4) GROUP BY year(ts) +---- +2020 2000 thisisastring_0 thisisastring_999 + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE year(ts) = 2021 +---- +1216 + +# verify the result data is partitioned +query II +SELECT regexp_extract(path, '.*year=([0-9]+)[/\\].*', 1)::INT AS year_part, regexp_extract(path, '.*month=([0-9]+)[/\\].*', 1)::INT AS month_part +FROM glob('${DATA_PATH}/ducklake_inlining_partitions/**') t(path) ORDER BY ALL +---- +2020 1 +2020 2 +2020 3 +2020 4 +2020 5 +2020 6 +2020 7 +2020 8 +2020 9 +2020 10 +2020 11 +2020 12 +2021 1 +2021 2 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_table_changes.test b/tests/sqllogictests/sql/data_inlining/data_inlining_table_changes.test new file mode 100644 index 0000000..52a0fe9 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_table_changes.test @@ -0,0 +1,92 @@ +# name: test/sql/data_inlining/data_inlining_table_changes.test +# description: test ducklake_table_changes function with inlined data +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' 
AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_table_changes_files', DATA_INLINING_ROW_LIMIT 10) + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(3); + +# snapshot 3 +statement ok +UPDATE ducklake.test SET i=i+100 + +# snapshot 4 +statement ok +UPDATE ducklake.test SET i=i+100 + +# snapshot 5 +statement ok +DELETE FROM ducklake.test + +query IIII +FROM ducklake.table_changes('test', 0, 2) ORDER BY ALL +---- +2 0 insert 0 +2 1 insert 1 +2 2 insert 2 + +query IIII +FROM ducklake.table_changes('test', 3, 3) ORDER BY ALL +---- +3 0 update_postimage 100 +3 0 update_preimage 0 +3 1 update_postimage 101 +3 1 update_preimage 1 +3 2 update_postimage 102 +3 2 update_preimage 2 + +query IIII +FROM ducklake.table_changes('test', 4, 4) ORDER BY ALL +---- +4 0 update_postimage 200 +4 0 update_preimage 100 +4 1 update_postimage 201 +4 1 update_preimage 101 +4 2 update_postimage 202 +4 2 update_preimage 102 + +query IIII +FROM ducklake.table_changes('test', 5, 5) ORDER BY ALL +---- +5 0 delete 200 +5 1 delete 201 +5 2 delete 202 + +# all changes +query IIII +FROM ducklake.table_changes('test', 0, 5) ORDER BY ALL +---- +2 0 insert 0 +2 1 insert 1 +2 2 insert 2 +3 0 update_postimage 100 +3 0 update_preimage 0 +3 1 update_postimage 101 +3 1 update_preimage 1 +3 2 update_postimage 102 +3 2 update_preimage 2 +4 0 update_postimage 200 +4 0 update_preimage 100 +4 1 update_postimage 201 +4 1 update_preimage 101 +4 2 update_postimage 202 +4 2 update_preimage 102 +5 0 delete 200 +5 1 delete 201 +5 2 delete 202 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_alter.test b/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_alter.test new file mode 100644 index 0000000..2b88e23 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_alter.test @@ -0,0 +1,56 @@ +# name: test/sql/data_inlining/data_inlining_transaction_local_alter.test +# description: test alter of transaction local data that is inlined +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_local_alter_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER) + +# insert -> alter is not supported +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (42, 84); + +statement error +ALTER TABLE ducklake.test ADD COLUMN k INTEGER +---- +ALTER on a table with transaction-local inlined data is not supported + +statement ok +ROLLBACK + +# alter -> insert works +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN k INTEGER + +statement ok +INSERT INTO ducklake.test VALUES (42, 84, 100); + +query III +FROM ducklake.test +---- +42 84 100 + +statement ok +COMMIT + +query III +FROM ducklake.test +---- +42 84 100 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_delete.test b/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_delete.test new file mode 100644 index 0000000..d0246c1 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_transaction_local_delete.test @@ -0,0 +1,110 @@ +# name: test/sql/data_inlining/data_inlining_transaction_local_delete.test +# description: test ducklake 
extension +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_delete_local_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER) + +# delete data from transaction-local inlined insertions +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (42, 84), (100, 200), (200, 300), (300, 400), (400, 500); + +query II +SELECT * FROM ducklake.test +---- +42 84 +100 200 +200 300 +300 400 +400 500 + +query I +DELETE FROM ducklake.test WHERE i=100 +---- +1 + +query II +SELECT * FROM ducklake.test +---- +42 84 +200 300 +300 400 +400 500 + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +42 84 +200 300 +300 400 +400 500 + +statement ok +DROP TABLE ducklake.test + +# multiple deletes in the same transaction from a fresh table +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test AS FROM (VALUES (42, 84), (200, 300), (300, 400), (400, 500)) t(i, j) + +query I +DELETE FROM ducklake.test WHERE i=300; +---- +1 + +query II +SELECT * FROM ducklake.test +---- +42 84 +200 300 +400 500 + +query I +DELETE FROM ducklake.test WHERE i=200; +---- +1 + +query II +SELECT * FROM ducklake.test +---- +42 84 +400 500 + +query II +SELECT * FROM ducklake.test WHERE i=400 +---- +400 500 + +statement ok +COMMIT + +query II +SELECT * FROM ducklake.test +---- +42 84 +400 500 + +query II +SELECT * FROM ducklake.test WHERE i=400 +---- +400 500 diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_types.test b/tests/sqllogictests/sql/data_inlining/data_inlining_types.test new file mode 100644 index 0000000..e17dc62 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_types.test @@ -0,0 +1,86 @@ +# name: test/sql/data_inlining/data_inlining_types.test +# description: test data inlining with different data types +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_types_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE VIEW all_types AS SELECT * EXCLUDE (BIGNUM, BIT, small_enum, +medium_enum, +large_enum, +"union", +fixed_int_array, +fixed_varchar_array, +fixed_nested_int_array, +fixed_nested_varchar_array, +fixed_struct_array, +struct_of_fixed_array, +fixed_array_of_int_list, +list_of_fixed_int_array, hugeint, uhugeint, interval, time_tz, date, blob, varchar, timestamp_s, timestamp_ms, timestamp_ns,timestamp_tz) FROM test_all_types(); + +query I nosort alltypes +FROM all_types +---- + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.data_types AS FROM all_types + +query I nosort alltypes +FROM ducklake.data_types +---- + +statement ok +COMMIT + +query I nosort alltypes +FROM ducklake.data_types +---- + +statement ok +CREATE TABLE extra_types ( + d DATE, + b BLOB, + v VARCHAR, + ts_s TIMESTAMP_S, + ts_ms TIMESTAMP_MS, + ts_ns TIMESTAMP_NS, + ts_tz TIMESTAMPTZ +); + +statement ok +INSERT INTO extra_types VALUES ('2025-11-12','\x48656c6c6f20576f726c64','Sample text','2025-11-12 15:30:45', + '2025-11-12 15:30:45.123','2025-11-12 15:30:45.123','2025-11-12 15:30:45+00'); + + +query I nosort extra_types 
+FROM extra_types +---- + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.extra_types AS FROM extra_types + +query I nosort extra_types +FROM ducklake.extra_types +---- + +statement ok +COMMIT + +query I nosort extra_types +FROM ducklake.extra_types +---- \ No newline at end of file diff --git a/tests/sqllogictests/sql/data_inlining/data_inlining_update.test b/tests/sqllogictests/sql/data_inlining/data_inlining_update.test new file mode 100644 index 0000000..7e50c87 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/data_inlining_update.test @@ -0,0 +1,75 @@ +# name: test/sql/data_inlining/data_inlining_update.test +# description: test ducklake updating +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_inlining_update_files', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 10) + +statement ok +CREATE TABLE ducklake.test AS SELECT 1 i, 2 j UNION ALL SELECT NULL, 3 UNION ALL SELECT 10, 20 + +statement ok +BEGIN + +# we can update inlined data +query I +UPDATE ducklake.test SET i=i+100 WHERE i=1 +---- +1 + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY rowid +---- +0 NULL 101 2 +1 1 NULL 3 +2 1 10 20 + +statement ok +COMMIT + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +[Min: 1, Max: 101][Has Null: true, Has No Null: true] + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY rowid +---- +0 2 101 2 +1 1 NULL 3 +2 1 10 20 + +# update the other row +statement ok +BEGIN + +query I +UPDATE ducklake.test SET i=i+1000 WHERE i=10 +---- +1 + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY rowid +---- +0 2 101 2 +1 1 NULL 3 +2 NULL 1010 20 + +statement ok +COMMIT + +query IIII +SELECT rowid, snapshot_id, * FROM ducklake.test ORDER BY rowid +---- +0 2 101 2 +1 1 NULL 3 +2 3 1010 20 diff --git a/tests/sqllogictests/sql/data_inlining/inlining_global_options.test b/tests/sqllogictests/sql/data_inlining/inlining_global_options.test new file mode 100644 index 0000000..97dd2ad --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/inlining_global_options.test @@ -0,0 +1,131 @@ +# name: test/sql/data_inlining/inlining_global_options.test +# description: Test global options for the inlining function ducklake_flush_inlined_data +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/inlining_global_options', DATA_INLINING_ROW_LIMIT 2) + +statement ok +use ducklake + +# Create one Table with two files +statement ok +CREATE TABLE example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example (key, value) VALUES ('baz', 'qux'); + +# Create another Table with two files +statement ok +CREATE TABLE example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO example_2 (key, value) VALUES ('baz', 'qux'); + +# Different schema with another table with two files +statement ok +CREATE SCHEMA s1; + +statement ok +CREATE TABLE s1.example (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example (key, value) VALUES ('foo', 'bar'); + +statement 
ok +INSERT INTO s1.example (key, value) VALUES ('baz', 'qux'); + +statement ok +CREATE TABLE s1.example_2 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example_2 (key, value) VALUES ('baz', 'qux'); + +statement ok +CREATE TABLE s1.example_3 (key VARCHAR, value VARCHAR); + +statement ok +INSERT INTO s1.example_3 (key, value) VALUES ('foo', 'bar'); + +statement ok +INSERT INTO s1.example_3 (key, value) VALUES ('baz', 'qux'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- + +statement ok +CALL ducklake.set_option('auto_compact', false, schema=> 'main') + +statement ok +CALL ducklake.set_option('auto_compact', false, schema=> 's1') + +statement ok +CALL ducklake.set_option('auto_compact', true, schema=> 'main', table_name =>'example_2') + +statement ok +CALL ducklake_flush_inlined_data('ducklake'); + +# We write a new file for table id 2 +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +10 2 + +statement ok +CALL ducklake.set_option('auto_compact', true, schema=> 's1', table_name =>'example_2') + +statement ok +CALL ducklake_flush_inlined_data('ducklake'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +10 2 +11 5 + +statement ok +CALL ducklake.set_option('auto_compact', true, schema=> 's1') + +statement ok +CALL ducklake_flush_inlined_data('ducklake'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +10 2 +11 5 +12 4 +13 6 + +statement ok +CALL ducklake.set_option('auto_compact', true, schema=> 'main') + +statement ok +CALL ducklake_flush_inlined_data('ducklake'); + +query II +SELECT data_file_id, table_id FROM __ducklake_metadata_ducklake.ducklake_data_file +---- +10 2 +11 5 +12 4 +13 6 +14 1 \ No newline at end of file diff --git a/tests/sqllogictests/sql/data_inlining/inlining_issue_on_empty_inline.test b/tests/sqllogictests/sql/data_inlining/inlining_issue_on_empty_inline.test new file mode 100644 index 0000000..735e6c6 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/inlining_issue_on_empty_inline.test @@ -0,0 +1,46 @@ +# name: test/sql/data_inlining/inlining_issue_on_empty_inline.test +# description: test ducklake extension +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:__TEST_DIR__/inlining_issue_on_empty_inline.db' AS inlining (DATA_PATH '${DATA_PATH}/inlining_issue_on_empty_inline', DATA_INLINING_ROW_LIMIT 10); + +statement ok +use inlining; + +statement ok +CREATE TABLE tbl(col INTEGER); + +statement ok +INSERT INTO tbl VALUES (1), (2), (3); + +statement ok +CALL ducklake_flush_inlined_data('inlining'); + +statement ok +create table test (id integer); + +statement ok +ALTER TABLE test +ADD COLUMN k INTEGER; + +statement ok +use memory; + +statement ok +detach inlining + +statement ok +ATTACH 'ducklake:__TEST_DIR__/inlining_issue_on_empty_inline.db' AS inlining (DATA_PATH '${DATA_PATH}/inlining_issue_on_empty_inline', DATA_INLINING_ROW_LIMIT 10); + +statement ok +use inlining; + +statement ok +CALL ducklake_flush_inlined_data('inlining'); diff --git a/tests/sqllogictests/sql/data_inlining/inlining_unsupported_systems.test b/tests/sqllogictests/sql/data_inlining/inlining_unsupported_systems.test new file mode 100644 index 0000000..00a58b2 --- /dev/null +++ 
b/tests/sqllogictests/sql/data_inlining/inlining_unsupported_systems.test @@ -0,0 +1,21 @@ +# name: test/sql/data_inlining/inlining_unsupported_systems.test +# description: test data inlining on unsupported metadata systems +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/inlining_unsupported_systems', METADATA_CATALOG 'ducklake_metadata') + + +statement maybe +CALL ducklake.set_option('data_inlining_row_limit', '10'); +---- +Data Inlining is currently only implemented for DuckDB as a DBMS + diff --git a/tests/sqllogictests/sql/data_inlining/insert_inlining_concurrent.test b/tests/sqllogictests/sql/data_inlining/insert_inlining_concurrent.test new file mode 100644 index 0000000..d75f567 --- /dev/null +++ b/tests/sqllogictests/sql/data_inlining/insert_inlining_concurrent.test @@ -0,0 +1,34 @@ +# name: test/sql/data_inlining/insert_inlining_concurrent.test +# description: test ducklake threaded inlining +# group: [data_inlining] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/insert_inlining_concurrent', DATA_INLINING_ROW_LIMIT 10) + +statement ok +set threads to 10 + +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +statement ok +INSERT INTO ducklake.test VALUES (1) + +query I +FROM ducklake.test +---- +1 + +statement ok +DROP TABLE ducklake.test; + +statement ok +DETACH ducklake diff --git a/tests/sqllogictests/sql/default/add_column_with_default.test b/tests/sqllogictests/sql/default/add_column_with_default.test new file mode 100644 index 0000000..05fb889 --- /dev/null +++ b/tests/sqllogictests/sql/default/add_column_with_default.test @@ -0,0 +1,113 @@ +# name: test/sql/default/add_column_with_default.test +# description: Test adding a column with default values with DuckLake +# group: [default] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_add_default', METADATA_CATALOG 'xx') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1); + +statement ok +INSERT INTO ducklake.test VALUES (2); + +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN j INTEGER DEFAULT 42 + +statement ok +INSERT INTO ducklake.test VALUES (100, 100) + +query II +FROM ducklake.test ORDER BY ALL +---- +1 42 +2 42 +100 100 + +statement ok +COMMIT + +query II +FROM ducklake.test ORDER BY ALL +---- +1 42 +2 42 +100 100 + +# alter the default +foreach finaltransaction ROLLBACK COMMIT + +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test DEFAULT VALUES + +statement ok +ALTER TABLE ducklake.test ALTER i SET DEFAULT 1000 + +statement ok +ALTER TABLE ducklake.test ALTER j DROP DEFAULT + +statement ok +INSERT INTO ducklake.test DEFAULT VALUES + +query II +FROM ducklake.test ORDER BY ALL +---- +1 42 +2 42 +100 100 +1000 NULL +NULL 42 + +statement ok +${finaltransaction} + +endloop + +query IIIIII +SELECT table_id, column_id, initial_default, default_value, default_value_type, default_value_dialect FROM xx.ducklake_column +---- +1 1 NULL NULL literal duckdb +1 2 42 42 literal 
duckdb +1 1 NULL 1000 literal duckdb +1 2 42 NULL literal duckdb + +statement ok +INSERT INTO ducklake.test DEFAULT VALUES + +query II +FROM ducklake.test ORDER BY ALL +---- +1 42 +2 42 +100 100 +1000 NULL +1000 NULL +NULL 42 + +statement error +ALTER TABLE ducklake.test ALTER nonexistent_column SET DEFAULT 1000 +---- +nonexistent_column + +statement error +ALTER TABLE ducklake.test ALTER nonexistent_column DROP DEFAULT +---- +nonexistent_column diff --git a/tests/sqllogictests/sql/default/default_expressions.test b/tests/sqllogictests/sql/default/default_expressions.test new file mode 100644 index 0000000..0dd8a1e --- /dev/null +++ b/tests/sqllogictests/sql/default/default_expressions.test @@ -0,0 +1,87 @@ +# name: test/sql/default/default_expressions.test +# description: Test adding a column with a default expression with DuckLake +# group: [default] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/default_expressions', METADATA_CATALOG 'xx') + +statement ok +use ducklake + +statement ok +create table t (id integer, created_at timestamp default now()); + +statement ok +insert into t(id) values(1); + +query I +select created_at < NOW() from t; +---- +true + +# Let's now create a table with a literal default and later alter it to an expression +statement ok +create table t_1 (id integer, id_plus integer default 1); + +statement ok +insert into t_1(id) values (0); + +query II +FROM t_1 +---- +0 1 + +statement ok +ALTER TABLE t_1 ALTER id_plus SET DEFAULT round(pi()) + +statement ok +insert into t_1(id) values (1); + +query II +FROM t_1 +---- +0 1 +1 3 + + +# Try a string literal that looks like a function call +statement ok +create table t_2 (a integer, b varchar); + +statement ok +ALTER TABLE t_2 ALTER b SET DEFAULT 'random()'; + +statement ok +insert into t_2(a) values (1) + +query II +FROM t_2 +---- +1 random() + +query IIIII +SELECT table_id, column_id, default_value, default_value_type, default_value_dialect FROM xx.ducklake_column +---- +1 1 NULL literal duckdb +1 2 now() expression duckdb +2 1 NULL literal duckdb +2 2 1 literal duckdb +2 2 round(pi()) expression duckdb +3 1 NULL literal duckdb +3 2 NULL literal duckdb +3 2 random() literal duckdb + +statement error +ALTER TABLE t_2 ADD COLUMN j INTEGER DEFAULT RANDOM() +---- +We cannot add a column with a non-literal default value. Add the column and then explicitly set the default for new values using "ALTER ... 
SET DEFAULT" \ No newline at end of file diff --git a/tests/sqllogictests/sql/default/default_values.test b/tests/sqllogictests/sql/default/default_values.test new file mode 100644 index 0000000..40b75b3 --- /dev/null +++ b/tests/sqllogictests/sql/default/default_values.test @@ -0,0 +1,47 @@ +# name: test/sql/default/default_values.test +# description: Test default values with DuckLake +# group: [default] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_default_files', METADATA_CATALOG 'xx') + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test(i INTEGER DEFAULT 42, j INTEGER); + +statement ok +INSERT INTO ducklake.test (j) VALUES (100) + +statement ok +COMMIT + +statement ok +INSERT INTO ducklake.test (j) VALUES (200) + +query II +SELECT * FROM ducklake.test +---- +42 100 +42 200 + +statement ok +CREATE TABLE ducklake.test_special_values(i INTEGER, s1 VARCHAR DEFAULT '', s2 VARCHAR DEFAULT 'NULL'); + +statement ok +INSERT INTO ducklake.test_special_values (i) VALUES (100) + +query III +SELECT * FROM ducklake.test_special_values WHERE s2 IS NULL +---- +100 (empty) NULL diff --git a/tests/sqllogictests/sql/default/struct_field_default.test b/tests/sqllogictests/sql/default/struct_field_default.test new file mode 100644 index 0000000..7ad612b --- /dev/null +++ b/tests/sqllogictests/sql/default/struct_field_default.test @@ -0,0 +1,47 @@ +# name: test/sql/default/struct_field_default.test +# description: Test adding a struct field with default values +# group: [default] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_default_files', METADATA_CATALOG 'xx') + + +statement ok +CREATE TABLE ducklake.test(col1 STRUCT(i INT, j INT)); + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}) + +# add k INTEGER +statement ok +BEGIN + +statement ok +ALTER TABLE ducklake.test ADD COLUMN col1.k INTEGER DEFAULT 42 + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 100, 'j': 200, 'k': 300}) + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': 42} +{'i': 100, 'j': 200, 'k': 300} + +statement ok +COMMIT + +query I +FROM ducklake.test +---- +{'i': 1, 'j': 2, 'k': 42} +{'i': 100, 'j': 200, 'k': 300} diff --git a/tests/sqllogictests/sql/delete/basic_delete.test b/tests/sqllogictests/sql/delete/basic_delete.test new file mode 100644 index 0000000..2f855f1 --- /dev/null +++ b/tests/sqllogictests/sql/delete/basic_delete.test @@ -0,0 +1,48 @@ +# name: test/sql/delete/basic_delete.test +# description: Test ducklake deletes +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +statement ok +INSERT INTO ducklake.test SELECT i id FROM range(15000, 16000) t(i) + +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE id%2=0 +---- +1000 + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +1000 0 + +statement ok +COMMIT + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE 
id%2=0) FROM ducklake.test +---- +1000 0 + +# we can time travel to see the state of the table before deletes +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test AT (VERSION => 2) +---- +2000 1000 diff --git a/tests/sqllogictests/sql/delete/delete_ignore_extra_columns.test b/tests/sqllogictests/sql/delete/delete_ignore_extra_columns.test new file mode 100644 index 0000000..3c1c86d --- /dev/null +++ b/tests/sqllogictests/sql/delete/delete_ignore_extra_columns.test @@ -0,0 +1,20 @@ +# name: test/sql/delete/delete_ignore_extra_columns.test +# description: Test that ducklake deletes ignore extra columns in delete files +# group: [delete] + +require ducklake + +require parquet + +unzip data/iceberg_deletes/delete_ignore_extra_columns.db.gz __TEST_DIR__/delete_ignore_extra_columns.db + +statement ok +ATTACH 'ducklake:__TEST_DIR__/delete_ignore_extra_columns.db' AS ducklake (DATA_PATH 'data/iceberg_deletes/delete_ignore_extra_columns', OVERRIDE_DATA_PATH TRUE) + +statement ok +use ducklake + +query I +SELECT count(*) from test; +---- +91 diff --git a/tests/sqllogictests/sql/delete/delete_join.test b/tests/sqllogictests/sql/delete/delete_join.test new file mode 100644 index 0000000..15ab9f5 --- /dev/null +++ b/tests/sqllogictests/sql/delete/delete_join.test @@ -0,0 +1,40 @@ +# name: test/sql/delete/delete_join.test +# description: Test ducklake delete using a join +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_join_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(500) t(i); + +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test FROM range(500, 1000) + +statement ok +CREATE TEMPORARY TABLE deleted_rows AS FROM range(0, 1000, 2) t(delete_id); + +query I +DELETE FROM ducklake.test USING deleted_rows WHERE id=deleted_rows.delete_id +---- +500 + +statement ok +COMMIT + +query I +SELECT COUNT(*) FROM ducklake.test +---- +500 diff --git a/tests/sqllogictests/sql/delete/delete_rollback_cleanup.test b/tests/sqllogictests/sql/delete/delete_rollback_cleanup.test new file mode 100644 index 0000000..b6090ee --- /dev/null +++ b/tests/sqllogictests/sql/delete/delete_rollback_cleanup.test @@ -0,0 +1,40 @@ +# name: test/sql/delete/delete_rollback_cleanup.test +# description: Test ducklake cleaning up files after rollback of delete +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_rollback_cleanup_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE id%2=0 +---- +500 + +statement ok +ROLLBACK + +query I +SELECT COUNT(*) FROM ducklake.test +---- +1000 + +# verify that we don't have the delete file there anymore +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_delete_rollback_cleanup_files/*-delete.parquet') +---- +0 diff --git a/tests/sqllogictests/sql/delete/delete_same_transaction.test b/tests/sqllogictests/sql/delete/delete_same_transaction.test new file mode 100644 index 0000000..13b0d8c --- /dev/null +++ b/tests/sqllogictests/sql/delete/delete_same_transaction.test @@ -0,0 +1,60 @@ +# name: test/sql/delete/delete_same_transaction.test +# description: Test 
ducklake deleting and creating in the same transaction +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_same_transaction_files') + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +query I +DELETE FROM ducklake.test WHERE id%2=0 +---- +500 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE id<=250 +---- +125 + +query I +DELETE FROM ducklake.test WHERE id<=250 +---- +125 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE id<=250 +---- +0 + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +375 0 + +statement ok +COMMIT + +# verify that we only have one delete file written after the commit (i.e. we cleaned up the first file) +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_delete_same_transaction_files/main/test/*-delete.parquet') +---- +1 + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +375 0 diff --git a/tests/sqllogictests/sql/delete/empty_delete.test b/tests/sqllogictests/sql/delete/empty_delete.test new file mode 100644 index 0000000..e611263 --- /dev/null +++ b/tests/sqllogictests/sql/delete/empty_delete.test @@ -0,0 +1,39 @@ +# name: test/sql/delete/empty_delete.test +# description: Test ducklake empty delete +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_empty_delete_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE id>10000 +---- +0 + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +1000 500 + +statement ok +COMMIT + +query II +SELECT COUNT(*), COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +1000 500 diff --git a/tests/sqllogictests/sql/delete/multi_deletes.test b/tests/sqllogictests/sql/delete/multi_deletes.test new file mode 100644 index 0000000..8ae5641 --- /dev/null +++ b/tests/sqllogictests/sql/delete/multi_deletes.test @@ -0,0 +1,67 @@ +# name: test/sql/delete/multi_deletes.test +# description: Test ducklake deleting multiple batches +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_multi_deletes_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(10000) t(i); + +# multiple deletes of the same table in the same transaction +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE id%8=0 +---- +1250 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +8750 43750000 + +query I +DELETE FROM ducklake.test WHERE id%4=0 +---- +1250 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +7500 37500000 + +statement ok +COMMIT + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +7500 37500000 + +# verify that we only have one delete file written after the commit (i.e. 
we cleaned up the first file) +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_multi_deletes_files/main/test/*-delete.parquet') +---- +1 + +# deleting from the file again +query I +DELETE FROM ducklake.test WHERE id%2=0 +---- +2500 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +5000 25000000 diff --git a/tests/sqllogictests/sql/delete/truncate_table.test b/tests/sqllogictests/sql/delete/truncate_table.test new file mode 100644 index 0000000..b7027fb --- /dev/null +++ b/tests/sqllogictests/sql/delete/truncate_table.test @@ -0,0 +1,81 @@ +# name: test/sql/delete/truncate_table.test +# description: Test ducklake truncating a table +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_files') + +# transaction local truncate +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test_local AS SELECT i id FROM range(10000) t(i); + +query I +SELECT COUNT(*) FROM ducklake.test_local +---- +10000 + +query I +DELETE FROM ducklake.test_local +---- +10000 + +query I +SELECT COUNT(*) FROM ducklake.test_local +---- +0 + +statement ok +COMMIT + +# verify that we are not writing any files here +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_truncate_files/*') +---- +0 + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(10000) t(i); + +statement ok +BEGIN + +query I +DELETE FROM ducklake.test +---- +10000 + +query I +SELECT COUNT(*) FROM ducklake.test +---- +0 + +statement ok +COMMIT + +query I +SELECT COUNT(*) FROM ducklake.test +---- +0 + +# verify that we are not writing a delete file if we clear the file list +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_truncate_files/*-delete.parquet') +---- +0 + +query I +DELETE FROM ducklake.test +---- +0 diff --git a/tests/sqllogictests/sql/ducklake_basic.test b/tests/sqllogictests/sql/ducklake_basic.test new file mode 100644 index 0000000..f5fee38 --- /dev/null +++ b/tests/sqllogictests/sql/ducklake_basic.test @@ -0,0 +1,92 @@ +# name: test/sql/ducklake_basic.test +# description: test ducklake extension +# group: [sql] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +query II +SELECT * FROM ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 + +query I +SELECT COUNT(*) FROM ducklake.test +---- +2 + +statement ok +INSERT INTO ducklake.test VALUES (4, 5), (6, 7); + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 +4 5 +6 7 + +statement ok +CREATE TABLE ducklake.test2 AS SELECT 'hello world' AS j, DATE '1992-01-01' date + +query II +SELECT * FROM ducklake.test2 +---- +hello world 1992-01-01 + +# re-attach +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake.db' AS ducklake + +query II +SELECT * FROM ducklake.test +---- +1 2 +NULL 3 +4 5 +6 7 + +query II +SELECT * FROM ducklake.test2 +---- +hello world 1992-01-01 + +statement ok +SHOW ALL TABLES + +statement ok +USE ducklake + +query I +SHOW TABLES +---- +test +test2 + +# data path is not required for DuckDB databases +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_no_data_path.db' AS 
ducklake_no_data_path + +statement ok +CREATE TABLE ducklake_no_data_path.tbl(i INT); + +statement ok +INSERT INTO ducklake_no_data_path.tbl VALUES (42); diff --git a/tests/sqllogictests/sql/encryption/encryption.test b/tests/sqllogictests/sql/encryption/encryption.test new file mode 100644 index 0000000..ef989d3 --- /dev/null +++ b/tests/sqllogictests/sql/encryption/encryption.test @@ -0,0 +1,77 @@ +# name: test/sql/encryption/encryption.test +# description: Test ducklake encryption support +# group: [encryption] + +require ducklake + +require parquet + +require httpfs + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_encryption.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_encryption_files', ENCRYPTED) + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +# we can read them through ducklake +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +1000 499500 + +statement ok +COMMIT + +# also after committing +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +1000 499500 + +# we cannot read the files using a regular parquet read as they have been encrypted - we need the encryption key +statement error +SELECT * FROM '__TEST_DIR__/ducklake_encryption_files/**/*.parquet' +---- +encrypted + +# deletes are also encrypted
statement ok +BEGIN + +statement ok +DELETE FROM ducklake.test WHERE id%2=0 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +500 250000 + +statement ok +COMMIT + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +500 250000 + +# similarly we cannot read deletes, as they are encrypted +statement error +SELECT * FROM '__TEST_DIR__/ducklake_encryption_files/**/*-del*.parquet' +---- +encrypted + +# restarting an encrypted database works +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_encryption.db' AS ducklake + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.test +---- +500 250000 diff --git a/tests/sqllogictests/sql/encryption/partitioning_encryption.test b/tests/sqllogictests/sql/encryption/partitioning_encryption.test new file mode 100644 index 0000000..82320ab --- /dev/null +++ b/tests/sqllogictests/sql/encryption/partitioning_encryption.test @@ -0,0 +1,37 @@ +# name: test/sql/encryption/partitioning_encryption.test +# description: Test partitioning with encryption +# group: [encryption] + +require ducklake + +require parquet + +require httpfs + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partitioning_encryption', METADATA_CATALOG 'ducklake_metadata', ENCRYPTED) + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +# verify files are partitioned, but the information is not leaked in the filename +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])/.*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 2 (empty) +1 2 (empty) diff --git a/tests/sqllogictests/sql/functions/ducklake_snapshots.test b/tests/sqllogictests/sql/functions/ducklake_snapshots.test new file mode 100644 index 0000000..8a83917 --- /dev/null +++ b/tests/sqllogictests/sql/functions/ducklake_snapshots.test @@ -0,0 +1,160 @@ +# 
name: test/sql/functions/ducklake_snapshots.test +# description: View DuckLake Snapshots +# group: [functions] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_snapshots_files') + +# initial snapshot +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() +---- +0 0 {schemas_created=[main]} + +# perform some operations in separate snapshots +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +DROP TABLE ducklake.s1.tbl + +statement ok +DROP SCHEMA ducklake.s1 + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() +---- +0 0 {schemas_created=[main]} +1 1 {schemas_created=[s1]} +2 2 {tables_created=[s1.tbl]} +3 2 {tables_inserted_into=[2]} +4 3 {tables_dropped=[2]} +5 4 {schemas_dropped=[1]} + +# this transaction does nothing in a round-about way +# no snapshot is created here +statement ok +BEGIN + +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +DROP TABLE ducklake.s1.tbl + +statement ok +DROP SCHEMA ducklake.s1 + +statement ok +COMMIT + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=6 +---- + +# this transaction actually makes some changes +statement ok +BEGIN + +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +COMMIT + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=6 +---- +6 5 {schemas_created=[s1], tables_created=[s1.tbl], tables_inserted_into=[4]} + +# alter table +statement ok +ALTER TABLE ducklake.s1.tbl SET PARTITIONED BY (i) + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=7 +---- +7 6 {tables_altered=[4]} + +# create a table and alter it +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.s1.tbl2(i INT); + +statement ok +ALTER TABLE ducklake.s1.tbl2 SET PARTITIONED BY (i) + +statement ok +COMMIT + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=8 +---- +8 7 {tables_created=[s1.tbl2], tables_altered=[6]} + +# create a view +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=9 +---- +9 8 {views_created=[main.v1]} + +statement ok +DROP VIEW ducklake.v1 + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=10 +---- +10 9 {views_dropped=[8]} + +# comments +statement ok +CREATE VIEW ducklake.comment_view AS SELECT 42; + +statement ok con1 +COMMENT ON VIEW ducklake.comment_view IS 'con1' + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake.snapshots() WHERE snapshot_id=12 +---- +12 11 {views_altered=[9]} + +# deletes +statement ok +DELETE FROM ducklake.s1.tbl + +query III +SELECT snapshot_id, schema_version, changes FROM ducklake_snapshots('ducklake') WHERE snapshot_id=13 +---- +13 11 {tables_deleted_from=[4]} diff --git a/tests/sqllogictests/sql/functions/ducklake_table_info.test 
b/tests/sqllogictests/sql/functions/ducklake_table_info.test new file mode 100644 index 0000000..e4e8242 --- /dev/null +++ b/tests/sqllogictests/sql/functions/ducklake_table_info.test @@ -0,0 +1,29 @@ +# name: test/sql/functions/ducklake_table_info.test +# description: test the ducklake table_info function +# group: [functions] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_table_info_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER) + +statement ok +INSERT INTO ducklake.test FROM range(1000) + +statement ok +DELETE FROM ducklake.test WHERE i%2=0 + +query IIIIIII +SELECT table_name, schema_id, table_id, file_count, file_size_bytes > 0, delete_file_count, delete_file_size_bytes > 0 FROM ducklake.table_info(); +---- +test 0 1 1 true 1 true diff --git a/tests/sqllogictests/sql/general/attach_at_snapshot.test b/tests/sqllogictests/sql/general/attach_at_snapshot.test new file mode 100644 index 0000000..dbb3008 --- /dev/null +++ b/tests/sqllogictests/sql/general/attach_at_snapshot.test @@ -0,0 +1,56 @@ +# name: test/sql/general/attach_at_snapshot.test +# description: test attaching ducklake at a specific snapshot +# group: [general] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_attach_at_snapshot.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_files_attach_at_snapshot') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +statement ok +DETACH ducklake + +# attach at snapshot +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_attach_at_snapshot.db' AS ducklake (SNAPSHOT_VERSION 1) + +query II +FROM ducklake.test +---- + +# attaching at a specific snapshot means read-only +statement error +INSERT INTO ducklake.test VALUES (10, 100); +---- +read-only + +statement ok +DETACH ducklake + +# snapshot does not exist +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_attach_at_snapshot.db' AS ducklake (SNAPSHOT_VERSION 33) +---- +No snapshot found at version 33 + +# cannot open a database at a specified snapshot in read_write mode +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_attach_at_snapshot.db' AS ducklake (SNAPSHOT_VERSION 1, READ_WRITE) +---- +can only be used in read-only mode + +# cannot combine snapshot_version/snapshot_time +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_attach_at_snapshot.db' AS ducklake (SNAPSHOT_VERSION 1, SNAPSHOT_TIME '2020-01-01') +---- +Cannot specify both VERSION and TIMESTAMP diff --git a/tests/sqllogictests/sql/general/data_path_tag.test b/tests/sqllogictests/sql/general/data_path_tag.test new file mode 100644 index 0000000..512a3eb --- /dev/null +++ b/tests/sqllogictests/sql/general/data_path_tag.test @@ -0,0 +1,73 @@ +# name: test/sql/general/data_path_tag.test +# description: test that ducklake databases have a tags['data_path'] entry via duckdb_databases() +# group: [general] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_duckdb_tag_data_path.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_duckdb_tables_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test FROM range(100); + +query III +SELECT table_name, estimated_size, column_count FROM duckdb_tables() WHERE database_name='ducklake'; +---- +test 100 1 + +### Newly created 
DuckLake with DATA_PATH, returns the one provided + +query I +SELECT count(*) FROM (SELECT tags['data_path'] as data_path FROM duckdb_databases() WHERE tags['data_path'] IS NOT NULL AND data_path ILIKE '%ducklake_duckdb_tables_files%'); +---- +1 + +statement ok +DETACH ducklake; + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_duckdb_tag_data_path.db' AS ducklake; + +### Existing DuckLake with no DATA_PATH, returns the one from the metadata catalog + +query I +SELECT count(*) FROM (SELECT tags['data_path'] as data_path FROM duckdb_databases() WHERE tags['data_path'] IS NOT NULL AND data_path ILIKE '%ducklake_duckdb_tables_files%'); +---- +1 + +statement ok +DETACH ducklake; + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_duckdb_tag_data_path.db' AS ducklake (DATA_PATH something_else, OVERRIDE_DATA_PATH TRUE); + +### Existing DuckLake with different DATA_PATH, returns the currently provided one + +query I +SELECT count(*) FROM (SELECT tags['data_path'] as data_path FROM duckdb_databases() WHERE tags['data_path'] IS NOT NULL AND data_path ILIKE '%ducklake_duckdb_tables_files%'); +---- +0 + +query I +SELECT count(*) FROM (SELECT tags['data_path'] as data_path FROM duckdb_databases() WHERE tags['data_path'] IS NOT NULL AND data_path ILIKE '%something_else%'); +---- +1 + +statement ok +DETACH ducklake; + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_duckdb_tag_data_path.db' AS ducklake; + +### Existing DuckLake with no DATA_PATH, returns the one from the metadata catalog (the original one, not the overridden one) + +query I +SELECT count(*) FROM (SELECT tags['data_path'] as data_path FROM duckdb_databases() WHERE tags['data_path'] IS NOT NULL AND data_path ILIKE '%ducklake_duckdb_tables_files%'); +---- +1 + diff --git a/tests/sqllogictests/sql/general/database_size.test b/tests/sqllogictests/sql/general/database_size.test new file mode 100644 index 0000000..9995f75 --- /dev/null +++ b/tests/sqllogictests/sql/general/database_size.test @@ -0,0 +1,29 @@ +# name: test/sql/general/database_size.test +# description: test the database_size pragma with ducklake +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_database_size_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +INSERT INTO ducklake.test SELECT i, i + 100 FROM range(1000) t(i); + +statement ok +PRAGMA database_size + +query I +SELECT database_size <> '0 bytes' FROM PRAGMA_database_size() WHERE database_name='ducklake' +---- +true diff --git a/tests/sqllogictests/sql/general/default_path.test b/tests/sqllogictests/sql/general/default_path.test new file mode 100644 index 0000000..8ca65d4 --- /dev/null +++ b/tests/sqllogictests/sql/general/default_path.test @@ -0,0 +1,49 @@ +# name: test/sql/general/default_path.test +# description: test ducklake with default data path +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/ducklake_default_paths.db + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake + +# built-in paths +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/ducklake_default_paths.db.files/**') WHERE 'main/test/' IN file.replace('\', '/') +---- +1 + +# 
custom schema +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.test(i INTEGER); + +statement ok +INSERT INTO ducklake.s1.test VALUES (42); + +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/ducklake_default_paths.db.files/**') WHERE 's1/test/' IN file.replace('\', '/') +---- +1 + +# special names +statement ok +CREATE SCHEMA ducklake."asd/fgh" + +statement ok +CREATE TABLE ducklake."asd/fgh"."🦆"(col INTEGER) + +statement ok +INSERT INTO ducklake."asd/fgh"."🦆" VALUES (42); diff --git a/tests/sqllogictests/sql/general/detach_ducklake.test b/tests/sqllogictests/sql/general/detach_ducklake.test new file mode 100644 index 0000000..75f1958 --- /dev/null +++ b/tests/sqllogictests/sql/general/detach_ducklake.test @@ -0,0 +1,30 @@ +# name: test/sql/general/detach_ducklake.test +# description: Test detaching ducklake +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_detach_files', METADATA_CATALOG 'ducklake_metadata') + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_detach_files', METADATA_CATALOG 'ducklake_metadata') + +# what happens if we detach the metadata catalog? +statement ok +DETACH ducklake_metadata + +statement error +CREATE TABLE ducklake.tbl(i INTEGER) +---- +Catalog "ducklake_metadata" does not exist! diff --git a/tests/sqllogictests/sql/general/ducklake_read_only.test b/tests/sqllogictests/sql/general/ducklake_read_only.test new file mode 100644 index 0000000..98adaee --- /dev/null +++ b/tests/sqllogictests/sql/general/ducklake_read_only.test @@ -0,0 +1,36 @@ +# name: test/sql/general/ducklake_read_only.test +# description: test ducklake with read-only +# group: [general] + +require ducklake + +require parquet + +# non-existent +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_read_only_files', READ_ONLY) +---- + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_read_only_files') + +statement ok +CREATE TABLE ducklake.tbl(i INTEGER) + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_read_only_files', READ_ONLY) + +query I +FROM ducklake.tbl +---- + +statement error +INSERT INTO ducklake.tbl VALUES (42); +---- +read-only + + + diff --git a/tests/sqllogictests/sql/general/generated_columns.test b/tests/sqllogictests/sql/general/generated_columns.test new file mode 100644 index 0000000..fde2a69 --- /dev/null +++ b/tests/sqllogictests/sql/general/generated_columns.test @@ -0,0 +1,20 @@ +# name: test/sql/general/generated_columns.test +# description: Test generated columns in ducklake +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_generated') + +statement error +CREATE TABLE ducklake.t0(c0 INT AS (1), c1 INT); +---- +does not support diff --git a/tests/sqllogictests/sql/general/metadata_cache.test b/tests/sqllogictests/sql/general/metadata_cache.test new file mode 100644 index 0000000..7d0c771 --- /dev/null +++ 
b/tests/sqllogictests/sql/general/metadata_cache.test @@ -0,0 +1,34 @@ +# name: test/sql/general/metadata_cache.test +# description: Test COUNT(*) with Parquet metadata cache +# group: [general] + +require ducklake + +require parquet + +statement ok +SET parquet_metadata_cache=true; + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_pq_metadata_cache_files') + +statement ok +CREATE TABLE ducklake.tbl AS FROM range(1000) t(i); + +statement ok +DELETE FROM ducklake.tbl WHERE i%2=0 + +query I +SELECT COUNT(*) FROM ducklake.tbl +---- +500 + +query I +SELECT COUNT(*) FROM ducklake.tbl +---- +500 diff --git a/tests/sqllogictests/sql/general/metadata_parameters.test b/tests/sqllogictests/sql/general/metadata_parameters.test new file mode 100644 index 0000000..7ecb5ac --- /dev/null +++ b/tests/sqllogictests/sql/general/metadata_parameters.test @@ -0,0 +1,27 @@ +# name: test/sql/general/metadata_parameters.test +# description: Test attach with metadata parameters +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_metadata_parameters', META_TYPE 'DUCKDB') + +statement ok +CREATE TABLE ducklake.tbl AS FROM range(1000) t(i); + +query I +SELECT COUNT(*) FROM ducklake.tbl +---- +1000 + +statement error +ATTACH 'ducklake:${DATA_PATH}/ducklake_metadata_parametersxx.db' AS s (DATA_PATH '${DATA_PATH}/ducklake_metadata_parameters', META_TYPE 'DUCKDBXX') +---- +duckdbxx diff --git a/tests/sqllogictests/sql/general/missing_parquet.test b/tests/sqllogictests/sql/general/missing_parquet.test new file mode 100644 index 0000000..79e9edd --- /dev/null +++ b/tests/sqllogictests/sql/general/missing_parquet.test @@ -0,0 +1,25 @@ +# name: test/sql/general/missing_parquet.test +# description: Test with missing parquet extension +# group: [general] + +require ducklake + +require no_extension_autoloading "EXPECTED This is meant to test missing parquet extension" + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_missing_parquet') + +statement ok +SELECT snapshot_id, schema_version, changes FROM ducklake_snapshots('ducklake') + +statement error +CREATE TABLE ducklake.tbl AS FROM range(1000) t(i); +---- +Missing Extension Error: Could not load the copy function for "parquet". 
Try explicitly loading the "parquet" extension + +require parquet + +statement ok +CREATE TABLE ducklake.tbl AS FROM range(1000) t(i); diff --git a/tests/sqllogictests/sql/general/paths.test b/tests/sqllogictests/sql/general/paths.test new file mode 100644 index 0000000..a2eb03c --- /dev/null +++ b/tests/sqllogictests/sql/general/paths.test @@ -0,0 +1,73 @@ +# name: test/sql/general/paths.test +# description: test ducklake default paths +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_paths_files', METADATA_CATALOG 'ducklake_meta') + +# built-in paths +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (NULL, 3); + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_paths_files/**') WHERE 'test' IN file +---- +1 + +# all paths are relative, and we always use / in DuckLake regardless of OS also for local paths +query I +SELECT case when '\' IN value then value else 'correct' end FROM ducklake_meta.ducklake_metadata WHERE key='data_path' +---- +correct + +query II +SELECT path, path_is_relative FROM ducklake_meta.ducklake_schema +---- +main/ true + +query II +SELECT path, path_is_relative FROM ducklake_meta.ducklake_table +---- +test/ true + +query I +SELECT path_is_relative FROM ducklake_meta.ducklake_data_file +---- +true + +# custom schema +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.test(i INTEGER); + +statement ok +INSERT INTO ducklake.s1.test VALUES (42); + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_paths_files/**') WHERE 's1' IN file +---- +1 + +# special names +statement ok +CREATE SCHEMA ducklake."asd/fgh" + +statement ok +CREATE TABLE ducklake."asd/fgh"."🦆"(col INTEGER) + +statement ok +INSERT INTO ducklake."asd/fgh"."🦆" VALUES (42); diff --git a/tests/sqllogictests/sql/general/prepared_statement.test b/tests/sqllogictests/sql/general/prepared_statement.test new file mode 100644 index 0000000..f45668a --- /dev/null +++ b/tests/sqllogictests/sql/general/prepared_statement.test @@ -0,0 +1,32 @@ +# name: test/sql/general/prepared_statement.test +# description: Test prepared statements with DuckLake +# group: [general] + +require ducklake + +require parquet + +statement ok +SET parquet_metadata_cache=true; + +# FIXME: succeeds on v1.3 branch but fails on v1.3.0 release +mode skip + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_prepared_files') + +statement ok +CREATE TABLE ducklake.tbl AS FROM range(1000) t(i); + +statement ok +PREPARE v1 AS SELECT * FROM ducklake.tbl LIMIT 3 + +statement error +EXECUTE v1 +---- +this use case is not yet supported diff --git a/tests/sqllogictests/sql/general/recursive_metadata_catalog.test b/tests/sqllogictests/sql/general/recursive_metadata_catalog.test new file mode 100644 index 0000000..2317777 --- /dev/null +++ b/tests/sqllogictests/sql/general/recursive_metadata_catalog.test @@ -0,0 +1,17 @@ +# name: test/sql/general/recursive_metadata_catalog.test +# description: Test recursive metadata catalogs +# group: [general] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement error +ATTACH 
'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_recursive_files', METADATA_CATALOG 'ducklake') +---- + diff --git a/tests/sqllogictests/sql/geo/ducklake_geometry.test b/tests/sqllogictests/sql/geo/ducklake_geometry.test new file mode 100644 index 0000000..83bfd0a --- /dev/null +++ b/tests/sqllogictests/sql/geo/ducklake_geometry.test @@ -0,0 +1,105 @@ +# name: test/sql/geo/ducklake_geometry.test +# group: [geo] + +require ducklake + +require parquet + +require spatial + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake; + +statement ok +create table t1 (g GEOMETRY); + +statement ok +insert into t1 VALUES (ST_POINT(1,2)); + +query I +select * from t1; +---- +POINT (1 2) + +# Inspect file stats +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats; +---- +{"bbox": {"xmin": 1.000000, "xmax": 1.000000, "ymin": 2.000000, "ymax": 2.000000, "zmin": null, "zmax": null, "mmin": null, "mmax": null}, "types": ["point"]} + +query I +select extra_stats from ducklake_meta.ducklake_table_column_stats; +---- +{"bbox": {"xmin": 1.000000, "xmax": 1.000000, "ymin": 2.000000, "ymax": 2.000000, "zmin": null, "zmax": null, "mmin": null, "mmax": null}, "types": ["point"]} + +statement ok +insert into t1 VALUES ('LINESTRING Z (5 5 5, 10 10 10)'::GEOMETRY); + +query I +select * from t1; +---- +POINT (1 2) +LINESTRING Z (5 5 5, 10 10 10) + +# Inspect file stats - should be separate for each file +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats order by all; +---- +{"bbox": {"xmin": 1.000000, "xmax": 1.000000, "ymin": 2.000000, "ymax": 2.000000, "zmin": null, "zmax": null, "mmin": null, "mmax": null}, "types": ["point"]} +{"bbox": {"xmin": 5.000000, "xmax": 10.000000, "ymin": 5.000000, "ymax": 10.000000, "zmin": 5.000000, "zmax": 10.000000, "mmin": null, "mmax": null}, "types": ["linestring_z"]} + +# Check table stats are a union of the two files +query I +select extra_stats from ducklake_meta.ducklake_table_column_stats order by all; +---- +{"bbox": {"xmin": 1.000000, "xmax": 10.000000, "ymin": 2.000000, "ymax": 10.000000, "zmin": 5.000000, "zmax": 10.000000, "mmin": null, "mmax": null}, "types": ["linestring_z", "point"]} + +# Merge files into a single file and check stats remain the same +statement ok +CALL ducklake_merge_adjacent_files('ducklake') + +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats order by all; +---- +{"bbox": {"xmin": 1.000000, "xmax": 10.000000, "ymin": 2.000000, "ymax": 10.000000, "zmin": 5.000000, "zmax": 10.000000, "mmin": null, "mmax": null}, "types": ["linestring_z", "point"]} + +# Insert a geometry with M dimension +statement ok +insert into t1 VALUES ('POINT M (20 20 5)'::GEOMETRY); + +# Insert a geometry with a ZM dimension +statement ok +insert into t1 VALUES ('POINT ZM (-30 -30 -30 -30)'::GEOMETRY); + +query I +select * from t1 ORDER BY ALL; +---- +POINT (1 2) +POINT M (20 20 5) +POINT ZM (-30 -30 -30 -30) +LINESTRING Z (5 5 5, 10 10 10) + +# Check stats +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats order by all; +---- +{"bbox": {"xmin": -30.000000, "xmax": -30.000000, "ymin": -30.000000, "ymax": -30.000000, "zmin": -30.000000, "zmax": -30.000000, "mmin": -30.000000, "mmax": -30.000000}, "types": ["point_zm"]} +{"bbox": {"xmin": 1.000000, 
"xmax": 10.000000, "ymin": 2.000000, "ymax": 10.000000, "zmin": 5.000000, "zmax": 10.000000, "mmin": null, "mmax": null}, "types": ["linestring_z", "point"]} +{"bbox": {"xmin": 20.000000, "xmax": 20.000000, "ymin": 20.000000, "ymax": 20.000000, "zmin": null, "zmax": null, "mmin": 5.000000, "mmax": 5.000000}, "types": ["point_m"]} + +# Now merge all again +statement ok +CALL ducklake_merge_adjacent_files('ducklake') + +# Check merged stats +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats order by all; +---- +{"bbox": {"xmin": -30.000000, "xmax": 20.000000, "ymin": -30.000000, "ymax": 20.000000, "zmin": -30.000000, "zmax": 10.000000, "mmin": -30.000000, "mmax": 5.000000}, "types": ["linestring_z", "point", "point_m", "point_zm"]} diff --git a/tests/sqllogictests/sql/geo/ducklake_geometry_add_files.test b/tests/sqllogictests/sql/geo/ducklake_geometry_add_files.test new file mode 100644 index 0000000..198a82c --- /dev/null +++ b/tests/sqllogictests/sql/geo/ducklake_geometry_add_files.test @@ -0,0 +1,51 @@ +# name: test/sql/geo/ducklake_geometry_add_files.test +# group: [geo] + +require ducklake + +require parquet + +require spatial + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake ( + DATA_PATH '${DATA_PATH}/ducklake_files', + METADATA_CATALOG 'ducklake_meta' +) + +# Copy over a test parquet file with geometry column +statement ok +COPY (select st_point(5, 5) as g) TO '${DATA_PATH}/points_geoparquet.parquet' (FORMAT PARQUET); + +statement ok +COPY (select st_point(5, 5) as g) TO '${DATA_PATH}/points.parquet' (FORMAT PARQUET, GEOPARQUET_VERSION NONE); + +statement ok +USE ducklake; + +statement ok +create table t1 (g GEOMETRY); + +# Now add the file +statement ok +CALL ducklake_add_data_files('ducklake', 't1', '${DATA_PATH}/points.parquet'); + +statement error +CALL ducklake_add_data_files('ducklake', 't1', '${DATA_PATH}/points_geoparquet.parquet'); +---- +* Expected type "GEOMETRY" but found type "BLOB". Is this a GeoParquet v1.*.* file? 
DuckLake only supports GEOMETRY types stored in native Parquet(V3) format, not GeoParquet(v1.*.*) + + +query I +select * from t1; +---- +POINT (5 5) + +query I +select extra_stats from ducklake_meta.ducklake_file_column_stats order by all; +---- +{"bbox": {"xmin": 5.000000, "xmax": 5.000000, "ymin": 5.000000, "ymax": 5.000000, "zmin": null, "zmax": null, "mmin": null, "mmax": null}, "types": ["point"]} \ No newline at end of file diff --git a/tests/sqllogictests/sql/geo/ducklake_geometry_inlining.test b/tests/sqllogictests/sql/geo/ducklake_geometry_inlining.test new file mode 100644 index 0000000..21b151f --- /dev/null +++ b/tests/sqllogictests/sql/geo/ducklake_geometry_inlining.test @@ -0,0 +1,36 @@ +# name: test/sql/geo/ducklake_geometry_inlining.test +# group: [geo] + +require ducklake + +require parquet + +require spatial + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake ( + DATA_PATH '${DATA_PATH}/ducklake_files', + METADATA_CATALOG 'ducklake_meta', + DATA_INLINING_ROW_LIMIT 5 +) + +statement ok +USE ducklake; + +statement ok +create table t1 (g GEOMETRY); + +statement error +insert into t1 VALUES (ST_POINT(1,2)); +---- +Not implemented Error: DuckLake does not yet support data-inlining of 'GEOMETRY' columns + + +statement error +create table t2 as select ST_POINT(1,2) as g; +---- +Not implemented Error: DuckLake does not yet support data-inlining of 'GEOMETRY' columns diff --git a/tests/sqllogictests/sql/geo/ducklake_geometry_merge.test b/tests/sqllogictests/sql/geo/ducklake_geometry_merge.test new file mode 100644 index 0000000..43c69b1 --- /dev/null +++ b/tests/sqllogictests/sql/geo/ducklake_geometry_merge.test @@ -0,0 +1,63 @@ +# name: test/sql/geo/ducklake_geometry_merge.test +# group: [geo] + +require ducklake + +require parquet + +require spatial + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake; + +statement ok +create table t1 (i INT, g GEOMETRY); + +statement ok +insert into t1 VALUES (1, ST_POINT(1,2)); + +query II +select * from t1; +---- +1 POINT (1 2) + +# Merge into +statement ok +MERGE INTO t1 USING + (SELECT 1 as i, 'LINESTRING Z (5 5 5, 10 10 10)'::GEOMETRY AS g) AS src + ON t1.i = src.i + WHEN MATCHED THEN UPDATE + WHEN NOT MATCHED THEN INSERT; + +query II +select * from t1; +---- +1 LINESTRING Z (5 5 5, 10 10 10) + +# Normal update +statement ok +UPDATE t1 SET g = ST_POINT(3,4) WHERE i = 1; + +query II +select * from t1; +---- +1 POINT (3 4) + +# Also test with an alter table +statement ok +ALTER TABLE t1 ADD COLUMN g2 geometry; + +statement ok +UPDATE t1 SET g2 = ST_POINT(7,8) WHERE i = 1 + +query III +select * from t1; +---- +1 POINT (3 4) POINT (7 8) diff --git a/tests/sqllogictests/sql/geo/ducklake_geometry_nested.test b/tests/sqllogictests/sql/geo/ducklake_geometry_nested.test new file mode 100644 index 0000000..7034d8f --- /dev/null +++ b/tests/sqllogictests/sql/geo/ducklake_geometry_nested.test @@ -0,0 +1,33 @@ +# name: test/sql/geo/ducklake_geometry_nested.test +# group: [geo] + +require ducklake + +require parquet + +require spatial + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_files', 
METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake; + +statement error +create table t1 (g GEOMETRY[]); +---- +Invalid Input Error: GEOMETRY type is only supported as a top-level type + +statement error +create table t2 (g struct(a GEOMETRY)); +---- +Invalid Input Error: GEOMETRY type is only supported as a top-level type + +statement error +create table t3 (g map(int, GEOMETRY)); +---- +Invalid Input Error: GEOMETRY type is only supported as a top-level type \ No newline at end of file diff --git a/tests/sqllogictests/sql/initialize/ducklake_create_new.test b/tests/sqllogictests/sql/initialize/ducklake_create_new.test new file mode 100644 index 0000000..0001cab --- /dev/null +++ b/tests/sqllogictests/sql/initialize/ducklake_create_new.test @@ -0,0 +1,24 @@ +# name: test/sql/initialize/ducklake_create_new.test +# description: test creating a new ducklake with CREATE_IF_NOT_EXISTS +# group: [initialize] + +require ducklake + +require parquet + +# when CREATE_IF_NOT_EXISTS is false we cannot connect to a non-existent ducklake +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_non_existent.db' AS ducklake (CREATE_IF_NOT_EXISTS false) +---- +creating a new DuckLake is explicitly disabled + +# create a new one +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_existent.db' AS ducklake (CREATE_IF_NOT_EXISTS true) + +statement ok +DETACH ducklake; + +# we can attach to an existing ducklake +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_existent.db' AS ducklake (CREATE_IF_NOT_EXISTS false) diff --git a/tests/sqllogictests/sql/initialize/read_only_mode.test b/tests/sqllogictests/sql/initialize/read_only_mode.test new file mode 100644 index 0000000..cfbcb43 --- /dev/null +++ b/tests/sqllogictests/sql/initialize/read_only_mode.test @@ -0,0 +1,28 @@ +# name: test/sql/initialize/read_only_mode.test +# description: test ducklake in read-only mode +# group: [initialize] + +require ducklake + +require parquet + +statement ok +ATTACH '__TEST_DIR__/ducklake_read_only.db' AS db1 + +statement ok +DETACH db1; + +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (READONLY 1, CREATE_IF_NOT_EXISTS false) +---- +creating a new DuckLake is explicitly disabled + +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (READONLY 1) +---- +creating a new DuckLake is explicitly disabled + +statement error +ATTACH 'ducklake:__TEST_DIR__/ducklake_read_only.db' AS ducklake (READONLY 1, CREATE_IF_NOT_EXISTS true) +---- +"CREATE" on database "__ducklake_metadata_ducklake" which is attached in read-only mode! 
\ No newline at end of file diff --git a/tests/sqllogictests/sql/insert/insert_column_list.test b/tests/sqllogictests/sql/insert/insert_column_list.test new file mode 100644 index 0000000..2635560 --- /dev/null +++ b/tests/sqllogictests/sql/insert/insert_column_list.test @@ -0,0 +1,64 @@ +# name: test/sql/insert/insert_column_list.test +# description: test ducklake insert column list +# group: [insert] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_insert_list_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j VARCHAR); + +statement ok +INSERT INTO ducklake.test (j, i) VALUES ('hello', 84); + +query II +SELECT * FROM ducklake.test +---- +84 hello + +# insert default value +statement ok +INSERT INTO ducklake.test (j) VALUES ('world'); + +statement ok +INSERT INTO ducklake.test (i) VALUES (100); + +query II +SELECT * FROM ducklake.test +---- +84 hello +NULL world +100 NULL + +statement ok +INSERT INTO ducklake.test DEFAULT VALUES + +query II +SELECT * FROM ducklake.test +---- +84 hello +NULL world +100 NULL +NULL NULL + +statement ok +INSERT INTO ducklake.test VALUES (1000, DEFAULT), (DEFAULT, 'xxx'); + +query II +SELECT * FROM ducklake.test +---- +84 hello +NULL world +100 NULL +NULL NULL +1000 NULL +NULL xxx diff --git a/tests/sqllogictests/sql/insert/insert_file_size.test b/tests/sqllogictests/sql/insert/insert_file_size.test new file mode 100644 index 0000000..8ed7b2c --- /dev/null +++ b/tests/sqllogictests/sql/insert/insert_file_size.test @@ -0,0 +1,32 @@ +# name: test/sql/insert/insert_file_size.test +# description: test ducklake split up insert across files +# group: [insert] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_insert_file_size') + +statement ok +CREATE TABLE ducklake.test(id INTEGER, s VARCHAR); + +statement ok +CALL ducklake.set_option('target_file_size', '10KB') + +query I +INSERT INTO ducklake.test SELECT i, concat('thisisalongstring', i) FROM range(500000) t(i) +---- +500000 + +# we should be splitting this up into multiple files +query I +SELECT COUNT(*) > 1 FROM glob('${DATA_PATH}/ducklake_insert_file_size/main/test/*.parquet') +---- +true diff --git a/tests/sqllogictests/sql/insert/insert_into_self.test b/tests/sqllogictests/sql/insert/insert_into_self.test new file mode 100644 index 0000000..e70a362 --- /dev/null +++ b/tests/sqllogictests/sql/insert/insert_into_self.test @@ -0,0 +1,54 @@ +# name: test/sql/insert/insert_into_self.test +# description: Test ducklake insert into self +# group: [insert] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_insert_self_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j VARCHAR); + +statement ok +BEGIN + +query I +INSERT INTO ducklake.test VALUES (1, '2'), (NULL, '3'); +---- +2 + +query I +INSERT INTO ducklake.test FROM ducklake.test +---- +2 + +query I +INSERT INTO ducklake.test FROM ducklake.test +---- +4 + +query I +INSERT INTO ducklake.test FROM ducklake.test +---- +8 + +query I +INSERT INTO ducklake.test SELECT a.i, a.j FROM ducklake.test a, 
ducklake.test b +---- +256 + +statement ok +COMMIT + +query III +SELECT SUM(i), SUM(STRLEN(j)), COUNT(*) FROM ducklake.test +---- +136 272 272 diff --git a/tests/sqllogictests/sql/issues/late_materialization.test b/tests/sqllogictests/sql/issues/late_materialization.test new file mode 100644 index 0000000..57014cf --- /dev/null +++ b/tests/sqllogictests/sql/issues/late_materialization.test @@ -0,0 +1,44 @@ +# name: test/sql/issues/late_materialization.test +# description: Test late materialization +# group: [issues] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_late_materialization'); + +statement ok +USE ducklake + +statement ok +CREATE TABLE my_table(id INTEGER, value VARCHAR); + +statement ok +INSERT INTO my_table VALUES (1, 'hello'); + +statement ok +INSERT INTO my_table VALUES (2, 'world'); + +statement ok +INSERT INTO my_table VALUES (3, 'this'); + +statement ok +INSERT INTO my_table VALUES (4, 'is'); + +statement ok +INSERT INTO my_table VALUES (5, 'a'); + +statement ok +INSERT INTO my_table VALUES (6, 'test'); + +query II +SELECT * FROM my_table WHERE id > 3 ORDER BY value DESC LIMIT 1 +---- +6 test diff --git a/tests/sqllogictests/sql/list_files/ducklake_list_files.test b/tests/sqllogictests/sql/list_files/ducklake_list_files.test new file mode 100644 index 0000000..dff15b2 --- /dev/null +++ b/tests/sqllogictests/sql/list_files/ducklake_list_files.test @@ -0,0 +1,118 @@ +# name: test/sql/list_files/ducklake_list_files.test +# description: test the ducklake_list_files function +# group: [list_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_list_files'); + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(100); + +# snapshot 3 +statement ok +INSERT INTO ducklake.test FROM range(100, 200); + +# snapshot 4 +statement ok +INSERT INTO ducklake.test FROM range(200, 300); + +# partitions +# snapshot 5 +statement ok +CREATE TABLE ducklake.partitioned_tbl(part_key INT, value INT); + +# snapshot 6 +statement ok +ALTER TABLE ducklake.partitioned_tbl SET PARTITIONED BY (part_key); + +# snapshot 7 +statement ok +INSERT INTO ducklake.partitioned_tbl VALUES (1, 50), (2, 100); + +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test') +---- +3 + +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'partitioned_tbl') +---- +2 + +# schema parameter +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test', schema => 'main') +---- +3 + +# query DuckLake directly, then run the same query over the raw file list +query IIII +SELECT MIN(i), MAX(i), COUNT(*), AVG(i) FROM ducklake.test +---- +0 299 300 149.5 + +statement ok +SET VARIABLE parquet_files = (SELECT LIST(data_file) FROM ducklake_list_files('ducklake', 'test')) + +query IIII +SELECT MIN(i), MAX(i), COUNT(*), AVG(i) FROM read_parquet(getvariable('parquet_files')) +---- +0 299 300 149.5 + +# get a file list at a specified version +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test', snapshot_version => 2) +---- +1 + +# at a specified timestamp +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test', snapshot_time => NOW())
+---- +3 + +# no delete files currently +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test') WHERE delete_file IS NOT NULL +---- +0 + +# now delete +statement ok +DELETE FROM ducklake.test WHERE i%2=0 AND i < 150 + +# we have two delete files now (we did not delete from the last file) +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test') WHERE delete_file IS NOT NULL +---- +2 + +statement error +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test', schema => 'unknown_schema') +---- +does not exist + +statement error +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'unknown_table', schema => 'main') +---- +does not exist + +# cannot specify both snapshot version and time +statement error +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'test', snapshot_version => 2, snapshot_time => NOW()) +---- +not both diff --git a/tests/sqllogictests/sql/macros/test_attach_timetravel.test b/tests/sqllogictests/sql/macros/test_attach_timetravel.test new file mode 100644 index 0000000..d5ff64f --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_attach_timetravel.test @@ -0,0 +1,48 @@ +# name: test/sql/macros/test_attach_timetravel.test +# description: test attach timetravel +# group: [macros] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/test_attach_timetravel.db' AS ducklake (DATA_PATH '__TEST_DIR__/test_attach_timetravel'); + +statement ok +use ducklake + +statement ok +CREATE MACRO simple(a) AS a; + +statement ok +DROP MACRO simple + +query II +select snapshot_id, changes FROM snapshots() +---- +0 {schemas_created=[main]} +1 {scalar_macros_created=['main."simple"']} +2 {scalar_macros_dropped=[1]} + +statement error +select simple(1) +---- +Scalar Function with name simple does not exist! 
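+# the snapshots above show that the macro was created at snapshot 1 and dropped
+# at snapshot 2; re-attaching pinned at SNAPSHOT_VERSION 1 below should
+# therefore make simple() callable again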
+ +statement ok +USE memory + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/test_attach_timetravel.db' AS ducklake (DATA_PATH '__TEST_DIR__/test_attach_timetravel', SNAPSHOT_VERSION 1); + +statement ok +use ducklake + +query I +select simple(1) +---- +1 diff --git a/tests/sqllogictests/sql/macros/test_default_parameter.test b/tests/sqllogictests/sql/macros/test_default_parameter.test new file mode 100644 index 0000000..42af83b --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_default_parameter.test @@ -0,0 +1,60 @@ +# name: test/sql/macros/test_default_parameter.test +# description: test macro with a default parameter +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_default_parameter'); + +statement ok +use ducklake + +statement ok +CREATE MACRO add_default(a, b := 5) AS a + b; + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 add_default 1 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb (a + b) scalar + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 0 0 a unknown NULL unknown +1 0 1 b unknown 5 int32 + +query I +select add_default(1) +---- +6 + +query I +select add_default(1,3) +---- +4 + +statement ok +CREATE MACRO add_default_sec(a :=5) AS a; + +query I +select add_default_sec('a') +---- +a + +query I +select add_default_sec() +---- +5 \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_defined_types.test b/tests/sqllogictests/sql/macros/test_defined_types.test new file mode 100644 index 0000000..6032d05 --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_defined_types.test @@ -0,0 +1,105 @@ +# name: test/sql/macros/test_defined_types.test +# description: test macro where types are explicitly user-defined +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_defined_types'); + +statement ok +USE ducklake; + +statement ok +CREATE MACRO add_one(x INTEGER) AS (x + 1); + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 add_one 1 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb (x + 1) scalar + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 0 0 x int32 NULL unknown + +query I +select add_one(1) +---- +2 + + +# Check with default parameter and defined type on different parameters +statement ok +CREATE MACRO add_two_def(x INTEGER, y := 5) AS (x + y); + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 add_one 1 NULL +0 2 add_two_def 2 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb (x + 1) scalar +2 0 duckdb (x + y) scalar + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 0 0 x int32 NULL unknown +2 0 0 x int32 NULL unknown +2 0 1 y unknown 5 int32 + +query I +select add_two_def(1) +---- +6 + +# Check with default parameter and defined type on same parameters + +statement ok +CREATE MACRO add_one_same(x BIGINT := 1) AS (x + 1); + +# Check Macro Tables 
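+# (the unlabeled columns below appear to be, in order: schema_id, macro_id,
+# macro_name, begin_snapshot, end_snapshot for ducklake_macro; macro_id,
+# implementation index, dialect, body, macro kind for ducklake_macro_impl; and
+# macro_id, implementation index, parameter index, parameter name, declared
+# type, default value, default-value type for ducklake_macro_parameters; this
+# reading is inferred from the values checked in this suite, not from a
+# documented schema)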
+query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 add_one 1 NULL +0 2 add_two_def 2 NULL +0 3 add_one_same 3 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb (x + 1) scalar +2 0 duckdb (x + y) scalar +3 0 duckdb (x + 1) scalar + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 0 0 x int32 NULL unknown +2 0 0 x int32 NULL unknown +2 0 1 y unknown 5 int32 +3 0 0 x int64 1 int32 + +query I +select add_one_same() +---- +2 + diff --git a/tests/sqllogictests/sql/macros/test_macro_multiple_connections.test b/tests/sqllogictests/sql/macros/test_macro_multiple_connections.test new file mode 100644 index 0000000..8435bde --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_macro_multiple_connections.test @@ -0,0 +1,111 @@ +# name: test/sql/macros/test_macro_multiple_connections.test +# description: test macros are transaction safe over multiple connections +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_macro_multiple_connections'); + +statement ok +USE ducklake + +statement ok con1 +USE ducklake + +statement ok con2 +USE ducklake + +# two transactions try to create the same macro: conflict + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE MACRO add_one(x INTEGER) AS (x + 1); + +statement ok con2 +CREATE MACRO add_one(x INTEGER) AS (x + 1); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict + +# two transactions try to drop a macro: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP MACRO add_one; + +statement ok con2 +DROP MACRO add_one; + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict + +statement ok +CREATE TABLE test_tbl (id INT, name string); + + +# creating a scalar macro and a table macro in different transactions should be ok +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE MACRO gimme_one() AS 1; + +statement ok con2 +CREATE MACRO gimme_one() as TABLE SELECT * FROM test_tbl LIMIT 1; + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + + +# two transactions trying to drop the same table macro should fail +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP MACRO TABLE gimme_one; + +statement ok con2 +DROP MACRO TABLE gimme_one; + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict diff --git a/tests/sqllogictests/sql/macros/test_macro_tables.test b/tests/sqllogictests/sql/macros/test_macro_tables.test new file mode 100644 index 0000000..2191066 --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_macro_tables.test @@ -0,0 +1,54 @@ +# name: test/sql/macros/test_macro_tables.test +# description: test table macros +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_defined_types'); + +statement ok +USE ducklake; + +statement ok +CREATE TABLE test_tbl (id INT, name string); + +statement ok +INSERT INTO test_tbl VALUES (1,'tom'), (2,'dick'),(3,'harry'), (4,'mary'), (5,'mungo'), (6,'midge'); + +statement ok +CREATE MACRO xt(a,_name) as TABLE SELECT * FROM test_tbl WHERE(id>=a or name=_name); + +# Check
Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 2 xt 3 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +2 0 duckdb SELECT * FROM test_tbl WHERE ((id >= a) OR ("name" = _name)) table + + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +2 0 0 a unknown NULL unknown +2 0 1 _name unknown NULL unknown + +query II +FROM xt(5,'tom'); +---- +1 tom +5 mungo +6 midge + +statement ok +DROP MACRO TABLE xt; \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_macro_transactions.test b/tests/sqllogictests/sql/macros/test_macro_transactions.test new file mode 100644 index 0000000..90deb52 --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_macro_transactions.test @@ -0,0 +1,106 @@ +# name: test/sql/macros/test_macro_transactions.test +# description: test macros are transaction safe +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_macro_transactions'); + +statement ok +use ducklake + +# transaction-local macro drop and re-create +statement ok +CREATE MACRO simple(a) AS a; + +statement ok +BEGIN + +statement error +CREATE MACRO simple(a) AS a; +---- +Macro Function with name "simple" already exists + +statement ok +DROP MACRO simple + +statement error +select simple(1) +---- +does not exist + +statement ok +CREATE MACRO simple(a) AS a; + +query I +select simple(1) +---- +1 + +statement ok +COMMIT + +query I +select simple(1) +---- +1 + +# Check schema drop in transaction + +statement ok +CREATE SCHEMA test; + +statement ok +CREATE MACRO test.simple(a) AS a; + +statement ok +BEGIN + +statement error +DROP schema test; +---- +CASCADE + +statement ok +DROP MACRO test.simple + +statement ok +DROP schema test; + +statement ok +COMMIT + +# Create and drop macro in same transaction +statement ok +BEGIN + +statement error +CREATE MACRO simple(a) AS a; +---- +Macro Function with name "simple" already exists + +statement ok +DROP MACRO simple + +statement error +select simple(1) +---- +does not exist + +statement ok +CREATE MACRO simple(a) AS a; + +query I +select simple(1) +---- +1 + +statement ok +COMMIT \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_multiple_implementations.test b/tests/sqllogictests/sql/macros/test_multiple_implementations.test new file mode 100644 index 0000000..b0dfef0 --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_multiple_implementations.test @@ -0,0 +1,90 @@ +# name: test/sql/macros/test_multiple_implementations.test +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_multiple_implementations'); + +statement ok +use ducklake + +statement ok +CREATE MACRO multi_add + () AS 0, + (a) AS a, + (a, b) AS a + b, + (a, b, c) AS a + b + c, + (a, b, c, d) AS a + b + c + d, + (a, b, c, d, e) AS a + b + c + d + e + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 multi_add 1 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb 0 scalar +1 1 duckdb a scalar +1 2 duckdb (a + b) scalar +1 3 duckdb ((a + b) + c) scalar +1 4 duckdb (((a + b) + c) + d) scalar +1 5 duckdb ((((a + b) + 
c) + d) + e) scalar + + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 1 0 a unknown NULL unknown +1 2 0 a unknown NULL unknown +1 2 1 b unknown NULL unknown +1 3 0 a unknown NULL unknown +1 3 1 b unknown NULL unknown +1 3 2 c unknown NULL unknown +1 4 0 a unknown NULL unknown +1 4 1 b unknown NULL unknown +1 4 2 c unknown NULL unknown +1 4 3 d unknown NULL unknown +1 5 0 a unknown NULL unknown +1 5 1 b unknown NULL unknown +1 5 2 c unknown NULL unknown +1 5 3 d unknown NULL unknown +1 5 4 e unknown NULL unknown + +query I +SELECT multi_add(); +---- +0 + +query I +SELECT multi_add(1); +---- +1 + +query I +SELECT multi_add(1,1); +---- +2 + +query I +SELECT multi_add(1,1,1); +---- +3 + +query I +SELECT multi_add(1,1,1,1); +---- +4 + +query I +SELECT multi_add(1,1,1,1,1); +---- +5 \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_scalar_table_macros.test b/tests/sqllogictests/sql/macros/test_scalar_table_macros.test new file mode 100644 index 0000000..2f6a01f --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_scalar_table_macros.test @@ -0,0 +1,72 @@ +# name: test/sql/macros/test_scalar_table_macros.test +# description: test table macros +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_defined_types'); + +statement ok +USE ducklake; + +statement ok +CREATE TABLE test_tbl (id INT, name string); + +statement ok +INSERT INTO test_tbl VALUES (1,'tom'), (2,'dick'),(3,'harry'), (4,'mary'), (5,'mungo'), (6,'midge'); + +statement ok +CREATE MACRO xt(a,_name) as TABLE SELECT * FROM test_tbl WHERE(id>=a or name=_name); + +statement ok +CREATE MACRO xt(a) as 1; + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 2 xt 3 NULL +0 3 xt 4 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +2 0 duckdb SELECT * FROM test_tbl WHERE ((id >= a) OR ("name" = _name)) table +3 0 duckdb 1 scalar + + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +2 0 0 a unknown NULL unknown +2 0 1 _name unknown NULL unknown +3 0 0 a unknown NULL unknown + +query II +FROM xt(5,'tom'); +---- +1 tom +5 mungo +6 midge + +query I +SELECT xt(1) +---- +1 + +statement error +CREATE MACRO xt(a,_name) as TABLE SELECT * FROM test_tbl WHERE(id>=a or name=_name); +---- +Table Macro Function with name "xt" already exists + +statement error +CREATE MACRO xt(a) as 1; +---- +Macro Function with name "xt" already exists \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_schema_dependency.test b/tests/sqllogictests/sql/macros/test_schema_dependency.test new file mode 100644 index 0000000..af2a74f --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_schema_dependency.test @@ -0,0 +1,45 @@ +# name: test/sql/macros/test_schema_dependency.test +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_schema_dependency'); + +statement ok +use ducklake + +statement ok +CREATE SCHEMA test; + +statement ok +CREATE MACRO test.simple(a) AS a; + +statement ok +CREATE TABLE test.test_tbl (id INT, name string); + +statement ok +CREATE MACRO test.xt(a,_name) as TABLE SELECT * 
FROM test.test_tbl WHERE(id>=a or name=_name); + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +1 2 simple 2 NULL +1 4 xt 4 NULL + +statement ok +DROP SCHEMA test CASCADE; + +# Check the macros are now marked as dropped (end_snapshot is set) +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +1 2 simple 2 5 +1 4 xt 4 5 \ No newline at end of file diff --git a/tests/sqllogictests/sql/macros/test_simple_macro.test b/tests/sqllogictests/sql/macros/test_simple_macro.test new file mode 100644 index 0000000..ab7a080 --- /dev/null +++ b/tests/sqllogictests/sql/macros/test_simple_macro.test @@ -0,0 +1,92 @@ +# name: test/sql/macros/test_simple_macro.test +# description: test simple macros with ducklake +# group: [macros] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_simple_macro'); + +statement ok +use ducklake + +statement ok +CREATE MACRO simple(a) AS a; + +query I +select simple(1) +---- +1 + +# Check Macro Tables +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 simple 1 NULL + +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_impl; +---- +1 0 duckdb a scalar + +query IIIIIII +FROM __ducklake_metadata_ducklake.ducklake_macro_parameters; +---- +1 0 0 a unknown NULL unknown + +statement ok +drop macro simple; + +query II +SELECT begin_snapshot, end_snapshot FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +1 2 + +statement error +select simple(1) +---- +Scalar Function with name simple does not exist! + + +query II +select snapshot_id, changes FROM snapshots() +---- +0 {schemas_created=[main]} +1 {scalar_macros_created=['main."simple"']} +2 {scalar_macros_dropped=[1]} + +# We can recreate it and that's fine + +statement ok +CREATE MACRO simple(a) AS a; + +query I +select simple(1) +---- +1 + +# It now gets a new id +query IIIII +FROM __ducklake_metadata_ducklake.ducklake_macro; +---- +0 1 simple 1 2 +0 2 simple 3 NULL + +query II +select snapshot_id, changes FROM snapshots() +---- +0 {schemas_created=[main]} +1 {scalar_macros_created=['main."simple"']} +2 {scalar_macros_dropped=[1]} +3 {scalar_macros_created=['main."simple"']} + +statement error +CREATE MACRO simple(a) AS a; +---- +Macro Function with name "simple" already exists \ No newline at end of file diff --git a/tests/sqllogictests/sql/merge/merge_into_tpch.test_slow b/tests/sqllogictests/sql/merge/merge_into_tpch.test_slow new file mode 100644 index 0000000..cb19f22 --- /dev/null +++ b/tests/sqllogictests/sql/merge/merge_into_tpch.test_slow @@ -0,0 +1,61 @@ +# name: test/sql/merge/merge_into_tpch.test_slow +# description: Test merge into with TPC-H SF1 +# group: [merge] + +require ducklake + +require parquet + +require tpch + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_merge_into_tpch_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CALL dbgen(sf=1); + +statement ok +CREATE TABLE ducklake.random_lineitem AS FROM lineitem LIMIT 0 + +# create lineitem but with a random subset of the rows +statement ok +MERGE INTO ducklake.random_lineitem USING lineitem USING (l_orderkey, l_linenumber) +WHEN NOT MATCHED AND random() < 0.2 THEN INSERT + +# insert a bunch of new rows with shifted order keys +statement ok +MERGE INTO ducklake.random_lineitem USING
(SELECT * REPLACE (l_orderkey + 10000000 AS l_orderkey) FROM lineitem) USING (l_orderkey, l_linenumber) +WHEN MATCHED THEN ERROR +WHEN NOT MATCHED AND random() < 0.2 THEN INSERT + +# randomly update a bunch of rows +statement ok +MERGE INTO ducklake.random_lineitem USING lineitem USING (l_orderkey, l_linenumber) +WHEN MATCHED AND random() < 0.1 THEN UPDATE SET l_discount = random() + +# run two merges that should fully equalize the tables +statement ok +MERGE INTO ducklake.random_lineitem USING lineitem USING (l_orderkey, l_linenumber) +WHEN MATCHED THEN UPDATE + +statement ok +MERGE INTO ducklake.random_lineitem USING lineitem USING (l_orderkey, l_linenumber) +WHEN NOT MATCHED BY TARGET THEN INSERT +WHEN NOT MATCHED BY SOURCE THEN DELETE + +# both tables should now be identical - despite all the random stuff we did +query IIIIIIIIIIIIIIII +FROM lineitem EXCEPT FROM ducklake.random_lineitem +---- + +query IIIIIIIIIIIIIIII +FROM ducklake.random_lineitem EXCEPT FROM lineitem +---- + +statement ok +DROP TABLE ducklake.random_lineitem diff --git a/tests/sqllogictests/sql/merge/merge_partition.test b/tests/sqllogictests/sql/merge/merge_partition.test new file mode 100644 index 0000000..2fd6173 --- /dev/null +++ b/tests/sqllogictests/sql/merge/merge_partition.test @@ -0,0 +1,84 @@ +# name: test/sql/merge/merge_partition.test +# description: Test merge into with partitions +# group: [merge] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '__TEST_DIR__/merge_partition', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +statement ok +CREATE TABLE my_timeseries (ts TIMESTAMP, x DOUBLE PRECISION); + +statement ok +ALTER TABLE my_timeseries SET PARTITIONED BY (year(ts)); + +statement ok +insert into my_timeseries VALUES ('2025-09-15',42) + +statement ok +MERGE INTO my_timeseries + USING ( + SELECT + '2025-09-17'::TIMESTAMP as ts, + 42::DOUBLE PRECISION as x + ) AS timeseries_updates + ON my_timeseries.ts = timeseries_updates.ts + WHEN NOT MATCHED THEN INSERT; + +query II +SELECT * FROM my_timeseries +---- +2025-09-15 00:00:00 42.0 +2025-09-17 00:00:00 42.0 + +statement ok +MERGE INTO my_timeseries + USING ( + SELECT + '2025-09-17'::TIMESTAMP as ts, + 43::DOUBLE PRECISION as x + ) AS timeseries_updates +ON my_timeseries.ts = timeseries_updates.ts +WHEN MATCHED THEN UPDATE; + +query II +SELECT * FROM my_timeseries +---- +2025-09-15 00:00:00 42.0 +2025-09-17 00:00:00 43.0 + +statement ok +DROP TABLE my_timeseries; + +statement ok +CREATE TABLE my_timeseries (ts TIMESTAMP, x DOUBLE PRECISION, y DOUBLE PRECISION); + +statement ok +insert into my_timeseries VALUES ('2025-09-15', 43, 39) + +statement ok +CREATE TABLE my_timeseries_new (ts TIMESTAMP, x DOUBLE PRECISION, y DOUBLE PRECISION); + +statement ok +insert into my_timeseries_new VALUES ('2025-09-15', 43, 33) + +statement ok +MERGE INTO my_timeseries old + USING my_timeseries_new new + USING(ts) +WHEN MATCHED AND ( + old.x IS DISTINCT FROM new.y +) THEN UPDATE; + +query III +FROM my_timeseries +---- +2025-09-15 00:00:00 43.0 33.0 diff --git a/tests/sqllogictests/sql/merge/merge_partition_update.test b/tests/sqllogictests/sql/merge/merge_partition_update.test new file mode 100644 index 0000000..cab0448 --- /dev/null +++ b/tests/sqllogictests/sql/merge/merge_partition_update.test @@ -0,0 +1,54 @@ +# name: test/sql/merge/merge_partition_update.test +# description: Test merge into with partitions +# group: [merge] 
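+# note: unlike merge_partition.test above, this test counts how many files a
+# matched-path MERGE update writes into the year=2025 partition folder, and
+# compares that against a plain UPDATE of the same row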
+ +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/merge_partition_update', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +statement ok +CREATE TABLE my_timeseries (ts TIMESTAMP, x DOUBLE PRECISION); + +statement ok +ALTER TABLE my_timeseries SET PARTITIONED BY (year(ts)); + +statement ok +INSERT INTO my_timeseries VALUES ('2025-09-17'::TIMESTAMP, 42::DOUBLE PRECISION); + + +statement ok +MERGE INTO my_timeseries + USING ( + SELECT + '2025-09-17'::TIMESTAMP as ts, + 43::DOUBLE PRECISION as x + ) AS timeseries_updates + ON my_timeseries.ts = timeseries_updates.ts + WHEN MATCHED THEN UPDATE; + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/merge_partition_update/**/year=2025/*') +---- +2 + + + +statement ok +UPDATE my_timeseries +SET x = 43::DOUBLE PRECISION +WHERE ts = '2025-09-17'::TIMESTAMP; + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/merge_partition_update/**/year=2025/*') +---- +3 diff --git a/tests/sqllogictests/sql/merge/merge_timestamp.test b/tests/sqllogictests/sql/merge/merge_timestamp.test new file mode 100644 index 0000000..9fcb2a3 --- /dev/null +++ b/tests/sqllogictests/sql/merge/merge_timestamp.test @@ -0,0 +1,37 @@ +# name: test/sql/merge/merge_timestamp.test +# description: Test merge into with timestamp partitions +# group: [merge] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/merge_partition_update', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +statement ok +create table t (id uuid, ts timestamptz); + +statement ok +alter table t set partitioned by (year(ts), month(ts)); + +statement ok +merge into t +using (select uuidv7() id, current_timestamp ts) m on (t.id = m.id) +when matched then update set id = m.id, ts=m.ts +when not matched then insert (id, ts) values (m.id, m.ts); + +query I +SELECT count (*) FROM t; +---- +1 + diff --git a/tests/sqllogictests/sql/merge/merge_update_insert.test b/tests/sqllogictests/sql/merge/merge_update_insert.test new file mode 100644 index 0000000..c601204 --- /dev/null +++ b/tests/sqllogictests/sql/merge/merge_update_insert.test @@ -0,0 +1,51 @@ +# name: test/sql/merge/merge_update_insert.test +# description: Test merge into with update/insert +# group: [merge] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '__TEST_DIR__/merge_update_insert', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +statement ok +CREATE TABLE Stock(item_id int, balance int); + +statement ok +insert into stock values (10,2200), (20,1900) + +statement ok +CREATE TABLE Buy(item_id int, volume int); + +statement ok +INSERT INTO Buy values(10, 1000); + +statement ok +INSERT INTO Buy values(30, 300); + +query II +FROM Stock ORDER BY item_id +---- +10 2200 +20 1900 + +# update and insert +query I +MERGE INTO Stock AS s USING Buy AS b ON s.item_id = b.item_id +WHEN MATCHED THEN UPDATE SET balance = balance + b.volume +WHEN NOT MATCHED THEN INSERT VALUES (b.item_id, b.volume) +---- +2 + +query II +FROM Stock ORDER BY item_id +---- +10 3200 +20 1900 +30 300 diff --git 
a/tests/sqllogictests/sql/metadata/ducklake_duckdb_tables.test b/tests/sqllogictests/sql/metadata/ducklake_duckdb_tables.test new file mode 100644 index 0000000..2cde172 --- /dev/null +++ b/tests/sqllogictests/sql/metadata/ducklake_duckdb_tables.test @@ -0,0 +1,26 @@ +# name: test/sql/metadata/ducklake_duckdb_tables.test +# description: test duckdb_tables with ducklake +# group: [metadata] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_duckdb_tables_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test FROM range(100); + +query III +SELECT table_name, estimated_size, column_count FROM duckdb_tables() WHERE database_name='ducklake'; +---- +test 100 1 diff --git a/tests/sqllogictests/sql/migration/migration.test b/tests/sqllogictests/sql/migration/migration.test new file mode 100644 index 0000000..ba62b09 --- /dev/null +++ b/tests/sqllogictests/sql/migration/migration.test @@ -0,0 +1,68 @@ +# name: test/sql/migration/migration.test +# description: test migration from previous releases to the latest ducklake version. +# group: [migration] + +require ducklake + +require parquet + +unzip data/old_ducklake/v01.db.gz __TEST_DIR__/v01.db + +unzip data/old_ducklake/v02.db.gz __TEST_DIR__/v02.db + +unzip data/old_ducklake/v03-dev1.db.gz __TEST_DIR__/v03-dev1.db + +foreach version v01 v02 v03-dev1 + +statement ok +ATTACH 'ducklake:__TEST_DIR__/${version}.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_migrate_${version}', OVERRIDE_DATA_PATH TRUE) + +statement ok +INSERT INTO ducklake.test VALUES (42); + +query I +FROM ducklake.test +---- +42 + +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/ducklake_migrate_${version}/**/*.parquet') +---- +1 + +# test add files +statement ok +COPY (SELECT 84 i) TO '__TEST_DIR__/ducklake_migrate_${version}/my_file.parquet' + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '__TEST_DIR__/ducklake_migrate_${version}/my_file.parquet') + +query I +FROM ducklake.test +---- +42 +84 + +# re-attach after migration +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/${version}.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_migrate_${version}', OVERRIDE_DATA_PATH TRUE) + +query I +FROM ducklake.test +---- +42 +84 + +query II +FROM __ducklake_metadata_ducklake.ducklake_schema_versions; +---- +0 0 +1 1 + +statement ok +DETACH ducklake; + +endloop \ No newline at end of file diff --git a/tests/sqllogictests/sql/migration/v01_partitioned.test b/tests/sqllogictests/sql/migration/v01_partitioned.test new file mode 100644 index 0000000..9193222 --- /dev/null +++ b/tests/sqllogictests/sql/migration/v01_partitioned.test @@ -0,0 +1,36 @@ +# name: test/sql/migration/v01_partitioned.test +# description: test migration from v0.1 +# group: [migration] + +require ducklake + +require parquet + +test-env DATA_PATH __TEST_DIR__ + +unzip data/old_ducklake/v01_partitioned.db.gz ${DATA_PATH}/v01_partitioned.db + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/v01_partitioned.db + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_migrate_v01_partitioned', METADATA_CATALOG 'ducklake_metadata', OVERRIDE_DATA_PATH TRUE) + +statement ok +INSERT INTO ducklake.partitioned_tbl SELECT NULL, i%2, concat('thisisastring_', i) FROM range(10000) 
t(i) + +statement ok +INSERT INTO ducklake.other_partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +query III +SELECT data_file_id, partition_id, regexp_extract(path, '([a-zA-Z0-9_]+=[0-9])[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 3 part_key=0 +1 3 part_key=1 +2 4 other_part_key=0 +3 4 other_part_key=1 + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_migrate_v01_partitioned/**/*.parquet') +---- +4 diff --git a/tests/sqllogictests/sql/partitioning/basic_partitioning.test b/tests/sqllogictests/sql/partitioning/basic_partitioning.test new file mode 100644 index 0000000..9451151 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/basic_partitioning.test @@ -0,0 +1,180 @@ +# name: test/sql/partitioning/basic_partitioning.test +# description: Test partitioning +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on a column +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_partitioning.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_partitioning', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +statement ok +ALTER TABLE partitioned_tbl RENAME TO partitioned_tbl_renamed + +# check if rename causes partition info to get dropped +query I +SELECT COUNT(*) FROM ducklake_metadata.ducklake_partition_info WHERE end_snapshot IS NOT NULL +---- +0 + +statement ok +ALTER TABLE partitioned_tbl_renamed RENAME TO partitioned_tbl + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=0 +---- +5000 + +# verify files are partitioned +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 2 part_key=0 +1 2 part_key=1 + +# verify files are pruned when querying the partitions +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=1 +---- +analyzed_plan :.*Total Files Read: 1.* + +# verify partition values are written +query IIII +SELECT * FROM ducklake_metadata.ducklake_file_partition_value +---- +0 1 0 0 +1 1 0 1 + +# append to partition +statement ok +INSERT INTO partitioned_tbl SELECT (i%2) + 1, concat('thisisanotherstring_', i) FROM range(10000) t(i) + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=2 +---- +5000 + +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 2 part_key=0 +1 2 part_key=1 +2 2 part_key=1 +3 2 part_key=2 + +# set partition and append in the same transaction +statement ok +CREATE TABLE partition_tbl2(part_key INTEGER, values VARCHAR); + +statement ok +BEGIN + +statement ok +ALTER TABLE partition_tbl2 SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partition_tbl2 SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +query I +SELECT COUNT(*) FROM partition_tbl2 WHERE part_key=0 +---- +5000 + +statement ok +COMMIT + +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])[/\\].*', 1) +FROM ducklake_metadata.ducklake_data_file +WHERE table_id = 3 +ORDER BY ALL +---- +4 4 part_key=0 +5 4 part_key=1 + +query I +SELECT COUNT(*) FROM partition_tbl2 WHERE part_key=0 +---- +5000 
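+# at this point partition_tbl2's files live in hive-style folders, e.g.
+# main/partition_tbl2/part_key=0/<generated-name>.parquet (path shape inferred
+# from the regexp checks above; the file name itself is generated)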
+ +# create table, set partition and append in the same transaction +statement ok +BEGIN + +statement ok +CREATE TABLE partition_tbl3(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partition_tbl3 SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partition_tbl3 SELECT i%2, concat('yetanotherstring_', i) FROM range(10000) t(i) + +query I +SELECT COUNT(*) FROM partition_tbl3 WHERE part_key=0 +---- +5000 + +statement ok +COMMIT + +query I +SELECT COUNT(*) FROM partition_tbl3 WHERE part_key=0 +---- +5000 + +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])[/\\].*', 1) +FROM ducklake_metadata.ducklake_data_file +WHERE table_id = 5 +ORDER BY ALL +---- +6 6 part_key=0 +7 6 part_key=1 + +# detach and re-run +statement ok +USE memory + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_partitioning.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_partitioning', METADATA_CATALOG 'ducklake_metadata2') + +statement ok +USE ducklake + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=2 +---- +5000 + +query I +SELECT COUNT(*) FROM partition_tbl2 WHERE part_key=0 +---- +5000 + +statement ok +DROP TABLE partitioned_tbl + +query I +SELECT COUNT(*) FROM partition_tbl2 WHERE part_key=0 +---- +5000 diff --git a/tests/sqllogictests/sql/partitioning/disable_hive_partitioning.test b/tests/sqllogictests/sql/partitioning/disable_hive_partitioning.test new file mode 100644 index 0000000..cce0d40 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/disable_hive_partitioning.test @@ -0,0 +1,80 @@ +# name: test/sql/partitioning/disable_hive_partitioning.test +# description: Test option to disable subfolder creation for partitioning +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partition_use_hive', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +CREATE TABLE hiveless_partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +CALL ducklake.set_option('hive_file_pattern', False, table_name => 'hiveless_partitioned_tbl') + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(5) t(i) + +# This is partitioned +query II +FROM '${DATA_PATH}/ducklake_partition_use_hive/main/partitioned_tbl/part_key=0/*.parquet' +---- +0 thisisastring_0 +0 thisisastring_2 +0 thisisastring_4 + +statement ok +ALTER TABLE hiveless_partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO hiveless_partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(5) t(i) + +statement error +FROM '${DATA_PATH}/ducklake_partition_use_hive/main/hiveless_partitioned_tbl/part_key=0/*.parquet' +---- +No files found that match the pattern + +statement ok +CALL ducklake.set_option('hive_file_pattern', False, table_name => 'partitioned_tbl') + + +# What if we now insert more data into our partitioned table +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(5) t(i) + +# Data remains the same +query II +FROM '${DATA_PATH}/ducklake_partition_use_hive/main/partitioned_tbl/part_key=0/*.parquet' +---- +0 
thisisastring_0 +0 thisisastring_2 +0 thisisastring_4 + +# But we can still query the table in full +query II +FROM partitioned_tbl where part_key = 0 +---- +0 thisisastring_0 +0 thisisastring_2 +0 thisisastring_4 +0 thisisastring_0 +0 thisisastring_2 +0 thisisastring_4 \ No newline at end of file diff --git a/tests/sqllogictests/sql/partitioning/drop_partition_column.test b/tests/sqllogictests/sql/partitioning/drop_partition_column.test new file mode 100644 index 0000000..bec8639 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/drop_partition_column.test @@ -0,0 +1,54 @@ +# name: test/sql/partitioning/drop_partition_column.test +# description: Drop partitioning column +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_drop_partition_column') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=0 +---- +5000 + +# cannot drop column on which we are partitioning +statement error +ALTER TABLE partitioned_tbl DROP COLUMN part_key +---- +the table is partitioned by this column + +statement ok +ALTER TABLE partitioned_tbl RESET PARTITIONED BY; + +# now we can +statement ok +ALTER TABLE partitioned_tbl DROP COLUMN part_key + +statement ok +INSERT INTO partitioned_tbl SELECT concat('thisisastring_', i) FROM range(5000, 10000) t(i) + +query II +SELECT COUNT(*), SUM(STRLEN(values)) FROM partitioned_tbl +---- +15000 268890 diff --git a/tests/sqllogictests/sql/partitioning/multi_key_merge.test b/tests/sqllogictests/sql/partitioning/multi_key_merge.test new file mode 100644 index 0000000..d0c495e --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/multi_key_merge.test @@ -0,0 +1,41 @@ +# name: test/sql/partitioning/multi_key_merge.test +# description: Test partitioning by multiple keys followed by a merge of adjacent files +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on multiple columns +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/multi_key_merge', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE sales_data ( + sale_id INTEGER, + product_name VARCHAR, + country VARCHAR, +); + +statement ok +ALTER TABLE sales_data SET PARTITIONED BY (product_name, country); + +statement ok +INSERT INTO sales_data VALUES + (1, 'Laptop', 'UK'), + (2, 'Mouse', 'GR'); + +statement ok +INSERT INTO sales_data VALUES + (3, 'Monitor', 'ES'), + (4, 'Laptop', 'UK'); + +statement ok +CALL merge_adjacent_files(); diff --git a/tests/sqllogictests/sql/partitioning/multi_key_partition.test b/tests/sqllogictests/sql/partitioning/multi_key_partition.test new file mode 100644 index 0000000..178428a --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/multi_key_partition.test @@ -0,0 +1,61 @@ +# name: test/sql/partitioning/multi_key_partition.test +# description: Test multi-key partitioning +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on
a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_mk_partitioning', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(a INTEGER, b INTEGER, c INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (a,b,c); + +statement ok +INSERT INTO partitioned_tbl VALUES (10, 100, 1000, 'data 1'), (20,200,2000, 'data 2') + +query IIIII +SELECT data_file_id, partition_id, regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1), regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1), regexp_extract(path, '.*(c=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 2 a=10 b=100 c=1000 +1 2 a=20 b=200 c=2000 + +# verify files are pruned when querying the partitions +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl WHERE a=10 +---- +analyzed_plan :.*Total Files Read: 1.* + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl WHERE b=100 +---- +analyzed_plan :.*Total Files Read: 1.* + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl WHERE c=1000 +---- +analyzed_plan :.*Total Files Read: 1.* + +query IIII +SELECT * FROM ducklake_metadata.ducklake_file_partition_value ORDER BY ALL +---- +0 1 0 10 +0 1 1 100 +0 1 2 1000 +1 1 0 20 +1 1 1 200 +1 1 2 2000 diff --git a/tests/sqllogictests/sql/partitioning/multi_table_partition.test b/tests/sqllogictests/sql/partitioning/multi_table_partition.test new file mode 100644 index 0000000..4b8d746 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/multi_table_partition.test @@ -0,0 +1,36 @@ +# name: test/sql/partitioning/multi_table_partition.test +# description: Test setting the partitioning key of multiple tables in the same transaction +# group: [partitioning] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partitioning_multi_table', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +CREATE TABLE partitioned_tbl2(part_key INTEGER, values VARCHAR); + +statement ok +BEGIN + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +ALTER TABLE partitioned_tbl2 SET PARTITIONED BY (part_key); + +statement ok +COMMIT diff --git a/tests/sqllogictests/sql/partitioning/partition_nop.test b/tests/sqllogictests/sql/partitioning/partition_nop.test new file mode 100644 index 0000000..c6e4627 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/partition_nop.test @@ -0,0 +1,126 @@ +# name: test/sql/partitioning/partition_nop.test +# description: Test partitioning nop options. 
+# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partitioning_nop', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(5) t(i) + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +1 + +# This should do nothing +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +1 + +# This should now change the partition, since it's different +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key, values); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +2 + +# This doesn't change, since it's the same +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key, values); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +2 + +# This will now change it, since it's a different order +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (values, part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +3 + +# We can return to a previous partition +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +4 + +# Let's check that multiple tables still work fine +statement ok +CREATE TABLE partitioned_tbl_2(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl_2 SET PARTITIONED BY (part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +5 + +statement ok +ALTER TABLE partitioned_tbl_2 SET PARTITIONED BY (part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +5 + +statement ok +ALTER TABLE partitioned_tbl_2 SET PARTITIONED BY (values, part_key); + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +6 + +# We can remove all partitioning from a table +statement ok +ALTER TABLE partitioned_tbl RESET PARTITIONED BY; + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +7 + +# But if the table is already unpartitioned, this is a no-op +statement ok +ALTER TABLE partitioned_tbl RESET PARTITIONED BY; + +query I +select count(*) FROM ducklake_metadata.ducklake_partition_info +---- +7 + diff --git a/tests/sqllogictests/sql/partitioning/partition_tpch.test_slow b/tests/sqllogictests/sql/partitioning/partition_tpch.test_slow new file mode 100644 index 0000000..933c0e9 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/partition_tpch.test_slow @@ -0,0 +1,83 @@ +# name: test/sql/partitioning/partition_tpch.test_slow +# description: Test partitioning of TPC-H +# group: [partitioning] + +require ducklake + +require parquet + +require tpch + +statement ok +CALL dbgen(sf=1); + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partition_tpch', METADATA_CATALOG 'ducklake_metadata') + +statement
ok +COPY FROM DATABASE memory TO ducklake (SCHEMA) + +statement ok +ALTER TABLE ducklake.lineitem SET PARTITIONED BY (year(l_shipdate), month(l_shipdate)); + +statement ok +ALTER TABLE ducklake.orders SET PARTITIONED BY (year(o_orderdate), o_orderstatus); + +statement ok +ALTER TABLE ducklake.part SET PARTITIONED BY (p_mfgr); + +statement ok +COPY FROM DATABASE memory TO ducklake (DATA) + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_partition_tpch/main/lineitem/**') +---- +84 + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_partition_tpch/main/orders/**') +---- +9 + +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_partition_tpch/main/part/**') +---- +5 + +statement ok +USE ducklake + +# verify partitions are used +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM lineitem WHERE l_shipdate >= DATE '1993-07-01' AND l_shipdate < DATE '1993-08-01' +---- +analyzed_plan :.*Total Files Read: 1.* + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM orders WHERE o_orderstatus = 'F' +---- +analyzed_plan :.*Total Files Read: 4.* + +# run TPC-H +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop diff --git a/tests/sqllogictests/sql/partitioning/partitioning_alter.test b/tests/sqllogictests/sql/partitioning/partitioning_alter.test new file mode 100644 index 0000000..0d0b192 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/partitioning_alter.test @@ -0,0 +1,59 @@ +# name: test/sql/partitioning/partitioning_alter.test +# description: Test altering a table partitioned by year/month +# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on transforms of a timestamp column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partition_alter', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(id INTEGER, ts TIMESTAMP, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (year(ts), month(ts)); + +statement ok +INSERT INTO partitioned_tbl SELECT i, TIMESTAMP '2020-01-01' + interval (i) hours, concat('thisisastring_', i) FROM range(5000) t(i) + +statement ok +ALTER TABLE partitioned_tbl DROP COLUMN id + +statement ok +INSERT INTO partitioned_tbl SELECT TIMESTAMP '2020-01-01' + interval (i) hours, concat('thisisastring_', i) FROM range(5000, 10000) t(i) + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl GROUP BY year(ts) +---- +2020 8784 thisisastring_0 thisisastring_999 +2021 1216 thisisastring_8784 thisisastring_9999 + +query II +SELECT DISTINCT regexp_extract(path, '.*year=([0-9]+)[/\\].*', 1)::INT AS year_part, regexp_extract(path, '.*month=([0-9]+)[/\\].*', 1)::INT AS month_part +FROM glob('${DATA_PATH}/ducklake_partition_alter/**') t(path) ORDER BY ALL +---- +2020 1 +2020 2 +2020 3 +2020 4 +2020 5 +2020 6 +2020 7 +2020 8 +2020 9 +2020 10 +2020 11 +2020 12 +2021 1 +2021 2 diff --git a/tests/sqllogictests/sql/partitioning/year_month_day.test b/tests/sqllogictests/sql/partitioning/year_month_day.test new file mode 100644 index 0000000..9111f18 --- /dev/null +++ b/tests/sqllogictests/sql/partitioning/year_month_day.test @@ -0,0 +1,53 @@ +# name: test/sql/partitioning/year_month_day.test +# description: Test partitioning by year/month/day
+# group: [partitioning] + +require ducklake + +require parquet + +# partitioning based on transforms of a timestamp column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_partition_transform', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(id INTEGER, ts TIMESTAMP, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (year(ts), month(ts)); + +statement ok +INSERT INTO partitioned_tbl SELECT i, TIMESTAMP '2020-01-01' + interval (i) hours, concat('thisisastring_', i) FROM range(10000) t(i) + +query IIII +SELECT year(ts), COUNT(*), MIN(values), MAX(values) FROM partitioned_tbl GROUP BY year(ts) +---- +2020 8784 thisisastring_0 thisisastring_999 +2021 1216 thisisastring_8784 thisisastring_9999 + +query II +SELECT regexp_extract(path, '.*year=([0-9]+)[/\\].*', 1)::INT AS year_part, regexp_extract(path, '.*month=([0-9]+)[/\\].*', 1)::INT AS month_part +FROM glob('${DATA_PATH}/ducklake_partition_transform/**') t(path) ORDER BY ALL +---- +2020 1 +2020 2 +2020 3 +2020 4 +2020 5 +2020 6 +2020 7 +2020 8 +2020 9 +2020 10 +2020 11 +2020 12 +2021 1 +2021 2 diff --git a/tests/sqllogictests/sql/remove_orphans/mixed_paths.test b/tests/sqllogictests/sql/remove_orphans/mixed_paths.test new file mode 100644 index 0000000..e3c240a --- /dev/null +++ b/tests/sqllogictests/sql/remove_orphans/mixed_paths.test @@ -0,0 +1,70 @@ +# name: test/sql/remove_orphans/mixed_paths.test +# group: [remove_orphans] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/mixed_paths', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE test (a integer) + +statement ok +insert into test values (1); + +statement ok +CREATE TABLE test_2 (a integer) + +statement ok +insert into test_2 values (1); + +# Add an orphaned file inside table test's directory +statement ok +COPY test to '${DATA_PATH}/mixed_paths/main/test/bla.parquet'; + +# Add an orphaned file inside table test_2's directory +statement ok +COPY test to '${DATA_PATH}/mixed_paths/main/test_2/bla.parquet'; + +# Add an orphaned file at the schema level +statement ok +COPY test to '${DATA_PATH}/mixed_paths/main/bla.parquet'; + +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +3 + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/mixed_paths/main/test/bla.parquet'); + +query I
SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +2 + +statement ok +CALL ducklake_add_data_files('ducklake', 'test', '${DATA_PATH}/mixed_paths/main/test_2/bla.parquet'); + +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +1 + +statement ok +CALL ducklake_add_data_files('ducklake', 'test_2', '${DATA_PATH}/mixed_paths/main/bla.parquet'); + +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +0 \ No newline at end of file diff --git a/tests/sqllogictests/sql/remove_orphans/remove_orphaned_files.test
b/tests/sqllogictests/sql/remove_orphans/remove_orphaned_files.test new file mode 100644 index 0000000..84feb40 --- /dev/null +++ b/tests/sqllogictests/sql/remove_orphans/remove_orphaned_files.test @@ -0,0 +1,89 @@ +# name: test/sql/remove_orphans/remove_orphaned_files.test +# description: Test the removal of orphaned files +# group: [remove_orphans] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/remove_orphaned_files', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE test (a integer) + +statement ok +insert into test values (1); + +statement ok +insert into test values (2); + +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +# Add orphaned file +statement ok +COPY test to '${DATA_PATH}/remove_orphaned_files/main/test/bla.parquet'; + +# We should now have 4 files: the 2 inserted files, the merged file, and the orphan +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/remove_orphaned_files/main/test/*') +---- +4 + +statement ok +CALL ducklake.set_option('delete_older_than', '1 week') + +# cleanup_all => true takes precedence over the configured delete_older_than option +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +1 + + +statement ok +CALL ducklake.set_option('delete_older_than', '1 millisecond') + +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', dry_run => true); +---- +1 + +# Test an invalid interval value +statement error +CALL ducklake.set_option('delete_older_than', 'pedro') +---- +delete_older_than is not a valid interval value. + +statement ok +CALL ducklake.set_option('delete_older_than', '') + +query I +SELECT ends_with(path,'bla.parquet') FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +True + +# Check older_than filter +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', older_than => NOW() - INTERVAL '1 week', dry_run => true); +---- +0 + +statement ok +CALL ducklake_delete_orphaned_files('ducklake', cleanup_all => true); + +# Check file is removed +query I +SELECT count(*) FROM ducklake_delete_orphaned_files('ducklake', cleanup_all => true, dry_run => true); +---- +0 diff --git a/tests/sqllogictests/sql/rewrite_data_files/insert_delete_loop.test b/tests/sqllogictests/sql/rewrite_data_files/insert_delete_loop.test new file mode 100644 index 0000000..185f2d9 --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/insert_delete_loop.test @@ -0,0 +1,44 @@ +# name: test/sql/rewrite_data_files/insert_delete_loop.test +# description: Test rewriting deletes in an insert-delete loop +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/insert_delete_loop', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE tbl(i INTEGER); + +loop i 1 10 + +statement ok +INSERT INTO tbl FROM range(${i} * 1000, ${i} * 1000 + 1000); + +statement ok +DELETE FROM tbl WHERE i BETWEEN ${i} * 1000 AND ${i} * 1000 + 950 + +query I +SELECT COUNT(*) = ${i} * 49 FROM tbl +---- +true + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'tbl'); + +query I +SELECT COUNT(*) = ${i} * 49 FROM tbl +---- +true + +endloop diff --git
a/tests/sqllogictests/sql/rewrite_data_files/last_snapshot_multiple_inserts.test b/tests/sqllogictests/sql/rewrite_data_files/last_snapshot_multiple_inserts.test new file mode 100644 index 0000000..2c5951d --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/last_snapshot_multiple_inserts.test @@ -0,0 +1,131 @@ +# name: test/sql/rewrite_data_files/last_snapshot_multiple_inserts.test +# description: Test that rewriting deletes works over a series of insertions and deletions +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_multiple_inserts', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE test(key INTEGER, values VARCHAR); + +# 2 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(100) t(i) + +# Let's do a few insertions interleaved with deletions +# 3 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key < 50; + +# 4 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(100,200) t(i) + +# 5 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key < 80; + +# 6 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key = 120; + +# 7 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(200,300) t(i) + +# 8 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key > 200 and key < 250; + +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_multiple_inserts/**/*') +---- +7 + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# One file was added and one removed, so the file count is unchanged +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_multiple_inserts/**/*') +---- +7 + +query I +select count(*) from test; +---- +170 + +# Verify that previous snapshots still return the correct results +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +1 {tables_created=[main.test]} +2 {tables_inserted_into=[1]} +3 {tables_deleted_from=[1]} +4 {tables_inserted_into=[1]} +5 {tables_deleted_from=[1]} +6 {tables_deleted_from=[1]} +7 {tables_inserted_into=[1]} +8 {tables_deleted_from=[1]} +9 {} + +loop i 0 2 + +query I +select count(*) from test AT (VERSION => 3); +---- +50 + +query I +select count(*) from test AT (VERSION => 4); +---- +150 + +query I +select count(*) from test AT (VERSION => 5); +---- +120 + +query I +select count(*) from test AT (VERSION => 6); +---- +119 + +query I +select count(*) from test AT (VERSION => 7); +---- +219 + +query I +select count(*) from test AT (VERSION => 8); +---- +170 + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +endloop \ No newline at end of file diff --git a/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_merge_rewrite.test b/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_merge_rewrite.test new file mode 100644 index 0000000..f2351ba --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_merge_rewrite.test @@ -0,0 +1,167 @@ +# name: test/sql/rewrite_data_files/test_last_snapshot_merge_rewrite.test
+# description: Test deletion works in combination with merge +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_last_merge_rewrite', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +loop i 0 2 + +statement ok +CREATE TABLE test(key INTEGER, values VARCHAR); + +# 2 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(100) t(i) + +# Let's do a few insertions interleaved with deletions +# 3 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key < 50; + +# 4 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(100,200) t(i) + +# 5 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key < 80; + +# 6 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key = 120; + +# 7 {tables_inserted_into=[1]} +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(200,300) t(i) + +# 8 {tables_deleted_from=[1]} +statement ok +DELETE FROM test +WHERE key > 200 and key < 250; + +skipif i=1 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +7 + +skipif i=0 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +14 + + +skipif i=0 +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +skipif i=1 +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +skipif i=0 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +14 + +skipif i=1 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +7 + +skipif i=0 +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +skipif i=1 +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +skipif i=0 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +15 + +skipif i=1 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +7 + +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +skipif i=1 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +7 + +skipif i=0 +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_last_merge_rewrite/**/*') +---- +14 + +query I +select count(*) from test; +---- +170 + +query I +select count(*) from test AT (VERSION => 3); +---- +50 + +query I +select count(*) from test AT (VERSION => 4); +---- +150 + +query I +select count(*) from test AT (VERSION => 5); +---- +120 + +query I +select count(*) from test AT (VERSION => 6); +---- +119 + +query I +select count(*) from test AT (VERSION => 7); +---- +219 + +query I +select count(*) from test AT (VERSION => 8); +---- +170 + +statement ok +drop table test; + +endloop \ No newline at end of file diff --git a/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_rewrite.test b/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_rewrite.test new file mode 100644 index 0000000..badd4a3 --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_last_snapshot_rewrite.test @@ -0,0 +1,223 @@ +# name: 
test/sql/rewrite_data_files/test_last_snapshot_rewrite.test +# description: Test calling the delete rewrite function multiple times over a series of deletes, and verify that the threshold parameter behaves well +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rewrite_data', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +# Start off with a simple case
statement ok +CREATE TABLE test(key INTEGER, values VARCHAR); + +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(10000) t(i) + +# Let's do three rounds of deletions +statement ok +DELETE FROM test +WHERE key < 1000; + +statement ok +DELETE FROM test +WHERE key > 9000; + +statement ok +DELETE FROM test +WHERE key > 5000; + +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +1 {tables_created=[main.test]} +2 {tables_inserted_into=[1]} +3 {tables_deleted_from=[1]} +4 {tables_deleted_from=[1]} +5 {tables_deleted_from=[1]} + +query I +SELECT delete_file is not null FROM ducklake_list_files('ducklake', 'test') +---- +True + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 +3 + +# This won't do much, since our default threshold is too high +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test'); + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 +3 + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0.5); + +# More than half of the file's rows are deleted, so the rewrite can drop its delete file +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 + +query I +SELECT COUNT(*) from test +---- +4001 + +# Let's do one more deletion to test the global setting +statement ok +CALL ducklake.set_option('rewrite_delete_threshold', 0.1) + +statement ok +DELETE FROM test +WHERE key > 4000; + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 +5 + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test'); + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 + +query I +SELECT COUNT(*) from test +---- +3001 + +statement ok +DELETE FROM test +WHERE key = 3000; + +statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test'); + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 +7 + +# Test that a manual delete_threshold option has priority over the global value.
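+# The delete above removed only 1 of 3001 rows, far below the global +# rewrite_delete_threshold of 0.1 set earlier, which is why the plain rewrite call +# left delete file 7 in place; an explicit delete_threshold => 0 overrides the global option.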
+statement ok +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +query I +SELECT delete_file_id FROM ducklake_metadata.ducklake_delete_file +---- +1 +2 + +# Try invalid values for threshold +statement error +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => -1); +---- +The delete_threshold option must be between 0 and 1 + +statement error +CALL ducklake.set_option('rewrite_delete_threshold', -1) +---- +The rewrite_delete_threshold must be between 0 and 1 + +# Test cleanup +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_rewrite_data/**/*') +---- +9 + +# delete the old files +statement ok +CALL ducklake_cleanup_old_files('ducklake', cleanup_all => true); + +# We can safely remove 3 deleted files +query I +SELECT COUNT(*) FROM GLOB('${DATA_PATH}/ducklake_rewrite_data/**/*') +---- +6 + +# Let's check every snapshot has the right answer +query II +SELECT snapshot_id, changes FROM snapshots(); +---- +0 {schemas_created=[main]} +1 {tables_created=[main.test]} +2 {tables_inserted_into=[1]} +3 {tables_deleted_from=[1]} +4 {tables_deleted_from=[1]} +5 {tables_deleted_from=[1]} +6 {} +7 {tables_deleted_from=[1]} +8 {} +9 {tables_deleted_from=[1]} +10 {} + +query I +select count(*) from test AT (VERSION => 3); +---- +9000 + +query I +select count(*) from test AT (VERSION => 4); +---- +8001 + +query I +select count(*) from test AT (VERSION => 5); +---- +4001 + +query I +select count(*) from test AT (VERSION => 6); +---- +4001 + +query I +select count(*) from test AT (VERSION => 7); +---- +3001 + +query I +select count(*) from test AT (VERSION => 8); +---- +3001 + +query I +select count(*) from test AT (VERSION => 9); +---- +3000 + +query I +select count(*) from test AT (VERSION => 10); +---- +3000 + diff --git a/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_concurrency.test b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_concurrency.test new file mode 100644 index 0000000..b4b75b8 --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_concurrency.test @@ -0,0 +1,68 @@ +# name: test/sql/rewrite_data_files/test_rewrite_concurrency.test +# description: test concurrent rewrites +# group: [rewrite_data_files] + +require ducklake + +require parquet + +require notwindows + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_concurrent_rewrite', METADATA_CATALOG 'ducklake_metadata') + +statement ok +SET ducklake_retry_wait_ms=100 + +statement ok +SET ducklake_retry_backoff=2.0 + +statement ok +use ducklake + +# Start off with simple case +statement ok +CREATE TABLE test(key INTEGER, values VARCHAR); + +statement ok +INSERT INTO test SELECT i, concat('thisisastring_', i) FROM range(100) t(i) + +# Let's do three rounds of deletions +statement ok +DELETE FROM test +WHERE key < 39; + +statement ok +DELETE FROM test +WHERE key > 90; + +statement ok +DELETE FROM test +WHERE key > 41; + +concurrentloop i 0 2 + +statement maybe +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); +---- +attempting to compact table with index "1" - but another transaction has compacted it + +endloop + +query II +FROM test +---- +39 thisisastring_39 +40 thisisastring_40 +41 thisisastring_41 + +query III +SELECT data_file_id, begin_snapshot, end_snapshot FROM ducklake_metadata.ducklake_data_file +---- +0 2 5 +4 5 NULL diff --git 
a/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_db.test b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_db.test new file mode 100644 index 0000000..68d2d75 --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_db.test @@ -0,0 +1,150 @@ +# name: test/sql/rewrite_data_files/test_rewrite_db.test +# description: Test calling the rewrite function on all tables in the database +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/rewrite_db', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +# Let's create multiple tables and do insertions and deletions +loop i 0 2 + +statement ok +CREATE TABLE test_${i} (key INTEGER, values VARCHAR); + +# 2 {tables_inserted_into=[1]} +statement ok +INSERT INTO test_${i} SELECT i, concat('thisisastring_', i) FROM range(100) t(i) + +# Let's do a few insertions interleaved with deletions +# 3 {tables_deleted_from=[1]} +statement ok +DELETE FROM test_${i} +WHERE key < 50; + +# 4 {tables_inserted_into=[1]} +statement ok +INSERT INTO test_${i} SELECT i, concat('thisisastring_', i) FROM range(100,200) t(i) + +# 5 {tables_deleted_from=[1]} +statement ok +DELETE FROM test_${i} +WHERE key < 80; + +# 6 {tables_deleted_from=[1]} +statement ok +DELETE FROM test_${i} +WHERE key = 120; + +endloop + +# Check the files before the rewrite +query IIII +SELECT data_file_id, delete_file_id, begin_snapshot, end_snapshot FROM ducklake_metadata.ducklake_delete_file +---- +0 1 3 5 +0 3 5 NULL +2 4 6 NULL +5 6 9 11 +5 8 11 NULL +7 9 12 NULL + + +query III +SELECT data_file_id, begin_snapshot, end_snapshot FROM ducklake_metadata.ducklake_data_file +---- +0 2 NULL +2 4 NULL +5 8 NULL +7 10 NULL + +# Now let's call the rewrite function on the whole DB +statement ok +CALL ducklake_rewrite_data_files('ducklake', delete_threshold => 0); + +# Check the files after the rewrite; we should have removed the last delete file of each table (i.e., ids 4 and 9) +query IIII +SELECT data_file_id, delete_file_id, begin_snapshot, end_snapshot FROM ducklake_metadata.ducklake_delete_file ORDER BY ALL +---- +0 1 3 5 +0 3 5 6 +5 6 9 11 +5 8 11 12 + +query III +SELECT data_file_id, begin_snapshot, end_snapshot FROM ducklake_metadata.ducklake_data_file ORDER BY ALL +---- +0 2 6 +2 4 6 +5 8 12 +7 10 12 +10 6 NULL +11 12 NULL + +# Validate some results +query I +select count(*) from test_0 AT (VERSION => 3); +---- +50 + +query I +select count(*) from test_0 AT (VERSION => 4); +---- +150 + +query I +select count(*) from test_0 AT (VERSION => 5); +---- +120 + +query I +select count(*) from test_0 AT (VERSION => 6); +---- +119 + +# Check other table +query I +select count(*) from test_1 AT (VERSION => 8); +---- +100 + +query I +select count(*) from test_1 AT (VERSION => 9); +---- +50 + +query I +select count(*) from test_1 AT (VERSION => 10); +---- +150 + +query I +select count(*) from test_1 AT (VERSION => 11); +---- +120 + +query I +select count(*) from test_1 AT (VERSION => 12); +---- +119 + +query I +select count(*) from test_0; +---- +119 + +query I +select count(*) from test_1; +---- +119 \ No newline at end of file diff --git a/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_merge_adjacent.test b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_merge_adjacent.test new file mode 100644 index
0000000..600ea29 --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_merge_adjacent.test @@ -0,0 +1,49 @@ +# name: test/sql/rewrite_data_files/test_rewrite_merge_adjacent.test +# description: Test calling rewrite and merge adjacent +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_rewrite_merge_adjacent/', METADATA_CATALOG 'test_rewrite_merge_adjacent'); + +statement ok +USE ducklake; + +statement ok +CREATE TABLE t (a VARCHAR, b INT); + +# Generates the 1st file +statement ok +INSERT INTO t VALUES ('text', 1); + +# Generates the 2nd file +statement ok +INSERT INTO t VALUES ('text2', 2); + +# Generates the 3rd file +statement ok +INSERT INTO t SELECT 'text' a, range b FROM range(100_000); + +# Deletes on the 3rd file +statement ok +DELETE FROM t WHERE b > 100; + +# Rewrites 3rd file +statement ok +CALL ducklake_rewrite_data_files('ducklake', 't'); + +# Should merge all of the files into a single one +statement ok +CALL ducklake_merge_adjacent_files('ducklake'); + +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 't'); +---- +1 diff --git a/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_transaction_conflict.test b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_transaction_conflict.test new file mode 100644 index 0000000..47d109a --- /dev/null +++ b/tests/sqllogictests/sql/rewrite_data_files/test_rewrite_transaction_conflict.test @@ -0,0 +1,123 @@ +# name: test/sql/rewrite_data_files/test_rewrite_transaction_conflict.test +# description: test transaction conflict on delete rewrites +# group: [rewrite_data_files] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflict_rewrite', METADATA_CATALOG 'ducklake_metadata') + +statement ok +SET immediate_transaction_mode=true + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test FROM range(100) t(i) + +statement ok +DELETE FROM ducklake.test WHERE i < 10 + +# try to commit a delete after a compaction: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok con2 +DELETE FROM ducklake.test + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +statement ok +DELETE FROM ducklake.test WHERE i < 20 + +# try to commit a compaction after a deletion: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DELETE FROM ducklake.test WHERE i < 30 + +statement ok con2 +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +statement ok +DELETE FROM ducklake.test WHERE i < 40 + +# two transactions both try to compact: conflict
statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok con2 +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +statement ok +INSERT INTO ducklake.test VALUES (7); + +statement ok +INSERT INTO ducklake.test VALUES (8); + +# 
compaction and insert: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_rewrite_data_files('ducklake', 'test', delete_threshold => 0); + +statement ok con2 +INSERT INTO ducklake.test VALUES (9); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +query I +SELECT count(*) FROM ducklake.test ORDER BY ALL +---- +63 \ No newline at end of file diff --git a/tests/sqllogictests/sql/rowid/ducklake_row_id.test b/tests/sqllogictests/sql/rowid/ducklake_row_id.test new file mode 100644 index 0000000..884e640 --- /dev/null +++ b/tests/sqllogictests/sql/rowid/ducklake_row_id.test @@ -0,0 +1,162 @@ +# name: test/sql/rowid/ducklake_row_id.test +# description: test ducklake row-id tracking +# group: [rowid] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rowid_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# empty +query I +SELECT rowid FROM ducklake.test +---- + +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test SELECT i FROM range(3) t(i) + +statement ok +INSERT INTO ducklake.test SELECT i FROM range(5, 7) t(i) + +# transaction local row ids +query I +SELECT rowid FROM ducklake.test +---- +1000000000000000000 +1000000000000000001 +1000000000000000002 +1000000000000000003 +1000000000000000004 + +query I +SELECT rowid FROM ducklake.test WHERE rowid=1000000000000000001 +---- +1000000000000000001 + +statement ok +COMMIT + +statement ok +INSERT INTO ducklake.test SELECT i FROM range(10, 15) t(i) + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 0 +1 1 +2 2 +3 5 +4 6 +5 10 +6 11 +7 12 +8 13 +9 14 + +query II +SELECT rowid, i FROM ducklake.test WHERE rowid=3 +---- +3 5 + +statement ok +DELETE FROM ducklake.test WHERE i%2=1 + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 0 +2 2 +4 6 +5 10 +7 12 +9 14 + +query I +UPDATE ducklake.test SET i=i+1000 WHERE i<3 OR i>10 +---- +4 + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 1000 +2 1002 +4 6 +5 10 +7 1012 +9 1014 + +query II +SELECT rowid, i FROM ducklake.test WHERE rowid=9 +---- +9 1014 + +statement ok +BEGIN + +statement ok +UPDATE ducklake.test SET i=i+2000 + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 3000 +2 3002 +4 2006 +5 2010 +7 3012 +9 3014 + +statement ok +UPDATE ducklake.test SET i=i+10000 WHERE i<3000 OR i>3006 + +statement ok +COMMIT + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 3000 +2 3002 +4 12006 +5 12010 +7 13012 +9 13014 + +query I +UPDATE ducklake.test SET i=rowid WHERE rowid=5 +---- +1 + +query II +SELECT rowid, i FROM ducklake.test WHERE rowid >= 4 AND rowid < 10 ORDER BY rowid +---- +4 12006 +5 5 +7 13012 +9 13014 + +statement ok +DELETE FROM ducklake.test WHERE rowid=7 + +query II +SELECT rowid, i FROM ducklake.test ORDER BY rowid +---- +0 3000 +2 3002 +4 12006 +5 5 +9 13014 diff --git a/tests/sqllogictests/sql/rowid/ducklake_row_id_update.test b/tests/sqllogictests/sql/rowid/ducklake_row_id_update.test new file mode 100644 index 0000000..7696813 --- /dev/null +++ b/tests/sqllogictests/sql/rowid/ducklake_row_id_update.test @@ -0,0 +1,42 @@ +# name: test/sql/rowid/ducklake_row_id_update.test +# description: test ducklake row-id tracking over partition and updates +# group: [rowid] + +require ducklake + +require 
parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_row_id_update_2', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake; + +statement ok +CREATE OR REPLACE TABLE test(i INTEGER, j INTEGER); + +statement ok +ALTER TABLE test SET PARTITIONED BY (i); + +statement ok +INSERT INTO test VALUES (1,5), (2,5) + +statement ok +MERGE INTO test + USING ( + SELECT + 1 as i, + 5 as j + ) AS test_updates + ON test.j = test_updates.j + WHEN MATCHED THEN UPDATE; + +query III +SELECT rowid,i,j FROM test order by rowid +---- +0 1 5 +1 1 5 \ No newline at end of file diff --git a/tests/sqllogictests/sql/schema_evolution/field_ids.test b/tests/sqllogictests/sql/schema_evolution/field_ids.test new file mode 100644 index 0000000..3ec4ca9 --- /dev/null +++ b/tests/sqllogictests/sql/schema_evolution/field_ids.test @@ -0,0 +1,79 @@ +# name: test/sql/schema_evolution/field_ids.test +# description: Verify that field ids are correctly written +# group: [schema_evolution] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_field_ids_files') + +# insert into +statement ok +CREATE TABLE ducklake.t1(t1_c1 INT, t1_c2 VARCHAR, t1_c3 TIMESTAMP); + +statement ok +INSERT INTO ducklake.t1 VALUES (42, 'hello', TIMESTAMP '1992-01-01'); + +# CTAS +statement ok +CREATE TABLE ducklake.t2 AS SELECT 84 t2_c1, 'world' t2_c2 + +query III +SELECT name, type, field_id FROM parquet_schema('${DATA_PATH}/ducklake_field_ids_files/main/t1/*.parquet') +WHERE name <> 'duckdb_schema' +ORDER BY name +---- +t1_c1 INT32 1 +t1_c2 BYTE_ARRAY 2 +t1_c3 INT64 3 + + +query III +SELECT name, type, field_id FROM parquet_schema('${DATA_PATH}/ducklake_field_ids_files/main/t2/*.parquet') +WHERE name <> 'duckdb_schema' +ORDER BY name +---- +t2_c1 INT32 1 +t2_c2 BYTE_ARRAY 2 + +# deeply_nested +statement ok +CREATE TABLE ducklake.t3 AS +SELECT {'c1': 42, 'c2': {'n1': 84, 'n2': 32}} struct_col, + [42, 48] l, + [{'x': 42, 'y': 84}] as list_of_structs, + {'l': [1,2,3], 'l2': 42, 'l3': {'x': 42, 'y': 84}} as struct_of_lists + +query II +SELECT name, field_id FROM parquet_schema('${DATA_PATH}/ducklake_field_ids_files/main/t3/*.parquet') +WHERE name <> 'duckdb_schema' +ORDER BY name +---- +c1 2 +c2 3 +element 7 +element 9 +element 14 +l 6 +l 13 +l2 15 +l3 16 +list NULL +list NULL +list NULL +list_of_structs 8 +n1 4 +n2 5 +struct_col 1 +struct_of_lists 12 +x 10 +x 17 +y 11 +y 18 diff --git a/tests/sqllogictests/sql/secrets/ducklake_secrets.test b/tests/sqllogictests/sql/secrets/ducklake_secrets.test new file mode 100644 index 0000000..a21ca8c --- /dev/null +++ b/tests/sqllogictests/sql/secrets/ducklake_secrets.test @@ -0,0 +1,111 @@ +# name: test/sql/secrets/ducklake_secrets.test +# description: Test DuckLake connections with secrets +# group: [secrets] + +require ducklake + +require parquet + +# no secret available +statement error +ATTACH 'ducklake:' AS ducklake +---- +Default secret was not found + +# default secret +statement ok +CREATE SECRET ( + TYPE DUCKLAKE, + METADATA_PATH '__TEST_DIR__/metadata.db', + DATA_PATH '__TEST_DIR__/my_data_path' +); + +statement ok +ATTACH 'ducklake:' AS ducklake + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1), (2), (3); + +# 
verify we are using the correct path +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/my_data_path/**/*.parquet') +---- +1 + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:' AS ducklake + +query I +FROM ducklake.test +---- +1 +2 +3 + +statement ok +DETACH ducklake + +# named secrets +statement ok +CREATE SECRET my_ducklake ( + TYPE DUCKLAKE, + METADATA_PATH '__TEST_DIR__/metadata_named.db', + DATA_PATH '__TEST_DIR__/my_data_path_named' +); + +statement ok +ATTACH 'ducklake:my_ducklake' AS ducklake + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (1), (2), (3); + +# verify we are using the correct path +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/my_data_path_named/**/*.parquet') +---- +1 + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:my_ducklake' AS ducklake + +query I +FROM ducklake.test +---- +1 +2 +3 + +statement ok +DETACH ducklake + +# metadata parameters +statement ok +CREATE SECRET metadata_parameters123 ( + TYPE DUCKLAKE, + METADATA_PATH '__TEST_DIR__/metadata_named.db', + DATA_PATH '__TEST_DIR__/my_data_path_named', + METADATA_PARAMETERS MAP {'TYPE': 'DUCKDBXX'} +); + +statement error +ATTACH 'ducklake:metadata_parameters123' AS ducklake +---- +duckdbxx + +# unknown secret name +statement error +ATTACH 'ducklake:unknown_secret' AS ducklake +---- +Secret "unknown_secret" was not found diff --git a/tests/sqllogictests/sql/settings/max_retry_count.test b/tests/sqllogictests/sql/settings/max_retry_count.test new file mode 100644 index 0000000..07a268b --- /dev/null +++ b/tests/sqllogictests/sql/settings/max_retry_count.test @@ -0,0 +1,171 @@ +# name: test/sql/settings/max_retry_count.test +# description: Test ducklake_max_retry_count configuration reading and usage +# group: [settings] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_retry_test_files'); + +statement ok +USE ducklake + +# Test 1: Verify default setting +query I +SELECT current_setting('ducklake_max_retry_count') +---- +10 + +# Test 2: Set to custom value and verify +statement ok +SET ducklake_max_retry_count = 3; + +query I +SELECT current_setting('ducklake_max_retry_count') +---- +3 + +# Test 3: Create table and insert data to trigger transaction logic +statement ok +CREATE TABLE retry_test(id INTEGER, message VARCHAR); + +statement ok +INSERT INTO retry_test VALUES (1, 'test with retry count 3'); + +# Test 4: Change setting and test again +statement ok +SET ducklake_max_retry_count = 7; + +query I +SELECT current_setting('ducklake_max_retry_count') +---- +7 + +statement ok +INSERT INTO retry_test VALUES (2, 'test with retry count 7'); + +# Test 5: Test session scope +statement ok +SET SESSION ducklake_max_retry_count = 2; + +query I +SELECT current_setting('ducklake_max_retry_count') +---- +2 + +statement ok +INSERT INTO retry_test VALUES (3, 'test with retry count 2'); + +# Test 6: Reset to default +statement ok +RESET ducklake_max_retry_count; + +query I +SELECT current_setting('ducklake_max_retry_count') +---- +10 + +# Verify all data was inserted correctly +query II +SELECT * FROM retry_test ORDER BY id +---- +1 test with retry count 3 +2 test with retry count 7 +3 test with retry count 2 + +query I +SELECT COUNT(*) FROM retry_test +---- +3 + +# Test 7: Simple retry behavior test +statement ok +SET ducklake_max_retry_count = 2; + 
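+# The retry limit only matters when a commit conflicts with another transaction; +# a transaction without concurrent writers should commit on the first attempt, +# even with a low retry count.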
+# Create a table for retry testing +statement ok +CREATE TABLE retry_behavior_test(id INTEGER, data VARCHAR); + +# Simple transaction with potential for retries +statement ok +BEGIN + +statement ok +INSERT INTO retry_behavior_test VALUES (1, 'test_data_1'); + +statement ok +INSERT INTO retry_behavior_test VALUES (2, 'test_data_2'); + +statement ok +COMMIT + +# Verify the transaction completed successfully +query II +SELECT * FROM retry_behavior_test ORDER BY id; +---- +1 test_data_1 +2 test_data_2 + +# Test 8: Test with very low retry count +statement ok +SET ducklake_max_retry_count = 1; + +statement ok +INSERT INTO retry_behavior_test VALUES (3, 'test_with_low_retry'); + +# Test 9: with zero retries +statement ok +SET ducklake_max_retry_count = 0; + +statement ok +INSERT INTO retry_behavior_test VALUES (4, 'test_with_zero_retry'); + +# Final verification +query I +SELECT COUNT(*) FROM retry_behavior_test; +---- +4 + +# Test 10: Test retry behavior with true concurrent transaction conflicts +statement ok +CREATE TABLE ducklake.tbl(id INTEGER); + +concurrentloop i 0 5 + +statement ok +SET ducklake_max_retry_count = 100; + +statement ok +INSERT INTO ducklake.tbl VALUES(${i}); + +endloop + +query I +SELECT COUNT() FROM ducklake.tbl; +---- +5 + + +statement ok +SET ducklake_max_retry_count = 1; + +concurrentloop i 0 10 + +statement maybe +INSERT INTO ducklake.tbl VALUES(${i}); +---- +Exceeded the maximum retry count of 1 set by the ducklake_max_retry_count setting. + +endloop + +query I +SELECT COUNT() >= 5 AND COUNT() < 15 FROM ducklake.tbl; +---- +TRUE \ No newline at end of file diff --git a/tests/sqllogictests/sql/settings/parquet_compression.test b/tests/sqllogictests/sql/settings/parquet_compression.test new file mode 100644 index 0000000..01923b9 --- /dev/null +++ b/tests/sqllogictests/sql/settings/parquet_compression.test @@ -0,0 +1,80 @@ +# name: test/sql/settings/parquet_compression.test +# description: Test parquet compression +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_parquet_compression.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_parquet_compression') + +statement ok +CALL ducklake.set_option('parquet_compression', 'zstd') + +statement ok +CALL ducklake.set_option('parquet_version', '2') + +statement ok +CALL ducklake.set_option('parquet_compression_level', '17') + +statement ok +CALL ducklake.set_option('parquet_row_group_size', '64000') + +statement ok +CREATE TABLE ducklake.tbl AS SELECT i, 'hello world' || i str FROM range(100000) t(i) + +# zstd + v2 encodings are used +query II +SELECT DISTINCT compression, encodings FROM parquet_metadata('__TEST_DIR__/ducklake_parquet_compression/**') ORDER BY ALL +---- +ZSTD DELTA_BINARY_PACKED +ZSTD DELTA_LENGTH_BYTE_ARRAY + +# we have two row groups (of up to 64K rows) +query I +SELECT COUNT(DISTINCT row_group_id) FROM parquet_metadata('__TEST_DIR__/ducklake_parquet_compression/**') ORDER BY ALL +---- +2 + +# ensure setting persists across restarts +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_parquet_compression.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_parquet_compression') + +statement ok +INSERT INTO ducklake.tbl VALUES (42, 'hello world'); + +query II +SELECT DISTINCT compression, encodings FROM parquet_metadata('__TEST_DIR__/ducklake_parquet_compression/**') ORDER BY ALL +---- +ZSTD DELTA_BINARY_PACKED +ZSTD DELTA_LENGTH_BYTE_ARRAY + +# unknown settings +statement error +CALL 
ducklake.set_option('parquet_compression', 'zstdx') +---- +Unsupported + +statement error +CALL ducklake.set_option('parquet_version', 'z') +---- +Could not convert + +statement error +CALL ducklake.set_option('parquet_compression_level', 'z') +---- +Could not convert + +statement error +CALL ducklake.set_option('parquet_row_group_size', 'z') +---- +Could not convert + +statement error +CALL ducklake.set_option('parquet_bla', '42') +---- +Unsupported diff --git a/tests/sqllogictests/sql/settings/parquet_row_group_size_bytes.test b/tests/sqllogictests/sql/settings/parquet_row_group_size_bytes.test new file mode 100644 index 0000000..7434442 --- /dev/null +++ b/tests/sqllogictests/sql/settings/parquet_row_group_size_bytes.test @@ -0,0 +1,38 @@ +# name: test/sql/settings/parquet_row_group_size_bytes.test +# description: Test parquet row group size bytes +# group: [settings] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_parquet_row_group_size_bytes') + +statement ok +CALL ducklake.set_option('parquet_row_group_size_bytes', '10KB') + +statement ok +SET threads=1 + +# FIXME: this should not be necessary +statement ok +SET preserve_insertion_order=false; + +statement ok +CREATE TABLE ducklake.tbl AS SELECT i, 'hello world' || i str FROM range(100000) t(i) + +query I +SELECT COUNT(DISTINCT row_group_id)>10 FROM parquet_metadata('${DATA_PATH}/ducklake_parquet_row_group_size_bytes/**') +---- +true + +query II +SELECT value, scope FROM ducklake.options() WHERE option_name='parquet_row_group_size_bytes' +---- +10000 GLOBAL diff --git a/tests/sqllogictests/sql/settings/per_table_settings.test b/tests/sqllogictests/sql/settings/per_table_settings.test new file mode 100644 index 0000000..a80d7e8 --- /dev/null +++ b/tests/sqllogictests/sql/settings/per_table_settings.test @@ -0,0 +1,117 @@ +# name: test/sql/settings/per_table_settings.test +# description: Test per-table settings +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_per_table_settings.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_per_table_settings_files'); + +# test different settings per schema/table +statement ok +CREATE TABLE ducklake.t1(str VARCHAR) + +statement ok +CREATE TABLE ducklake.t2(str VARCHAR) + +statement ok +CREATE SCHEMA ducklake.s1; + +statement ok +CREATE TABLE ducklake.s1.t1(str VARCHAR); + +statement ok +CREATE TABLE ducklake.s1.t2(str VARCHAR); + +# global default -> uncompressed +statement ok +CALL ducklake.set_option('parquet_compression', 'uncompressed') + +# t1 -> zstd +statement ok +CALL ducklake.set_option('parquet_compression', 'zstd', table_name => 't1') + +# schema s1 -> lz4 +statement ok +CALL ducklake.set_option('parquet_compression', 'lz4', schema => 's1') + +# s1.t1 -> gzip +statement ok +CALL ducklake.set_option('parquet_compression', 'gzip', schema => 's1', table_name => 't1') + +# non-existent schema/table +statement error +CALL ducklake.set_option('parquet_compression', 'gzip', table_name => 'nonexistent_table') +---- +nonexistent_table + +statement error +CALL ducklake.set_option('parquet_compression', 'gzip', schema => 'nonexistent_schema') +---- +nonexistent_schema + +loop i 0 2 + +# t1 uses zstd (table-specific setting) +query I +INSERT INTO ducklake.t1 SELECT 'hello world' || i str FROM range(1000) t(i) +---- +1000 + +query I +SELECT DISTINCT 
compression FROM parquet_metadata('__TEST_DIR__/ducklake_per_table_settings_files/main/t1/**') ORDER BY ALL +---- +ZSTD + +# t2 uses uncompressed (global setting) +query I +INSERT INTO ducklake.t2 SELECT 'hello world' || i str FROM range(1000) t(i) +---- +1000 + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_per_table_settings_files/main/t2/**') ORDER BY ALL +---- +UNCOMPRESSED + +# s1.t1 uses gzip (table-specific setting) +query I +INSERT INTO ducklake.s1.t1 SELECT 'hello world' || i str FROM range(1000) t(i) +---- +1000 + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_per_table_settings_files/s1/t1/**') ORDER BY ALL +---- +GZIP + +# s1.t2 uses lz4 (schema-specific setting) +query I +INSERT INTO ducklake.s1.t2 SELECT 'hello world' || i str FROM range(1000) t(i) +---- +1000 + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_per_table_settings_files/s1/t2/**') ORDER BY ALL +---- +LZ4_RAW + +# all these options are persisted - restart and do it again +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_per_table_settings.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_per_table_settings_files'); + +endloop + +# options retrieval - check that all options are present with correct values and scopes +query IIII +SELECT option_name, value, scope, scope_entry FROM ducklake.options() WHERE option_name='parquet_compression' ORDER BY scope, value; +---- +parquet_compression uncompressed GLOBAL NULL +parquet_compression lz4 SCHEMA s1 +parquet_compression gzip TABLE s1.t1 +parquet_compression zstd TABLE main.t1 diff --git a/tests/sqllogictests/sql/settings/per_thread_output.test b/tests/sqllogictests/sql/settings/per_thread_output.test new file mode 100644 index 0000000..9e123d2 --- /dev/null +++ b/tests/sqllogictests/sql/settings/per_thread_output.test @@ -0,0 +1,49 @@ +# name: test/sql/settings/per_thread_output.test +# description: Test per thread output option +# group: [settings] + +require ducklake + +require parquet + +statement ok +SET preserve_insertion_order=false; + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/per_thread_data') + +statement ok +CALL ducklake.set_option('per_thread_output', 'true') + +# Set up some parquet files to read from +statement ok +CREATE TABLE ducklake.tbl_base AS (SELECT i AS col_a FROM range(0,1000000) tbl(i)) + +# Now create new files by reading from those +statement ok +CREATE TABLE ducklake.tbl_actual AS (FROM ducklake.tbl_base) + +# Verify that multiple files were created +query I +SELECT COUNT(*) > 1 FROM ducklake_list_files('ducklake', 'tbl_actual') +---- +true + +# Now set to off and repeat +statement ok +CALL ducklake.set_option('per_thread_output', 'false') + +statement ok +CREATE TABLE ducklake.tbl_no_thread_output AS (FROM ducklake.tbl_base) + +# Verify that a single file was created +query I +SELECT COUNT(*) FROM ducklake_list_files('ducklake', 'tbl_no_thread_output') +---- +1 + diff --git a/tests/sqllogictests/sql/snapshot_info/ducklake_current_commit.test b/tests/sqllogictests/sql/snapshot_info/ducklake_current_commit.test new file mode 100644 index 0000000..94b3d4e --- /dev/null +++ b/tests/sqllogictests/sql/snapshot_info/ducklake_current_commit.test @@ -0,0 +1,97 @@ +# name: test/sql/snapshot_info/ducklake_current_commit.test +# description: test that the 
ducklake_current_snapshot returns current snapshot id +# group: [snapshot_info] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_current_snapshot_commit'); + +statement ok +USE ducklake; + +query I +FROM ducklake_current_snapshot('ducklake') +---- +0 + +# Snapshot 1 +statement ok +CREATE TABLE integer(i INTEGER); + +query I +FROM ducklake_current_snapshot('ducklake') +---- +1 + +# Start a few transactions +statement ok con1 +BEGIN TRANSACTION + +statement ok con2 +BEGIN TRANSACTION + +statement ok con3 +BEGIN TRANSACTION + +# Each transaction does something +statement ok con1 +CREATE TABLE ducklake.integers_2(i INTEGER) + +statement ok con1 +INSERT into ducklake.integers_2 values (0); + +statement ok con2 +INSERT into ducklake.integer values (0); + +statement ok con3 +INSERT into ducklake.integer values (1); + +# Snapshot still 1 +query I +FROM ducklake_current_snapshot('ducklake') +---- +1 + +# Check the current snapshot from within a transaction +query I con1 +FROM ducklake_current_snapshot('ducklake') +---- +1 + +statement ok con1 +COMMIT + +query I +FROM ducklake_current_snapshot('ducklake') +---- +2 + +statement ok con2 +ROLLBACK + +query I +FROM ducklake_current_snapshot('ducklake') +---- +2 + +statement ok con3 +COMMIT + +query I +FROM ducklake_current_snapshot('ducklake') +---- +3 + +# Try out the macro +query I +FROM current_snapshot() +---- +3 \ No newline at end of file diff --git a/tests/sqllogictests/sql/snapshot_info/ducklake_last_commit.test b/tests/sqllogictests/sql/snapshot_info/ducklake_last_commit.test new file mode 100644 index 0000000..66dca13 --- /dev/null +++ b/tests/sqllogictests/sql/snapshot_info/ducklake_last_commit.test @@ -0,0 +1,127 @@ +# name: test/sql/snapshot_info/ducklake_last_commit.test +# description: test that the ducklake_last_committed_snapshot returns last committed snapshot id +# group: [snapshot_info] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_last_snapshot_commit.db' AS ducklake; + +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +NULL + +# Snapshot 1 +statement ok +CREATE TABLE ducklake.integer(i INTEGER); + +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +1 + +# Start a few transactions +statement ok con1 +BEGIN TRANSACTION + +statement ok con2 +BEGIN TRANSACTION + +statement ok con3 +BEGIN TRANSACTION + +# Each transaction does something +statement ok con1 +CREATE TABLE ducklake.integers_2(i INTEGER) + +statement ok con1 +INSERT into ducklake.integers_2 values (0); + +statement ok con2 +INSERT into ducklake.integer values (0); + +statement ok con3 +INSERT into ducklake.integer values (1); + +# Snapshot still 1 +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +1 + +# Check the last committed snapshot from within a transaction +query I con1 +FROM ducklake_last_committed_snapshot('ducklake') +---- +1 + +statement ok con1 +COMMIT + +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +2 + +statement ok con2 +ROLLBACK + +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +2 + +statement ok con3 +COMMIT + +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +3 + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_last_snapshot_commit.db' AS ducklake; + +statement ok +USE ducklake; + +# The last committed snapshot is reset 
after re-attaching +query I +FROM ducklake_last_committed_snapshot('ducklake') +---- +NULL + +# Try out the macro +query I +FROM last_committed_snapshot() +---- +NULL + +statement ok +SET ducklake_max_retry_count = 100; + + +# Try concurrency +concurrentloop i 0 50 + +skipif i>25 +statement ok +insert into ducklake.integer values (${i}); + +skipif i<25 +statement ok +FROM ducklake_last_committed_snapshot('ducklake') + +endloop + +# 26 inserts on top of snapshot 3 gives snapshot 29 +query I +FROM last_committed_snapshot() +---- +29 \ No newline at end of file diff --git a/tests/sqllogictests/sql/stats/cardinality.test b/tests/sqllogictests/sql/stats/cardinality.test new file mode 100644 index 0000000..e8694eb --- /dev/null +++ b/tests/sqllogictests/sql/stats/cardinality.test @@ -0,0 +1,26 @@ +# name: test/sql/stats/cardinality.test +# description: test ducklake cardinality extension +# group: [stats] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_cardinality_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test FROM range(1000); + +query II +EXPLAIN SELECT * FROM ducklake.test +---- +physical_plan :.*~1,000.* diff --git a/tests/sqllogictests/sql/stats/filter_pushdown.test b/tests/sqllogictests/sql/stats/filter_pushdown.test new file mode 100644 index 0000000..4a5eedc --- /dev/null +++ b/tests/sqllogictests/sql/stats/filter_pushdown.test @@ -0,0 +1,167 @@ +# name: test/sql/stats/filter_pushdown.test +# description: Test filter pushdown into DuckLake files +# group: [stats] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_filter_pushdown_files') + +statement ok +CREATE TABLE ducklake.filter_pushdown(v INTEGER, i INTEGER, d DATE, k DECIMAL(9, 3), s VARCHAR); + +# file 1 - +# i: 0..1000 +# d: 2000-01-01 - 2000-02-11 +# k: 0..100 +# s: 000000...000999 +statement ok +INSERT INTO ducklake.filter_pushdown +SELECT i % 1000 v, i, (TIMESTAMP '2000-01-01' + interval (i) hour)::DATE, i / 10, printf('%06d', i) +FROM range(1000) t(i); + +# file 2 - +# i: 100000..101000 +# d: 2011-05-29 - 2011-07-10 +# k: 10000..10100 +# s: 100000...100999 +statement ok +INSERT INTO ducklake.filter_pushdown +SELECT i % 1000 v, i, (TIMESTAMP '2000-01-01' + interval (i) hour)::DATE, i / 10, printf('%06d', i) +FROM range(100000,101000) t(i); + +# file 3 - +# i: 500000..501000 +# d: 2027-01-15 - 2027-02-25 +# k: 50000..50100 +# s: 500000...500999 +statement ok +INSERT INTO ducklake.filter_pushdown +SELECT i % 1000 v, i, (TIMESTAMP '1970-01-01' + interval (i) hour)::DATE, i / 10, printf('%06d', i) +FROM range(500000,501000) t(i); + +# integer filters +query IIIII +SELECT * FROM ducklake.filter_pushdown WHERE i=527 +---- +527 527 2000-01-22 52.700 000527 + +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i>100998 +---- +1001 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i>100998 +---- +analyzed_plan :.*Total Files Read: 2.* + +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i>=100999 +---- +1001 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i>=100999 +---- +analyzed_plan :.*Total Files Read: 2.* + +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown 
WHERE i<100001 +---- +1001 + +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i<=100000 +---- +1001 + +# date filter +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE d=DATE '2000-01-23'; +---- +24 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE d=DATE '2000-01-23'; +---- +analyzed_plan :.*Total Files Read: 1.* + +# decimal filter +query IIIII +SELECT * FROM ducklake.filter_pushdown WHERE k=25.3; +---- +253 253 2000-01-11 25.300 000253 + +query II +EXPLAIN ANALYZE SELECT * FROM ducklake.filter_pushdown WHERE k=25.3; +---- +analyzed_plan :.*Total Files Read: 1.* + +# varchar filter +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE s>= '500023'; +---- +977 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE s>= '500023'; +---- +analyzed_plan :.*Total Files Read: 1.* + +# multiple filters +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE d >= DATE '2011-05-29' AND k < 50000; +---- +1000 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE d >= DATE '2011-05-29' AND k < 50000; +---- +analyzed_plan :.*Total Files Read: 1.* + +# OR filters +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i=527 OR i=100527; +---- +2 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i=527 OR i=100527; +---- +analyzed_plan :.*Total Files Read: 2.* + +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i in (500, 600, 700); +---- +3 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i IN (500, 600, 700); +---- +analyzed_plan :.*Total Files Read: 1.* + +# file 4 - +# Single row so it should be able to be pruned with != filter +statement ok +INSERT INTO ducklake.filter_pushdown +SELECT i % 1000 v, i, (TIMESTAMP '1970-01-01' + interval (i) hour)::DATE, i / 10, printf('%06d', i) +FROM range(501000, 501001) t(i); + +# != (not equal) pushdown - should prune file with constant value +query I +SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i != 501000; +---- +3000 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM ducklake.filter_pushdown WHERE i != 501000; +---- +analyzed_plan :.*Total Files Read: 3.* diff --git a/tests/sqllogictests/sql/stats/global_stats.test b/tests/sqllogictests/sql/stats/global_stats.test new file mode 100644 index 0000000..05d8a58 --- /dev/null +++ b/tests/sqllogictests/sql/stats/global_stats.test @@ -0,0 +1,119 @@ +# name: test/sql/stats/global_stats.test +# description: Test global stats +# group: [stats] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_global_stats', METADATA_CATALOG 'ducklake_meta') + +# test tracking of stats +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (42); + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*42.*Max.*42.*Has Null.*false.* + +statement ok +INSERT INTO ducklake.test VALUES (84) + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*42.*Max.*84.*Has Null.*false.* + +statement ok +INSERT INTO ducklake.test VALUES (NULL) + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*42.*Max.*84.*Has Null.*true.* + +# dates/timestamps +statement ok +CREATE TABLE ducklake.dates(d DATE); + +statement ok +INSERT INTO ducklake.dates VALUES (date '1992-01-01'); + +query 
I +SELECT stats(d) FROM ducklake.dates LIMIT 1 +---- +:.*Min.*1992-01-01.*Max.*1992-01-01.*Has Null.*false.* + +statement ok +INSERT INTO ducklake.dates VALUES (date '2000-02-03'); + +query I +SELECT stats(d) FROM ducklake.dates LIMIT 1 +---- +:.*Min.*1992-01-01.*Max.*2000-02-03.*Has Null.*false.* + +# strings +statement ok +CREATE TABLE ducklake.strings(s VARCHAR); + +statement ok +INSERT INTO ducklake.strings VALUES ('hello world'); + +query I +SELECT stats(s) FROM ducklake.strings LIMIT 1 +---- +:.*Min.*hello wo.*Max.*hello wo.*Has Null.*false.* + +statement ok +INSERT INTO ducklake.strings VALUES ('bye bye world'); + +query I +SELECT stats(s) FROM ducklake.strings LIMIT 1 +---- +:.*Min.*bye bye.*Max.*hello wo.*Has Null.*false.* + +# multiple inserts within the same transaction +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (85); + +statement ok +INSERT INTO ducklake.test VALUES (86); + +statement ok +INSERT INTO ducklake.test VALUES (87); + +statement ok +COMMIT + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*42.*Max.*87.*Has Null.*true.* + +# test stats with transaction-local changes +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test VALUES (100); + +query I +SELECT * FROM ducklake.test WHERE i=100 +---- +100 + +statement ok +ROLLBACK diff --git a/tests/sqllogictests/sql/stats/global_stats_transactions.test b/tests/sqllogictests/sql/stats/global_stats_transactions.test new file mode 100644 index 0000000..f65f8cf --- /dev/null +++ b/tests/sqllogictests/sql/stats/global_stats_transactions.test @@ -0,0 +1,54 @@ +# name: test/sql/stats/global_stats_transactions.test +# description: Test global stats in the presence of transaction conflicts +# group: [stats] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_global_stats_transactions') + +statement ok +SET immediate_transaction_mode=true + +# test tracking of stats across transactions +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con3 +BEGIN + +statement ok con1 +INSERT INTO ducklake.test VALUES (42); + +statement ok con2 +INSERT INTO ducklake.test VALUES (84); + +statement ok con3 +INSERT INTO ducklake.test VALUES (NULL); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +statement ok con3 +COMMIT + +query I +SELECT stats(i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*42.*Max.*84.*Has Null.*true.* diff --git a/tests/sqllogictests/sql/table_changes/ducklake_lower_timestamp.test b/tests/sqllogictests/sql/table_changes/ducklake_lower_timestamp.test new file mode 100644 index 0000000..1d0dbea --- /dev/null +++ b/tests/sqllogictests/sql/table_changes/ducklake_lower_timestamp.test @@ -0,0 +1,40 @@ +# name: test/sql/table_changes/ducklake_lower_timestamp.test +# description: test issue #330 is fixed +# group: [table_changes] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_lower_timestamp') + +statement ok +use ducklake; + +statement ok +create or replace table repro (foo int, bar text, update_ts timestamptz); + +statement ok +insert into repro by name select 1 as foo, '1' as bar, current_timestamp as update_ts; + +statement ok +insert 
into repro by name select 2 as foo, '2' as bar, current_timestamp as update_ts; + +statement ok +insert into repro by name select 3 as foo, '3' as bar, current_timestamp as update_ts; + +query I +SELECT COUNT(*) FROM ducklake.table_changes('repro', 0, 4); +---- +3 + +query I +SELECT COUNT(*) from ducklake.table_changes('repro', date '1970-01-01', now()) +---- +3 \ No newline at end of file diff --git a/tests/sqllogictests/sql/table_changes/ducklake_table_changes.test b/tests/sqllogictests/sql/table_changes/ducklake_table_changes.test new file mode 100644 index 0000000..1164a96 --- /dev/null +++ b/tests/sqllogictests/sql/table_changes/ducklake_table_changes.test @@ -0,0 +1,106 @@ +# name: test/sql/table_changes/ducklake_table_changes.test +# description: test ducklake_table_changes function +# group: [table_changes] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_table_changes_files') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test FROM range(3); + +# snapshot 3 +statement ok +UPDATE ducklake.test SET i=i+100 + +# snapshot 4 +statement ok +UPDATE ducklake.test SET i=i+100 + +# snapshot 5 +statement ok +DELETE FROM ducklake.test + +query IIII +FROM ducklake.table_changes('test', 0, 2) ORDER BY ALL +---- +2 0 insert 0 +2 1 insert 1 +2 2 insert 2 + +# with timestamps +statement ok +SET VARIABLE begin_ts = (SELECT snapshot_time FROM ducklake.snapshots() WHERE snapshot_id = 0) + +statement ok +SET VARIABLE end_ts = (SELECT snapshot_time FROM ducklake.snapshots() WHERE snapshot_id = 2) + +query IIII +FROM ducklake.table_changes('test', getvariable('begin_ts'), getvariable('end_ts')) ORDER BY ALL +---- +2 0 insert 0 +2 1 insert 1 +2 2 insert 2 + +query IIII +FROM ducklake.table_changes('test', 3, 3) ORDER BY ALL +---- +3 0 update_postimage 100 +3 0 update_preimage 0 +3 1 update_postimage 101 +3 1 update_preimage 1 +3 2 update_postimage 102 +3 2 update_preimage 2 + +query IIII +FROM ducklake.table_changes('test', 4, 4) ORDER BY ALL +---- +4 0 update_postimage 200 +4 0 update_preimage 100 +4 1 update_postimage 201 +4 1 update_preimage 101 +4 2 update_postimage 202 +4 2 update_preimage 102 + +query IIII +FROM ducklake.table_changes('test', 5, 5) ORDER BY ALL +---- +5 0 delete 200 +5 1 delete 201 +5 2 delete 202 + +# all changes +query IIII +FROM ducklake.table_changes('test', 0, 5) ORDER BY ALL +---- +2 0 insert 0 +2 1 insert 1 +2 2 insert 2 +3 0 update_postimage 100 +3 0 update_preimage 0 +3 1 update_postimage 101 +3 1 update_preimage 1 +3 2 update_postimage 102 +3 2 update_preimage 2 +4 0 update_postimage 200 +4 0 update_preimage 100 +4 1 update_postimage 201 +4 1 update_preimage 101 +4 2 update_postimage 202 +4 2 update_preimage 102 +5 0 delete 200 +5 1 delete 201 +5 2 delete 202 diff --git a/tests/sqllogictests/sql/table_changes/ducklake_table_deletions.test b/tests/sqllogictests/sql/table_changes/ducklake_table_deletions.test new file mode 100644 index 0000000..e22af55 --- /dev/null +++ b/tests/sqllogictests/sql/table_changes/ducklake_table_deletions.test @@ -0,0 +1,149 @@ +# name: test/sql/table_changes/ducklake_table_deletions.test +# description: test ducklake_table_deletions function +# group: [table_changes] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH 
__TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_table_deletions_files') + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test VALUES (1) + +# snapshot 3 +statement ok +INSERT INTO ducklake.test VALUES (2) + +# snapshot 4 +statement ok +INSERT INTO ducklake.test VALUES (3) + +# snapshot 5 +statement ok +INSERT INTO ducklake.test VALUES (NULL) + +# snapshot 6 +statement ok +INSERT INTO ducklake.test FROM range(10, 15) + +# snapshot 7 +statement ok +DELETE FROM ducklake.test WHERE i=2 + +# snapshot 8 +statement ok +DELETE FROM ducklake.test WHERE i=11 + +# snapshot 9 +statement ok +DELETE FROM ducklake.test WHERE i=12 + +query I +FROM ducklake.test ORDER BY ALL +---- +1 +3 +10 +13 +14 +NULL + +# snapshot 7 deleted i=2 +query III +SELECT rowid, snapshot_id, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 7, 7); +---- +1 7 2 + +# snapshot 8 deleted i=11 +query III +SELECT rowid, snapshot_id, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 8, 8); +---- +5 8 11 + +# snapshot 9 deleted i=12 +query III +SELECT rowid, snapshot_id, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 9, 9); +---- +6 9 12 + +# we can get all of these at once +query III +SELECT rowid, snapshot_id, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 7, 9) ORDER BY rowid +---- +1 7 2 +5 8 11 +6 9 12 + +# this shows all deletions performed +query II +SELECT rowid, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 0, 9) ORDER BY rowid +---- +1 2 +5 11 +6 12 + +# snapshot 10: update a subset of the rows +statement ok +UPDATE ducklake.test SET i=i+100 WHERE i < 13 + +# updates are deletions + insertions +query II +SELECT rowid, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 10, 10) ORDER BY rowid +---- +0 1 +2 3 +4 10 + +# these are all the deleted tuples +query II +SELECT rowid, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 7, 10) ORDER BY rowid +---- +0 1 +1 2 +2 3 +4 10 +5 11 +6 12 + +statement ok +DELETE FROM ducklake.test + +query II +SELECT rowid, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 11, 11) ORDER BY rowid +---- +0 101 +2 103 +3 NULL +4 110 +7 13 +8 14 + + +# if we update and then delete - the same row can be there multiple times +query II +SELECT rowid, * FROM ducklake_table_deletions('ducklake', 'main', 'test', 0, 11) ORDER BY ALL +---- +0 1 +0 101 +1 2 +2 3 +2 103 +3 NULL +4 10 +4 110 +5 11 +6 12 +7 13 +8 14 diff --git a/tests/sqllogictests/sql/table_changes/ducklake_table_insertions.test b/tests/sqllogictests/sql/table_changes/ducklake_table_insertions.test new file mode 100644 index 0000000..c23be25 --- /dev/null +++ b/tests/sqllogictests/sql/table_changes/ducklake_table_insertions.test @@ -0,0 +1,95 @@ +# name: test/sql/table_changes/ducklake_table_insertions.test +# description: test ducklake_table_insertions function +# group: [table_changes] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_table_insertions_files'); + +# snapshot 1 +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +# snapshot 2 +statement ok +INSERT INTO ducklake.test VALUES (1); + +# snapshot 3 +statement ok +INSERT INTO ducklake.test VALUES (2); + +# snapshot 4 +statement ok +INSERT INTO ducklake.test VALUES (3); + 
+# snapshot 5 +statement ok +INSERT INTO ducklake.test VALUES (NULL); + +# snapshot 6 +statement ok +INSERT INTO ducklake.test FROM range(10, 12); + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 2); +---- +0 1 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 3); +---- +0 1 +1 2 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 4); +---- +0 1 +1 2 +2 3 + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 4, 5); +---- +2 3 +3 NULL + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 5, 5); +---- +3 NULL + +# snapshot 7: update a subset of the rows +statement ok +UPDATE ducklake.test SET i=i+100 WHERE i < 11; + +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 7, 7) ORDER BY rowid +---- +0 101 +1 102 +2 103 +4 110 + +# the change feed has both the original rows and the updated rows +query II +SELECT rowid, * FROM ducklake_table_insertions('ducklake', 'main', 'test', 0, 7) ORDER BY ALL +---- +0 1 +0 101 +1 2 +1 102 +2 3 +2 103 +3 NULL +4 10 +4 110 +5 11 diff --git a/tests/sqllogictests/sql/time_travel/basic_time_travel.test b/tests/sqllogictests/sql/time_travel/basic_time_travel.test new file mode 100644 index 0000000..d49faac --- /dev/null +++ b/tests/sqllogictests/sql/time_travel/basic_time_travel.test @@ -0,0 +1,112 @@ +# name: test/sql/time_travel/basic_time_travel.test +# description: test time travel in ducklake +# group: [time_travel] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_time_travel_files') + +statement ok +CREATE TABLE ducklake.test(s STRUCT(i INTEGER, j INTEGER)); + +query I +SELECT * FROM ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}), ({'i': NULL, 'j': 3}), (NULL); + +# non-existent version +statement error +SELECT * FROM ducklake.test AT (VERSION => 10) +---- +No snapshot found at version 10 + +# table does not exist +statement error +SELECT * FROM ducklake.test AT (VERSION => 0) +---- +does not exist at version 0 + +query I +SELECT * FROM ducklake.test AT (VERSION => 1) +---- + +query I +SELECT * FROM ducklake.test AT (VERSION => 2) +---- +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} +NULL + +# timestamp-based query +query I +SELECT * FROM ducklake.test AT (TIMESTAMP => NOW()) +---- +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} +NULL + +statement ok +SET VARIABLE snapshot_time = (SELECT snapshot_time FROM ducklake.snapshots() WHERE snapshot_id=1); + +query I +SELECT * FROM ducklake.test AT (TIMESTAMP => getvariable('snapshot_time')) +---- + +# read a dropped table +statement ok +DROP TABLE ducklake.test + +statement error +SELECT * FROM ducklake.test +---- +does not exist + +query I +SELECT * FROM ducklake.test AT (VERSION => 2) +---- +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} +NULL + +# get a changeset +query I rowsort +SELECT * FROM ducklake.test AT (version => 2) EXCEPT SELECT * FROM ducklake.test AT (version => 1) +---- +NULL +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} + +# time travel with schemas +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok +INSERT INTO ducklake.s1.tbl VALUES (42); + +statement ok +DROP SCHEMA ducklake.s1 CASCADE + +query I +SELECT * FROM 
ducklake.s1.tbl AT (version => 6) +---- +42 + +statement error +SELECT * FROM ducklake.s1.tbl AT (version => 7) +---- +does not exist diff --git a/tests/sqllogictests/sql/time_travel/time_travel_views.test b/tests/sqllogictests/sql/time_travel/time_travel_views.test new file mode 100644 index 0000000..2b75d48 --- /dev/null +++ b/tests/sqllogictests/sql/time_travel/time_travel_views.test @@ -0,0 +1,111 @@ +# name: test/sql/time_travel/time_travel_views.test +# description: test time travel of views in ducklake +# group: [time_travel] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_time_travel_views_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j INTEGER); + +statement ok +CREATE VIEW ducklake.v1 AS SELECT i * 100 AS i, j * 100 AS j FROM test ORDER BY ALL + +statement ok +INSERT INTO ducklake.test VALUES (1, 2), (2, 3); + +statement ok +INSERT INTO ducklake.test VALUES (3, 4), (5, 6); + +query II +SELECT * FROM ducklake.v1 +---- +100 200 +200 300 +300 400 +500 600 + +# view does not exist +statement error +SELECT * FROM ducklake.v1 AT (VERSION => 0) +---- +does not exist at version 0 + +query II +SELECT * FROM ducklake.v1 AT (VERSION => 2) +---- + +query II +SELECT * FROM ducklake.v1 AT (VERSION => 3) +---- +100 200 +200 300 + +query II +SELECT * FROM ducklake.v1 AT (VERSION => 4) +---- +100 200 +200 300 +300 400 +500 600 + +# time travel with schemas +statement ok +BEGIN + +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok +CREATE TABLE ducklake.s1.test(i INT) + +statement ok +CREATE VIEW ducklake.s1_view AS SELECT * FROM s1.test + +statement ok +COMMIT + +statement ok +INSERT INTO ducklake.s1.test VALUES (42), (84); + +statement ok +DROP SCHEMA ducklake.s1 CASCADE + +statement error +SELECT * FROM ducklake.s1_view +---- + +query I +SELECT * FROM ducklake.s1_view AT (VERSION => 5) +---- + +query I rowsort +SELECT * FROM ducklake.s1_view AT (VERSION => 6) +---- +42 +84 + +# an explicit time travel clause in the view definition takes priority over any versioning of the view itself +statement ok +CREATE VIEW ducklake.my_view AS SELECT * FROM ducklake.test AT (VERSION => 3) + +query II rowsort +FROM ducklake.my_view +---- +1 2 +2 3 +
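+# reading the view at a later version still returns the version 3 data +query II rowsort +FROM ducklake.my_view AT (VERSION => 8) +---- +1 2 +2 3 diff --git a/tests/sqllogictests/sql/tpch/tpch_sf1.test_slow b/tests/sqllogictests/sql/tpch/tpch_sf1.test_slow new file mode 100644 index 0000000..e3e7daa --- /dev/null +++ b/tests/sqllogictests/sql/tpch/tpch_sf1.test_slow @@ -0,0 +1,46 @@ +# name: test/sql/tpch/tpch_sf1.test_slow +# description: Test running TPC-H on DuckLake +# group: [tpch] + +require ducklake + +require parquet + +require tpch + +mode skip + +statement ok +CALL dbgen(sf=1); + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_tpch_files') + +statement ok +COPY FROM DATABASE memory TO ducklake + +statement ok +USE ducklake + +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop diff --git a/tests/sqllogictests/sql/transaction/basic_transaction.test b/tests/sqllogictests/sql/transaction/basic_transaction.test new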
file mode 100644 index 0000000..6580f3a --- /dev/null +++ b/tests/sqllogictests/sql/transaction/basic_transaction.test @@ -0,0 +1,85 @@ +# name: test/sql/transaction/basic_transaction.test +# description: Test basic transaction support of DuckLake +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_tl_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +USE ducklake + +# table creation +statement ok +BEGIN + +statement ok +CREATE TABLE test(i INTEGER, j INTEGER); + +# we can query the table +query II +SELECT * FROM test +---- + +query I +SHOW TABLES +---- +test + + +statement ok +ROLLBACK + +# after rollback the table no longer exists +statement error +SELECT * FROM test +---- +does not exist + +query I +SHOW TABLES +---- + +# create the table again +statement ok +CREATE TABLE test(i INTEGER, j INTEGER); + +# we can query transaction-local data +statement ok +BEGIN + +statement ok +INSERT INTO test VALUES (42, 84) + +query II +SELECT * FROM test +---- +42 84 + +# the data exists as files in the data directory +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_tl_files/**/*.parquet') +---- +1 + +statement ok +ROLLBACK + +# rolling back deletes the rows from the table +query II +SELECT * FROM test +---- + +# it also deletes the written files from the data directory +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_tl_files/**/*.parquet') +---- +0 diff --git a/tests/sqllogictests/sql/transaction/concurrent_table_creation.test b/tests/sqllogictests/sql/transaction/concurrent_table_creation.test new file mode 100644 index 0000000..97a3fb7 --- /dev/null +++ b/tests/sqllogictests/sql/transaction/concurrent_table_creation.test @@ -0,0 +1,47 @@ +# name: test/sql/transaction/concurrent_table_creation.test +# description: Test concurrent table creation with files +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_concurrent_conflicts_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +SET immediate_transaction_mode=true + +# two transactions try to create a table with different names and data: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.test AS SELECT 42 i + +statement ok con2 +CREATE TABLE ducklake.test2 AS SELECT 'hello world' s + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +query I +FROM ducklake.test +---- +42 + +query I +FROM ducklake.test2 +---- +hello world diff --git a/tests/sqllogictests/sql/transaction/create_conflict.test b/tests/sqllogictests/sql/transaction/create_conflict.test new file mode 100644 index 0000000..875a034 --- /dev/null +++ b/tests/sqllogictests/sql/transaction/create_conflict.test @@ -0,0 +1,107 @@ +# name: test/sql/transaction/create_conflict.test +# description: test conflict handling on CREATE +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflict_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.test(i INTEGER, j
INTEGER); + +# default behavior: table already exists (error) +statement error +CREATE TABLE ducklake.test(i VARCHAR); +---- +already exists + +# ignore if exists +statement ok +CREATE TABLE IF NOT EXISTS ducklake.test(i VARCHAR); + +query II +FROM ducklake.test +---- + +# replace table +statement ok +CREATE OR REPLACE TABLE ducklake.test(i VARCHAR); + +query I +FROM ducklake.test +---- + +# views also conflict with tables +statement error +CREATE VIEW ducklake.test AS SELECT 42 +---- +already exists + +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +# if not exists is ignored +statement ok +CREATE VIEW IF NOT EXISTS ducklake.v1 AS SELECT 84 + +query I +FROM ducklake.v1 +---- +42 + +# or replace replaces the view +statement ok +CREATE OR REPLACE VIEW ducklake.v1 AS SELECT 84 + +query I +FROM ducklake.v1 +---- +84 + +# tables also conflict with views +statement error +CREATE TABLE ducklake.v1(i INT) +---- +already exists + +statement ok +DROP VIEW ducklake.v1 + +# transaction-local view conflicts +# conflict creating a view with the same name +statement ok +BEGIN + +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +statement error +CREATE VIEW ducklake.v1 AS SELECT 84 +---- +already exists + +statement ok +ROLLBACK + +# conflict creating a view with the same name as a table +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t1 AS SELECT 42 + +statement error +CREATE VIEW ducklake.t1 AS SELECT 84 +---- +already exists + +statement ok +ROLLBACK diff --git a/tests/sqllogictests/sql/transaction/transaction_conflict_cleanup.test b/tests/sqllogictests/sql/transaction/transaction_conflict_cleanup.test new file mode 100644 index 0000000..64004c0 --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_conflict_cleanup.test @@ -0,0 +1,54 @@ +# name: test/sql/transaction/transaction_conflict_cleanup.test +# description: Verify files are cleaned up after a transaction conflict +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_files_conflict_cleanup') + +statement ok +SET immediate_transaction_mode=true + +# con2 creates a conflicting table (with data) +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.test(i INTEGER); + +statement ok con2 +CREATE TABLE ducklake.test(s VARCHAR); + +statement ok con2 +INSERT INTO ducklake.test VALUES ('hello'), ('world'); + +statement ok con1 +COMMIT + +# con2 has the transaction-local files written in the directory +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_files_conflict_cleanup/**/*.parquet') +---- +1 + +statement error con2 +COMMIT +---- +conflict + +# they are cleaned up after the conflict +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_files_conflict_cleanup/**/*.parquet') +---- +0 diff --git a/tests/sqllogictests/sql/transaction/transaction_conflict_inlining.test b/tests/sqllogictests/sql/transaction/transaction_conflict_inlining.test new file mode 100644 index 0000000..0f06df9 --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_conflict_inlining.test @@ -0,0 +1,103 @@ +# name: test/sql/transaction/transaction_conflict_inlining.test +# description: Test ducklake transaction conflicts with data inlining +# group: [transaction] + +require ducklake + +require parquet + +require httpfs + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + 
+test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_transaction_conflict_files', ENCRYPTED, DATA_INLINING_ROW_LIMIT 10000) + +statement ok +SET immediate_transaction_mode=true + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +# two transactions try to flush the inlined data +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_flush_inlined_data('ducklake') + +statement error con2 +CALL ducklake_flush_inlined_data('ducklake') +---- +Conflict + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +statement ok +DROP TABLE ducklake.test + +# one transaction updates inlined data; the other flushes it +statement ok +CREATE TABLE ducklake.test2(id INTEGER); + +statement ok +INSERT INTO ducklake.test2 FROM range(1000); + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +UPDATE ducklake.test2 SET id=id+1000 + +statement ok con2 +CALL ducklake_flush_inlined_data('ducklake') + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict + +statement ok +DROP TABLE ducklake.test2 + +# one transaction flushes inlined data; the other updates it +statement ok +CREATE TABLE ducklake.test3(id INTEGER); + +statement ok +INSERT INTO ducklake.test3 FROM range(1000); + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CALL ducklake_flush_inlined_data('ducklake') + +statement ok con2 +UPDATE ducklake.test3 SET id=id+1000 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict diff --git a/tests/sqllogictests/sql/transaction/transaction_conflicts.test b/tests/sqllogictests/sql/transaction/transaction_conflicts.test new file mode 100644 index 0000000..741cf7f --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_conflicts.test @@ -0,0 +1,302 @@ +# name: test/sql/transaction/transaction_conflicts.test +# description: Test transaction conflicts in DuckLake +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflicts_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +SET immediate_transaction_mode=true + +# two transactions try to create a table with different names: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.test2(i INTEGER); + +statement ok con2 +CREATE TABLE ducklake.test3(s VARCHAR); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# two transactions try to create a table with the same name: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.test(i INTEGER); + +statement ok con2 +CREATE TABLE ducklake.test(s VARCHAR); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +Transaction conflict + +# two transactions try to insert into the same table: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +INSERT INTO ducklake.test VALUES (1); + +statement ok con2 +INSERT INTO ducklake.test VALUES (100); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# two transactions try to drop the same table: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP TABLE ducklake.test + +statement ok con2 +DROP TABLE ducklake.test + 
+statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +# two transactions try to create a schema with the same name: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE SCHEMA ducklake.s1 + +statement ok con2 +CREATE SCHEMA ducklake.s1 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +Transaction conflict + +statement ok +CREATE SCHEMA ducklake.s2 + +# two transactions try to create a table with the same name in different schemas: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.s1.same_name_tbl(i INTEGER); + +statement ok con2 +CREATE TABLE ducklake.s2.same_name_tbl(i INTEGER); + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +statement ok +DROP TABLE ducklake.s1.same_name_tbl + +statement ok +DROP TABLE ducklake.s2.same_name_tbl + +# two transactions try to drop the same schema: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP SCHEMA ducklake.s1 + +statement ok con2 +DROP SCHEMA ducklake.s1 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +# one transaction tries to create a table in a dropped schema: conflict +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP SCHEMA ducklake.s1 + +statement ok con2 +CREATE TABLE ducklake.s1.tbl(i INT); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +# one transaction tries to insert data into a dropped table: conflict +statement ok +CREATE TABLE ducklake.test(i INTEGER); + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP TABLE ducklake.test + +statement ok con2 +INSERT INTO ducklake.test VALUES (42); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +# one transaction tries to drop a schema that another transaction created a table in: conflict +statement ok +CREATE SCHEMA ducklake.s1 + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.s1.test(i INTEGER); + +statement ok con2 +DROP SCHEMA ducklake.s1 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +statement ok +CREATE TABLE ducklake.test(part_key INT, val INT); + +# two transactions try to set a partition key on the same table +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +ALTER TABLE ducklake.test SET PARTITIONED BY (part_key); + +statement ok con2 +ALTER TABLE ducklake.test SET PARTITIONED BY (val); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +# try to insert into a table that has had its partition key changed +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +ALTER TABLE ducklake.test SET PARTITIONED BY (val); + +statement ok con2 +INSERT INTO ducklake.test VALUES (1, 1) + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +# try to set a partition key on a dropped table +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP TABLE ducklake.test + +statement ok con2 +ALTER TABLE ducklake.test SET PARTITIONED BY (part_key); + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + diff --git a/tests/sqllogictests/sql/transaction/transaction_conflicts_delete.test b/tests/sqllogictests/sql/transaction/transaction_conflicts_delete.test new file mode 100644 index 0000000..1df7097 --- /dev/null +++
b/tests/sqllogictests/sql/transaction/transaction_conflicts_delete.test @@ -0,0 +1,123 @@ +# name: test/sql/transaction/transaction_conflicts_delete.test +# description: Test transaction conflicts with deletes in DuckLake +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflicts_deletes_files') + +statement ok +SET immediate_transaction_mode=true + +statement ok +CREATE TABLE ducklake.test AS FROM range(1000) t(i) + +# two transactions try to delete the same data: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +query I con1 +DELETE FROM ducklake.test WHERE i<200 +---- +200 + +query I con2 +DELETE FROM ducklake.test WHERE i<100 +---- +100 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +conflict + +# two transactions try to delete all data of the same table: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +query I con1 +DELETE FROM ducklake.test +---- +800 + +query I con2 +DELETE FROM ducklake.test +---- +800 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + + +statement ok +INSERT INTO ducklake.test FROM range(1000) + +# transaction tries to delete from a table that was dropped +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP TABLE ducklake.test + +query I con2 +DELETE FROM ducklake.test +---- +1000 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +another transaction has dropped it + +statement ok +CREATE TABLE ducklake.test AS FROM range(1000) t(i) + +# transaction tries to delete from a table that was altered +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +ALTER TABLE ducklake.test ADD COLUMN j INTEGER + +query I con2 +DELETE FROM ducklake.test +---- +1000 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +another transaction has altered it diff --git a/tests/sqllogictests/sql/transaction/transaction_conflicts_view.test b/tests/sqllogictests/sql/transaction/transaction_conflicts_view.test new file mode 100644 index 0000000..06180ae --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_conflicts_view.test @@ -0,0 +1,128 @@ +# name: test/sql/transaction/transaction_conflicts_view.test +# description: Test transaction conflicts with views in DuckLake +# group: [transaction] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_conflicts_view_files') + +statement ok +SET immediate_transaction_mode=true + +# two transactions try to create a view with different names: no conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE VIEW ducklake.test2 AS SELECT 42 + +statement ok con2 +CREATE VIEW ducklake.test3 AS SELECT 84 + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# two transactions try to create a view with the same name: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE VIEW ducklake.test AS SELECT 42 + +statement ok con2 +CREATE VIEW ducklake.test AS SELECT 84 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +Transaction conflict + +# two transactions try to create a view and table with the same name: conflict +statement ok con1 +BEGIN + +statement ok
con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.t_name AS SELECT 42 + +statement ok con2 +CREATE VIEW ducklake.t_name AS SELECT 84 + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +Transaction conflict + +# two transactions try to drop the same view: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +DROP VIEW ducklake.test + +statement ok con2 +DROP VIEW ducklake.test + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +statement ok +CREATE VIEW ducklake.comment_view AS SELECT 42; + +# two transactions try to alter the same view: conflict +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +COMMENT ON VIEW ducklake.comment_view IS 'con1 comment' + +statement ok con2 +COMMENT ON VIEW ducklake.comment_view IS 'con2 comment' + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +another transaction has altered it + +query I +SELECT comment FROM duckdb_views WHERE view_name='comment_view' +---- +con1 comment diff --git a/tests/sqllogictests/sql/transaction/transaction_inlining.test b/tests/sqllogictests/sql/transaction/transaction_inlining.test new file mode 100644 index 0000000..fd8d3e4 --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_inlining.test @@ -0,0 +1,51 @@ +# name: test/sql/transaction/transaction_inlining.test +# description: Test inlining support within a transaction +# group: [transaction] + +require ducklake + +require parquet + +require icu + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/transaction_inlining', DATA_INLINING_ROW_LIMIT 1000 ) + +statement ok +USE ducklake + +statement ok +BEGIN TRANSACTION; + +statement ok +CREATE TABLE repro_table (yr int ); + +statement ok +ALTER TABLE repro_table SET PARTITIONED BY (yr); + +statement ok +INSERT INTO repro_table values (2025), (2026), (2027) + +statement ok +COMMIT; + +query I +FROM repro_table; +---- +2025 +2026 +2027 + +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +query I +FROM repro_table; +---- +2025 +2026 +2027 \ No newline at end of file diff --git a/tests/sqllogictests/sql/transaction/transaction_schema.test b/tests/sqllogictests/sql/transaction/transaction_schema.test new file mode 100644 index 0000000..b8e941e --- /dev/null +++ b/tests/sqllogictests/sql/transaction/transaction_schema.test @@ -0,0 +1,34 @@ +# name: test/sql/transaction/transaction_schema.test +# description: Test multi-schema support in DuckLake +# group: [transaction] + +require ducklake + +require parquet + +# store multiple DuckLake catalogs in the same metadata store (with different schemas) +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake_1 (DATA_PATH '${DATA_PATH}/ducklake_files_s1', METADATA_SCHEMA 'metadata_s1') + +statement ok +CREATE TABLE ducklake_1.tbl(i INTEGER); + +statement ok +INSERT INTO ducklake_1.tbl VALUES (42); + +statement ok +DETACH ducklake_1 + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake_2 (DATA_PATH '${DATA_PATH}/ducklake_files_s2', METADATA_SCHEMA 'metadata_s2') + +statement ok +CREATE TABLE ducklake_2.tbl(s VARCHAR); + +statement ok +INSERT INTO ducklake_2.tbl VALUES ('hello world'); diff --git a/tests/sqllogictests/sql/types/all_types.test b/tests/sqllogictests/sql/types/all_types.test new file mode 100644 index 
0000000..06d2707 --- /dev/null +++ b/tests/sqllogictests/sql/types/all_types.test @@ -0,0 +1,50 @@ +# name: test/sql/types/all_types.test +# description: test ducklake with different data types +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_all_types_files', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE VIEW all_types AS SELECT * EXCLUDE (BIGNUM, BIT, small_enum, +medium_enum, +large_enum, +"union", +fixed_int_array, +fixed_varchar_array, +fixed_nested_int_array, +fixed_nested_varchar_array, +fixed_struct_array, +struct_of_fixed_array, +fixed_array_of_int_list, +list_of_fixed_int_array, hugeint, uhugeint, interval, time_tz) FROM test_all_types(); + +query I nosort alltypes +FROM all_types +---- + +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.data_types AS FROM all_types + +query I nosort alltypes +FROM ducklake.data_types +---- + +statement ok +COMMIT + +query I nosort alltypes +FROM ducklake.data_types +---- diff --git a/tests/sqllogictests/sql/types/floats.test b/tests/sqllogictests/sql/types/floats.test new file mode 100644 index 0000000..df0faaf --- /dev/null +++ b/tests/sqllogictests/sql/types/floats.test @@ -0,0 +1,97 @@ +# name: test/sql/types/floats.test +# description: test ducklake floats +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_float_files') + +foreach type FLOAT DOUBLE + +statement ok +CREATE OR REPLACE TABLE ducklake.test(f ${type}); + +statement ok +INSERT INTO ducklake.test VALUES (1), (10); + +statement ok +INSERT INTO ducklake.test VALUES ('NaN'), (1); + +# predicate on NaN +query I +SELECT COUNT(*) FROM ducklake.test WHERE f='NaN' +---- +1 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>'NaN' +---- +0 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>='NaN' +---- +1 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f<'NaN' +---- +3 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f<='NaN' +---- +4 + +# upper-bound requires NaN checks +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>1 +---- +2 + +# test infinite +statement ok +INSERT INTO ducklake.test VALUES ('inf'); + +statement ok +INSERT INTO ducklake.test VALUES ('-inf'); + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>'inf' +---- +1 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>='inf' +---- +2 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f<'inf' +---- +4 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f<='inf' +---- +5 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>'-inf' +---- +5 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE f>='-inf' +---- +6 + +endloop diff --git a/tests/sqllogictests/sql/types/json.test b/tests/sqllogictests/sql/types/json.test new file mode 100644 index 0000000..5191c95 --- /dev/null +++ b/tests/sqllogictests/sql/types/json.test @@ -0,0 +1,48 @@ +# name: test/sql/types/json.test +# description: test storing json in ducklake +# group: [types] + +require ducklake + +require parquet + +require json + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_json.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_json_files') + +statement ok +CREATE TABLE ducklake.test(l JSON); + +query I +SELECT * FROM 
ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES ('{"key": "value"}'); + +query I +SELECT * FROM ducklake.test +---- +{"key": "value"} + +query I +SELECT typeof(l) FROM ducklake.test +---- +JSON + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_json.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_json_files') + +query I +SELECT * FROM ducklake.test +---- +{"key": "value"} + +query I +SELECT typeof(l) FROM ducklake.test +---- +JSON diff --git a/tests/sqllogictests/sql/types/list.test b/tests/sqllogictests/sql/types/list.test new file mode 100644 index 0000000..2d92ac1 --- /dev/null +++ b/tests/sqllogictests/sql/types/list.test @@ -0,0 +1,61 @@ +# name: test/sql/types/list.test +# description: test storing list types in ducklake +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_list_files') + +statement ok +CREATE TABLE ducklake.test(l INTEGER[]); + +query I +SELECT * FROM ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES ([1]), ([NULL]), (NULL), ([3]); + +query I +SELECT * FROM ducklake.test +---- +[1] +[NULL] +NULL +[3] + +query I +SELECT * FROM ducklake.test WHERE l[1]=1 +---- +[1] + +query I +SELECT * FROM ducklake.test WHERE l[1]=100 +---- + +statement ok +INSERT INTO ducklake.test VALUES ([4, 5]), ([6, 7]); + +query I +SELECT * FROM ducklake.test +---- +[1] +[NULL] +NULL +[3] +[4, 5] +[6, 7] + +# stats +query I +SELECT stats(l[1]) FROM ducklake.test LIMIT 1 +---- +:.*Min.*1.*Max.*7.*Has Null.*true.* diff --git a/tests/sqllogictests/sql/types/map.test b/tests/sqllogictests/sql/types/map.test new file mode 100644 index 0000000..03b6cb2 --- /dev/null +++ b/tests/sqllogictests/sql/types/map.test @@ -0,0 +1,61 @@ +# name: test/sql/types/map.test +# description: test storing map types in ducklake +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_map_files') + +statement ok +CREATE TABLE ducklake.test(s MAP(VARCHAR, INTEGER)); + +query I +SELECT * FROM ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'i': 1, 'j': 2}), (MAP {'j': 3}), (NULL); + +query I +SELECT * FROM ducklake.test +---- +{i=1, j=2} +{j=3} +NULL + +query I +SELECT * FROM ducklake.test WHERE s.i=1 +---- +{i=1, j=2} + +query I +SELECT * FROM ducklake.test WHERE s.i=100 +---- + +statement ok +INSERT INTO ducklake.test VALUES (MAP {'i': 4, 'j': 5}), (MAP {'i': 6}); + +query I +SELECT * FROM ducklake.test +---- +{i=1, j=2} +{j=3} +NULL +{i=4, j=5} +{i=6} + +# map stats not supported yet +mode skip + +query I +SELECT stats(s.i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*1.*Max.*6.*Has Null.*true.* diff --git a/tests/sqllogictests/sql/types/null_byte.test b/tests/sqllogictests/sql/types/null_byte.test new file mode 100644 index 0000000..a17d405 --- /dev/null +++ b/tests/sqllogictests/sql/types/null_byte.test @@ -0,0 +1,32 @@ +# name: test/sql/types/null_byte.test +# description: Test null bytes in strings +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake 
(DATA_PATH '${DATA_PATH}/ducklake_null_byte', METADATA_CATALOG 'ducklake_meta') + +statement ok +CREATE TABLE ducklake.tbl(s VARCHAR); + +statement ok +INSERT INTO ducklake.tbl VALUES ('goo' || chr(0) || 'se'), ('hello'); + +query I +FROM ducklake.tbl +---- +goo\0se +hello + +query I +FROM ducklake.tbl WHERE s < 'hello' +---- +goo\0se diff --git a/tests/sqllogictests/sql/types/struct.test b/tests/sqllogictests/sql/types/struct.test new file mode 100644 index 0000000..2f375f4 --- /dev/null +++ b/tests/sqllogictests/sql/types/struct.test @@ -0,0 +1,59 @@ +# name: test/sql/types/struct.test +# description: test storing struct types in ducklake +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_struct_files') + +statement ok +CREATE TABLE ducklake.test(s STRUCT(i INTEGER, j INTEGER)); + +query I +SELECT * FROM ducklake.test +---- + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 1, 'j': 2}), ({'i': NULL, 'j': 3}), (NULL); + +query I +SELECT * FROM ducklake.test +---- +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} +NULL + +query I +SELECT * FROM ducklake.test WHERE s.i=1 +---- +{'i': 1, 'j': 2} + +query I +SELECT * FROM ducklake.test WHERE s.i=100 +---- + +statement ok +INSERT INTO ducklake.test VALUES ({'i': 4, 'j': 5}), ({'i': 6, 'j': 7}); + +query I +SELECT * FROM ducklake.test +---- +{'i': 1, 'j': 2} +{'i': NULL, 'j': 3} +NULL +{'i': 4, 'j': 5} +{'i': 6, 'j': 7} + +# stats +query I +SELECT stats(s.i) FROM ducklake.test LIMIT 1 +---- +:.*Min.*1.*Max.*6.*Has Null.*true.* diff --git a/tests/sqllogictests/sql/types/timestamp.test b/tests/sqllogictests/sql/types/timestamp.test new file mode 100644 index 0000000..58b0182 --- /dev/null +++ b/tests/sqllogictests/sql/types/timestamp.test @@ -0,0 +1,40 @@ +# name: test/sql/types/timestamp.test +# description: test ducklake timestamps +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_ts_files') + +statement ok +CREATE OR REPLACE TABLE ducklake.test(ts TIMESTAMP); + +statement ok +INSERT INTO ducklake.test VALUES (TIMESTAMP '1992-01-01'), (TIMESTAMP '2020-01-01'); + +statement ok +INSERT INTO ducklake.test VALUES ('infinity'), (TIMESTAMP '2022-01-01'); + +# predicate on infinity +query I +SELECT COUNT(*) FROM ducklake.test WHERE ts='infinity' +---- +1 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE ts<='infinity' +---- +4 + +query I +SELECT COUNT(*) FROM ducklake.test WHERE ts>'-infinity' +---- +4 diff --git a/tests/sqllogictests/sql/types/unsupported.test b/tests/sqllogictests/sql/types/unsupported.test new file mode 100644 index 0000000..3cd059f --- /dev/null +++ b/tests/sqllogictests/sql/types/unsupported.test @@ -0,0 +1,49 @@ +# name: test/sql/types/unsupported.test +# description: test unsupported types +# group: [types] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_union_files') + +# unsupported types +statement error +CREATE TABLE ducklake.test(ts INT[3]); +---- +unsupported type + +statement error +CREATE TABLE ducklake.test(ts UNION(i INT, j INT)); +---- 
+unsupported type + +# enum +statement error +CREATE TABLE ducklake.test AS SELECT 'hello'::ENUM('world', 'hello') AS h; +---- +unsupported type + +# varchar with collation +statement error +CREATE TABLE ducklake.test(s VARCHAR COLLATE NOACCENT); +---- +Collations are not supported + +statement error +CREATE TABLE ducklake.test(s VARCHAR USING COMPRESSION ZSTD); +---- +compression type for a column is not supported in DuckLake + +# unsupported type in a struct +statement error +CREATE TABLE ducklake.test(ts STRUCT(x INT[3])); +---- +unsupported type diff --git a/tests/sqllogictests/sql/update/basic_update.test b/tests/sqllogictests/sql/update/basic_update.test new file mode 100644 index 0000000..03608d1 --- /dev/null +++ b/tests/sqllogictests/sql/update/basic_update.test @@ -0,0 +1,49 @@ +# name: test/sql/update/basic_update.test +# description: Test ducklake updates +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_update_files'); + +statement ok +CREATE TABLE ducklake.test AS SELECT 1000 + i id, i % 10 as val FROM range(1000) t(i); + +statement ok +BEGIN + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test +---- +1000 1499500 4500 + +query I +UPDATE ducklake.test SET id=id+2 WHERE id%2=0 +---- +500 + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test +---- +1000 1500500 4500 + +statement ok +COMMIT + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test +---- +1000 1500500 4500 + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test AT (VERSION => 1) +---- +1000 1499500 4500 diff --git a/tests/sqllogictests/sql/update/test_update_expression.test b/tests/sqllogictests/sql/update/test_update_expression.test new file mode 100644 index 0000000..216f608 --- /dev/null +++ b/tests/sqllogictests/sql/update/test_update_expression.test @@ -0,0 +1,28 @@ +# name: test/sql/update/test_update_expression.test +# description: Test ducklake updates +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/test_update_expression'); + +statement ok +use ducklake + +statement ok +CREATE TABLE monthly_sales(empid INT, amount INT, month TEXT, status TEXT); + +statement ok +INSERT INTO monthly_sales VALUES + (1, 10000, '1-JAN', 'regular') + +statement ok +UPDATE monthly_sales SET status=CASE WHEN amount >= 10000 THEN 'important' ELSE 'regular' END diff --git a/tests/sqllogictests/sql/update/update_join_duplicates.test b/tests/sqllogictests/sql/update/update_join_duplicates.test new file mode 100644 index 0000000..e1dd292 --- /dev/null +++ b/tests/sqllogictests/sql/update/update_join_duplicates.test @@ -0,0 +1,61 @@ +# name: test/sql/update/update_join_duplicates.test +# description: Test ducklake update using a join with duplicates +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_update_join_files') + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(5) t(i); + +statement ok +CREATE TEMPORARY TABLE updated_rows AS FROM range(0, 10, 2) t(update_id) UNION 
ALL FROM range(0, 10, 2); + +# duplicate row-id updates are not yet supported +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test FROM range(5, 10) + +statement error +UPDATE ducklake.test SET id=id+1000 FROM updated_rows WHERE id=updated_rows.update_id +---- +The same row was updated multiple times + +statement ok +ROLLBACK + +# we can update through a join if we filter out the duplicate row ids +statement ok +BEGIN + +statement ok +INSERT INTO ducklake.test FROM range(5, 10) + +statement ok +UPDATE ducklake.test SET id=id+1000 FROM (SELECT DISTINCT update_id FROM updated_rows) updated_rows WHERE id=updated_rows.update_id + +query III +SELECT COUNT(*), SUM(id), AVG(id) FROM ducklake.test +---- +10 5045 504.5 + +statement ok +COMMIT + +query III +SELECT COUNT(*), SUM(id), AVG(id) FROM ducklake.test +---- +10 5045 504.5 + + diff --git a/tests/sqllogictests/sql/update/update_not_null.test b/tests/sqllogictests/sql/update/update_not_null.test new file mode 100644 index 0000000..f1fee1e --- /dev/null +++ b/tests/sqllogictests/sql/update/update_not_null.test @@ -0,0 +1,42 @@ +# name: test/sql/update/update_not_null.test +# description: test updating a table with a NOT NULL constraint +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_update_not_null_files') + +statement ok +CREATE TABLE ducklake.test(i INTEGER NOT NULL, j INTEGER); + +statement ok +INSERT INTO ducklake.test VALUES (42, NULL); + +statement ok +BEGIN + +statement error +UPDATE ducklake.test SET i=NULL +---- +NOT NULL constraint failed + +statement error +UPDATE ducklake.test SET i=100 +---- +Current transaction is aborted + +statement ok +ROLLBACK + +query II +FROM ducklake.test +---- +42 NULL diff --git a/tests/sqllogictests/sql/update/update_partitioning.test b/tests/sqllogictests/sql/update/update_partitioning.test new file mode 100644 index 0000000..dc97d38 --- /dev/null +++ b/tests/sqllogictests/sql/update/update_partitioning.test @@ -0,0 +1,67 @@ +# name: test/sql/update/update_partitioning.test +# description: Update a partitioned table +# group: [update] + +require ducklake + +require parquet + +# partitioning based on a column +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_update_partitioning', METADATA_CATALOG 'ducklake_metadata') + +statement ok +USE ducklake + +statement ok +CREATE TABLE partitioned_tbl(part_key INTEGER, values VARCHAR); + +statement ok +ALTER TABLE partitioned_tbl SET PARTITIONED BY (part_key); + +statement ok +INSERT INTO partitioned_tbl SELECT i%2, concat('thisisastring_', i) FROM range(10000) t(i) + +statement ok +UPDATE partitioned_tbl SET part_key=2 WHERE part_key=0 + +# verify files are partitioned +query III +SELECT data_file_id, partition_id, regexp_extract(path, '.*(part_key=[0-9])[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file +ORDER BY ALL +---- +0 2 part_key=0 +1 2 part_key=1 +2 2 part_key=2 + +query I +SELECT COUNT(*) FROM partitioned_tbl +---- +10000 + +# query the new partition +query I +SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=2 +---- +5000 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl WHERE part_key=2 +---- +analyzed_plan :.*Total Files Read: 1.* + +# query the old partition with time travel
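+# (the pre-update file for part_key=0 is retained, so older snapshots can still read it)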
+query I +SELECT COUNT(*) FROM partitioned_tbl AT (VERSION => 3) WHERE part_key=0 +---- +5000 + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM partitioned_tbl AT (VERSION => 3) WHERE part_key=0 +---- +analyzed_plan :.*Total Files Read: 1.* diff --git a/tests/sqllogictests/sql/update/update_rollback.test b/tests/sqllogictests/sql/update/update_rollback.test new file mode 100644 index 0000000..f21b6c3 --- /dev/null +++ b/tests/sqllogictests/sql/update/update_rollback.test @@ -0,0 +1,43 @@ +# name: test/sql/update/update_rollback.test +# description: Test ducklake update rollback +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_update_rollback_files'); + +statement ok +CREATE TABLE ducklake.test AS SELECT 1000 + i id, i % 10 as val FROM range(1000) t(i); + +statement ok +BEGIN + +statement ok +UPDATE ducklake.test SET id=id+1 + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test +---- +1000 1500500 4500 + +statement ok +ROLLBACK + +query III +SELECT COUNT(*), SUM(id), SUM(val) FROM ducklake.test +---- +1000 1499500 4500 + +# verify any additional files were deleted +query I +SELECT COUNT(*) FROM glob('${DATA_PATH}/ducklake_update_rollback_files/main/test/*.parquet') +---- +1 diff --git a/tests/sqllogictests/sql/update/update_same_transaction.test b/tests/sqllogictests/sql/update/update_same_transaction.test new file mode 100644 index 0000000..ddfd30a --- /dev/null +++ b/tests/sqllogictests/sql/update/update_same_transaction.test @@ -0,0 +1,35 @@ +# name: test/sql/update/update_same_transaction.test +# description: Test running updates in the same transaction +# group: [update] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/update_same_transaction', METADATA_CATALOG 'ducklake_metadata') + +statement ok +BEGIN TRANSACTION; + +statement ok +CREATE TABLE ducklake.test (id INTEGER, name TEXT); + +statement ok +INSERT INTO ducklake.test VALUES (1, 'Bob'); + +statement ok +UPDATE ducklake.test SET name = 'Alice' WHERE id = 1; + +statement ok +COMMIT; + +query T +select name from ducklake.test where id = 1; +---- +Alice diff --git a/tests/sqllogictests/sql/view/ducklake_rename_view.test b/tests/sqllogictests/sql/view/ducklake_rename_view.test new file mode 100644 index 0000000..597597c --- /dev/null +++ b/tests/sqllogictests/sql/view/ducklake_rename_view.test @@ -0,0 +1,93 @@ +# name: test/sql/view/ducklake_rename_view.test +# description: Test renaming views in DuckLake +# group: [view] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_view_files') + +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +statement ok +ALTER VIEW ducklake.v1 RENAME TO v2 + +query I +SELECT * FROM ducklake.v2 +---- +42 + +# rename in a transaction
statement ok +BEGIN + +statement ok +ALTER VIEW ducklake.v2 RENAME TO v3 + +statement error +SELECT * FROM ducklake.v2 +---- +does not exist + +query I +SELECT * FROM ducklake.v3 +---- +42 + +statement ok +ROLLBACK + +query I +SELECT * FROM ducklake.v2 +---- +42 +
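+# the rename was rolled back, so v2 is visible again + +# rename a transaction-local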
view +statement ok +BEGIN + +statement ok +CREATE VIEW ducklake.local_view AS SELECT 100 + +statement ok +ALTER VIEW ducklake.local_view RENAME TO local_view2 + +statement error +SELECT * FROM ducklake.local_view +---- +does not exist + +query I +SELECT * FROM ducklake.local_view2 +---- +100 + +# and I'll do it again! +statement ok +ALTER VIEW ducklake.local_view2 RENAME TO local_view3 + +query I +SELECT * FROM ducklake.local_view3 +---- +100 + +statement ok +COMMIT + +query I +SELECT * FROM ducklake.local_view3 +---- +100 + +statement error +ALTER TABLE ducklake.local_view3 RENAME TO local_view4 +---- +not a table diff --git a/tests/sqllogictests/sql/view/ducklake_rename_view_incorect.test b/tests/sqllogictests/sql/view/ducklake_rename_view_incorect.test new file mode 100644 index 0000000..d0c1809 --- /dev/null +++ b/tests/sqllogictests/sql/view/ducklake_rename_view_incorect.test @@ -0,0 +1,37 @@ +# name: test/sql/view/ducklake_rename_view_incorect.test +# description: Test RENAME VIEW error cases: renaming a non-existent view and renaming to an already existing view +# group: [view] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_rename_view_incorect') + +statement ok +USE ducklake; + +statement ok +CREATE TABLE tbl(i INTEGER) + +statement ok +CREATE VIEW vw AS SELECT * FROM tbl + +statement ok +CREATE VIEW vw2 AS SELECT 1729 AS i + +# renaming a non-existent view +statement error +ALTER VIEW non_view RENAME TO vw +---- + +# rename to an already existing view +statement error +ALTER VIEW vw2 RENAME TO vw +---- \ No newline at end of file diff --git a/tests/sqllogictests/sql/view/ducklake_view.test b/tests/sqllogictests/sql/view/ducklake_view.test new file mode 100644 index 0000000..edb460d --- /dev/null +++ b/tests/sqllogictests/sql/view/ducklake_view.test @@ -0,0 +1,79 @@ +# name: test/sql/view/ducklake_view.test +# description: test ducklake view creation +# group: [view] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_view_files') + +# create a view +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +query I +SELECT * FROM ducklake.v1 +---- +42 + +query I +SELECT sql FROM duckdb_views() WHERE database_name='ducklake' +---- +CREATE VIEW v1 AS SELECT 42; + +# we can drop the view +statement ok +DROP VIEW ducklake.v1 + +# aaaand it's gone +statement error +SELECT * FROM ducklake.v1 +---- +does not exist + +# transaction-local view drop and re-create +statement ok +CREATE VIEW ducklake.v1 AS SELECT 42 + +statement ok +BEGIN + +statement ok +DROP VIEW ducklake.v1 + +statement error +FROM ducklake.v1 +---- +does not exist + +statement ok +CREATE VIEW ducklake.v1 AS SELECT 84 + +query I +FROM ducklake.v1 +---- +84 + +statement ok +COMMIT + +query I +FROM ducklake.v1 +---- +84 + +# view with explicit column aliases +statement ok +CREATE VIEW ducklake.aliased_view(a) AS SELECT 42 AS X, 84 as Y + +query I +SELECT a FROM ducklake.aliased_view +---- +42 diff --git a/tests/sqllogictests/sql/view/ducklake_view_info_columns.test b/tests/sqllogictests/sql/view/ducklake_view_info_columns.test new file mode 100644 index 0000000..4e9797d --- /dev/null +++ b/tests/sqllogictests/sql/view/ducklake_view_info_columns.test @@ -0,0 +1,78 @@ +# name:
+# description: test ducklake view info columns
+# group: [view]
+
+require ducklake
+
+require parquet
+
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_view_info_columns.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_view_info_columns')
+
+statement ok
+use ducklake;
+
+statement ok
+create or replace table t as select 1 as id;
+
+statement ok
+create or replace view v as select * from t;
+
+query II
+select table_name, column_name from information_schema.columns where table_catalog = current_database() order by all;
+----
+t id
+v id
+
+# What if we alter our view?
+statement ok
+ALTER VIEW ducklake.v RENAME TO v2
+
+query II
+select table_name, column_name from information_schema.columns where table_catalog = current_database() order by all;
+----
+t id
+v2 id
+
+# What if we drop our view?
+statement ok
+DROP VIEW ducklake.v2;
+
+query II
+select table_name, column_name from information_schema.columns where table_catalog = current_database();
+----
+t id
+
+# Let's try a more complex view
+statement ok
+create or replace table t_2 as select 1 as id, 'Oogie' as name, 2 as salary, 3 as dividends;
+
+# Also try with the same view name as the one used in-code
+statement ok
+create view mock_view_name_lake as select name, salary+dividends as fortune from t inner join t_2 on (t.id = t_2.id);
+
+query II
+select table_name, column_name from information_schema.columns where table_catalog = current_database() and table_name = 'mock_view_name_lake' order by all;
+----
+mock_view_name_lake fortune
+mock_view_name_lake name
+
+# We can also time travel to a snapshot where view v still existed
+
+statement ok
+use memory;
+
+statement ok
+DETACH ducklake;
+
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_view_info_columns.db' AS ducklake (SNAPSHOT_VERSION 2);
+
+statement ok
+use ducklake;
+
+query II
+select table_name, column_name from information_schema.columns where table_catalog = current_database() order by all;
+----
+t id
+v id
\ No newline at end of file
diff --git a/tests/sqllogictests/sql/view/ducklake_view_schema.test b/tests/sqllogictests/sql/view/ducklake_view_schema.test
new file mode 100644
index 0000000..16e788f
--- /dev/null
+++ b/tests/sqllogictests/sql/view/ducklake_view_schema.test
@@ -0,0 +1,140 @@
+# name: test/sql/view/ducklake_view_schema.test
+# description: Test views in schemas in DuckLake
+# group: [view]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_view_schema_files')
+
+statement ok
+CREATE SCHEMA ducklake.s1;
+
+statement ok
+CREATE SCHEMA ducklake.s2;
+
+statement ok
+CREATE VIEW ducklake.s1.v1 AS SELECT 42 i
+
+statement ok
+CREATE VIEW ducklake.s2.v1 AS SELECT 'hello' a, 'world' b
+
+query I
+SELECT * FROM ducklake.s1.v1
+----
+42
+
+query II
+SELECT * FROM ducklake.s2.v1
+----
+hello world
+
+# drop the schemas again
+statement error
+DROP SCHEMA ducklake.s1
+----
+there are entries that depend on it
+
+statement ok
+DROP VIEW ducklake.s1.v1
+
+statement ok
+DROP SCHEMA ducklake.s1
+
+statement ok
+DROP SCHEMA ducklake.s2 CASCADE
+
+# the schemas are now gone
+statement error
+CREATE VIEW ducklake.s2.v1 AS SELECT 42
+----
+not found
+
+foreach commit_query ROLLBACK COMMIT
+
+# now try all of this transaction-local
+statement ok
+BEGIN
+
+statement ok
+CREATE SCHEMA ducklake.s1;
+
+statement ok
+CREATE SCHEMA ducklake.s2;
+
+query I
+SELECT schema_name FROM duckdb_schemas() WHERE database_name='ducklake' ORDER BY ALL
+----
+main
+s1
+s2
+
+statement ok
+CREATE VIEW ducklake.s1.v1 AS SELECT 42 i
+
+statement ok
+CREATE VIEW ducklake.s2.v1 AS SELECT 'hello' a, 'world' b
+
+query I
+SELECT * FROM ducklake.s1.v1
+----
+42
+
+query II
+SELECT * FROM ducklake.s2.v1
+----
+hello world
+
+statement ok
+${commit_query}
+
+endloop
+
+query I
+SELECT * FROM ducklake.s1.v1
+----
+42
+
+query II
+SELECT * FROM ducklake.s2.v1
+----
+hello world
+
+# drop and re-create a schema within the same transaction
+statement ok
+BEGIN
+
+statement ok
+DROP SCHEMA ducklake.s1 CASCADE
+
+query I
+SELECT schema_name FROM duckdb_schemas() WHERE database_name='ducklake' ORDER BY ALL
+----
+main
+s2
+
+statement ok
+CREATE SCHEMA ducklake.s1
+
+statement ok
+CREATE VIEW ducklake.s1.v1 AS SELECT 42 i
+
+query I
+SELECT * FROM ducklake.s1.v1
+----
+42
+
+statement ok
+COMMIT
+
+query I
+SELECT * FROM ducklake.s1.v1
+----
+42
diff --git a/tests/sqllogictests/sql/view/ducklake_view_table_conflict.test b/tests/sqllogictests/sql/view/ducklake_view_table_conflict.test
new file mode 100644
index 0000000..d60b0a9
--- /dev/null
+++ b/tests/sqllogictests/sql/view/ducklake_view_table_conflict.test
@@ -0,0 +1,29 @@
+# name: test/sql/view/ducklake_view_table_conflict.test
+# description: test ducklake view/table conflicts
+# group: [view]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_view_conflict_files')
+
+# create a view
+statement ok
+CREATE VIEW ducklake.v1 AS SELECT 42
+
+statement error
+DROP TABLE IF EXISTS ducklake.v1
+----
+trying to drop type Table
+
+statement error
+CREATE OR REPLACE TABLE ducklake.v1(i INTEGER)
+----
+trying to replace with type Table
diff --git a/tests/sqllogictests/sql/virtualcolumns/ducklake_snapshot_id.test b/tests/sqllogictests/sql/virtualcolumns/ducklake_snapshot_id.test
new file mode 100644
index 0000000..a588b1b
--- /dev/null
+++ b/tests/sqllogictests/sql/virtualcolumns/ducklake_snapshot_id.test
@@ -0,0 +1,70 @@
+# name: test/sql/virtualcolumns/ducklake_snapshot_id.test
+# description: test snapshot_id virtual column
+# group: [virtualcolumns]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_snapshot_id_files')
+
+# snapshot 1
+statement ok
+CREATE TABLE ducklake.test(i INTEGER);
+
+# snapshot 2
+statement ok
+INSERT INTO ducklake.test VALUES (1);
+
+# snapshot 3
+statement ok
+INSERT INTO ducklake.test VALUES (2);
+
+# snapshot 4
+statement ok
+INSERT INTO ducklake.test VALUES (3);
+
+# snapshot 5
+statement ok
+INSERT INTO ducklake.test VALUES (NULL);
+
+statement ok
+BEGIN
+
+# snapshot 6
+statement ok
+INSERT INTO ducklake.test FROM range(10, 12);
+
+query II
+SELECT snapshot_id, * FROM ducklake.test ORDER BY ALL
+----
+2 1
+3 2
+4 3
+5 NULL
+NULL 10
+NULL 11
+
+statement ok
+COMMIT
+
+query II
+SELECT snapshot_id, * FROM ducklake.test ORDER BY ALL
+----
+2 1
+3 2
+4 3
+5 NULL
+6 10
+6 11
+
+query II
+SELECT snapshot_id, * FROM ducklake.test WHERE snapshot_id=4
+----
+4 3
diff --git a/tests/sqllogictests/sql/virtualcolumns/ducklake_virtual_columns.test b/tests/sqllogictests/sql/virtualcolumns/ducklake_virtual_columns.test
new file mode 100644
index 0000000..4455e60
--- /dev/null
+++ b/tests/sqllogictests/sql/virtualcolumns/ducklake_virtual_columns.test
@@ -0,0 +1,48 @@
+# name: test/sql/virtualcolumns/ducklake_virtual_columns.test
+# description: test ducklake virtual columns
+# group: [virtualcolumns]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
+
+test-env DATA_PATH __TEST_DIR__
+
+
+statement ok
+ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_virtual_files')
+
+statement ok
+CREATE TABLE ducklake.test(i INTEGER);
+
+statement ok
+INSERT INTO ducklake.test VALUES (1), (2), (3);
+
+query I
+SELECT file_row_number FROM ducklake.test
+----
+0
+1
+2
+
+query I
+SELECT file_row_number FROM ducklake.test WHERE file_row_number=1
+----
+1
+
+query I
+SELECT COUNT(DISTINCT filename) FROM ducklake.test WHERE contains(filename, 'ducklake_virtual_files')
+----
+1
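+
+# virtual columns can also be combined with regular columns in one projection;
+# a minimal extra check, assuming the single INSERT above produced one file
+# whose file_row_number follows insertion order
+query II
+SELECT file_row_number, i FROM ducklake.test ORDER BY ALL
+----
+0 1
+1 2
+2 3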