Skip to content

Commit c0f9fdc

Browse files
authored
feat(datafusion): Add sqllogictest for DataFusion INSERT INTO (#1887)
## Which issue does this PR close?

- Closes #1835

## What changes are included in this PR?

- Added a new schedule step that runs the INSERT INTO sqllogictest.

## Are these changes tested?

The change is itself a test.
1 parent 7e66ded commit c0f9fdc

File tree

4 files changed

+200
-2
lines changed

4 files changed

+200
-2
lines changed

crates/sqllogictest/src/engine/datafusion.rs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ use std::sync::Arc;
2222
use datafusion::catalog::CatalogProvider;
2323
use datafusion::prelude::{SessionConfig, SessionContext};
2424
use datafusion_sqllogictest::DataFusion;
25-
use iceberg::CatalogBuilder;
2625
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
26+
use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec};
27+
use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation};
2728
use iceberg_datafusion::IcebergCatalogProvider;
2829
use indicatif::ProgressBar;
2930
use toml::Table as TomlTable;
@@ -84,8 +85,76 @@ impl DataFusionEngine {
8485
)
8586
.await?;
8687

88+
// Create a test namespace for INSERT INTO tests
89+
let namespace = NamespaceIdent::new("default".to_string());
90+
catalog.create_namespace(&namespace, HashMap::new()).await?;
91+
92+
// Create test tables
93+
Self::create_unpartitioned_table(&catalog, &namespace).await?;
94+
Self::create_partitioned_table(&catalog, &namespace).await?;
95+
8796
Ok(Arc::new(
8897
IcebergCatalogProvider::try_new(Arc::new(catalog)).await?,
8998
))
9099
}
100+
101+
/// Create an unpartitioned test table with id and name columns
102+
/// TODO: this can be removed when we support CREATE TABLE
103+
async fn create_unpartitioned_table(
104+
catalog: &impl Catalog,
105+
namespace: &NamespaceIdent,
106+
) -> anyhow::Result<()> {
107+
let schema = Schema::builder()
108+
.with_fields(vec![
109+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
110+
NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(),
111+
])
112+
.build()?;
113+
114+
catalog
115+
.create_table(
116+
namespace,
117+
TableCreation::builder()
118+
.name("test_unpartitioned_table".to_string())
119+
.schema(schema)
120+
.build(),
121+
)
122+
.await?;
123+
124+
Ok(())
125+
}
126+
127+
/// Create a partitioned test table with id, category, and value columns
128+
/// Partitioned by category using identity transform
129+
/// TODO: this can be removed when we support CREATE TABLE
130+
async fn create_partitioned_table(
131+
catalog: &impl Catalog,
132+
namespace: &NamespaceIdent,
133+
) -> anyhow::Result<()> {
134+
let schema = Schema::builder()
135+
.with_fields(vec![
136+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
137+
NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(),
138+
NestedField::optional(3, "value", Type::Primitive(PrimitiveType::String)).into(),
139+
])
140+
.build()?;
141+
142+
let partition_spec = UnboundPartitionSpec::builder()
143+
.with_spec_id(0)
144+
.add_partition_field(2, "category", Transform::Identity)?
145+
.build();
146+
147+
catalog
148+
.create_table(
149+
namespace,
150+
TableCreation::builder()
151+
.name("test_partitioned_table".to_string())
152+
.schema(schema)
153+
.partition_spec(partition_spec)
154+
.build(),
155+
)
156+
.await?;
157+
158+
Ok(())
159+
}
91160
}

crates/sqllogictest/testdata/schedules/df_test.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,8 @@ df = { type = "datafusion" }
2020

2121
[[steps]]
2222
engine = "df"
23-
slt = "df_test/show_tables.slt"
23+
slt = "df_test/show_tables.slt"
24+
25+
[[steps]]
26+
engine = "df"
27+
slt = "df_test/insert_into.slt"
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Verify the table is initially empty
19+
query IT rowsort
20+
SELECT * FROM default.default.test_unpartitioned_table
21+
----
22+
23+
# Insert a single row and verify the count
24+
query I
25+
INSERT INTO default.default.test_unpartitioned_table VALUES (1, 'Alice')
26+
----
27+
1
28+
29+
# Verify the inserted row
30+
query IT rowsort
31+
SELECT * FROM default.default.test_unpartitioned_table
32+
----
33+
1 Alice
34+
35+
# Insert multiple rows and verify the count
36+
query I
37+
INSERT INTO default.default.test_unpartitioned_table VALUES (2, 'Bob'), (3, 'Charlie')
38+
----
39+
2
40+
41+
# Verify all rows
42+
query IT rowsort
43+
SELECT * FROM default.default.test_unpartitioned_table
44+
----
45+
1 Alice
46+
2 Bob
47+
3 Charlie
48+
49+
# Insert with NULL value and verify the count
50+
query I
51+
INSERT INTO default.default.test_unpartitioned_table VALUES (4, NULL)
52+
----
53+
1
54+
55+
# Verify NULL handling
56+
query IT rowsort
57+
SELECT * FROM default.default.test_unpartitioned_table
58+
----
59+
1 Alice
60+
2 Bob
61+
3 Charlie
62+
4 NULL
63+
64+
# Test partitioned table - verify initially empty
65+
query ITT rowsort
66+
SELECT * FROM default.default.test_partitioned_table
67+
----
68+
69+
# Insert single row into partitioned table
70+
query I
71+
INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop')
72+
----
73+
1
74+
75+
# Verify the inserted row in partitioned table
76+
query ITT rowsort
77+
SELECT * FROM default.default.test_partitioned_table
78+
----
79+
1 electronics laptop
80+
81+
# Insert multiple rows with different partition values
82+
query I
83+
INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt')
84+
----
85+
4
86+
87+
# Verify all rows in partitioned table
88+
query ITT rowsort
89+
SELECT * FROM default.default.test_partitioned_table
90+
----
91+
1 electronics laptop
92+
2 electronics phone
93+
3 books novel
94+
4 books textbook
95+
5 clothing shirt
96+
97+
# Insert with NULL value in optional column
98+
query I
99+
INSERT INTO default.default.test_partitioned_table VALUES (6, 'electronics', NULL)
100+
----
101+
1
102+
103+
# Verify NULL handling in partitioned table
104+
query ITT rowsort
105+
SELECT * FROM default.default.test_partitioned_table
106+
----
107+
1 electronics laptop
108+
2 electronics phone
109+
3 books novel
110+
4 books textbook
111+
5 clothing shirt
112+
6 electronics NULL
113+
114+
# Verify partition filtering works
115+
query ITT rowsort
116+
SELECT * FROM default.default.test_partitioned_table WHERE category = 'books'
117+
----
118+
3 books novel
119+
4 books textbook

crates/sqllogictest/testdata/slts/df_test/show_tables.slt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ datafusion information_schema routines VIEW
2525
datafusion information_schema schemata VIEW
2626
datafusion information_schema tables VIEW
2727
datafusion information_schema views VIEW
28+
default default test_partitioned_table BASE TABLE
29+
default default test_partitioned_table$manifests BASE TABLE
30+
default default test_partitioned_table$snapshots BASE TABLE
31+
default default test_unpartitioned_table BASE TABLE
32+
default default test_unpartitioned_table$manifests BASE TABLE
33+
default default test_unpartitioned_table$snapshots BASE TABLE
2834
default information_schema columns VIEW
2935
default information_schema df_settings VIEW
3036
default information_schema parameters VIEW

0 commit comments

Comments
 (0)