Skip to content

Commit 2ec670c

Browse files
author
Bertjan Broeksema
committed
Implement basic support for testing a datacontract on duckdb
1 parent 58b5f72 commit 2ec670c

File tree

6 files changed

+121
-0
lines changed

6 files changed

+121
-0
lines changed

datacontract/engines/soda/check_soda_execute.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ def check_soda_execute(
117117
soda_configuration_str = to_athena_soda_configuration(server)
118118
scan.add_configuration_yaml_str(soda_configuration_str)
119119
scan.set_data_source_name(server.type)
120+
elif server.type == "duckdb":
121+
soda_configuration_str = to_duckdb_soda_configuration(server)
122+
scan.add_configuration_yaml_str(soda_configuration_str)
123+
scan.set_data_source_name(server.type)
120124

121125
else:
122126
run.checks.append(

datacontract/engines/soda/connections/duckdb_connection.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,44 @@
22
from typing import Any, Dict
33

44
import duckdb
5+
import yaml
56

67
from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
78
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
9+
from datacontract.model.exceptions import DataContractException
810
from datacontract.model.run import Run
911

1012

13+
def to_duckdb_soda_configuration(server):
    """Build the Soda data source configuration (as YAML) for a DuckDB server.

    Args:
        server: Server entry of the data contract. ``database`` (the DuckDB
            database file) and ``read_only`` must be set; ``schema_`` is
            optional.

    Returns:
        A YAML string holding a single ``data_source <server.type>`` mapping
        with the keys ``type``, ``path``, ``read_only`` and, when a schema is
        configured, ``schema_``.

    Raises:
        DataContractException: If ``database`` is missing or empty, or if
            ``read_only`` has not been specified.
    """
    database = getattr(server, "database", None)
    if not database:
        raise DataContractException(
            type="duckdb-connection",
            name="missing_database",
            reason="Database is required for DuckDB connection. Specify the database file in which your tables exist.",
            engine="datacontract",
        )

    # Compare against None rather than relying on truthiness: an explicit
    # ``read_only: false`` is a legitimate choice and must not be reported
    # as a missing setting.
    read_only = getattr(server, "read_only", None)
    if read_only is None:
        raise DataContractException(
            type="duckdb-connection",
            name="missing_read_only",
            reason="read_only is required for DuckDB connection. Specify if the database should be opened in read-only mode.",
            engine="datacontract",
        )

    data_source = {
        "type": "duckdb",
        "path": database,
        "read_only": read_only,
    }

    # Test the same attribute that is stored. The previous guard looked at
    # ``server.schema`` while writing ``server.schema_``.
    # NOTE(review): if Server is a pydantic model, ``schema`` is presumably the
    # BaseModel method and therefore always truthy - confirm.
    # NOTE(review): the "schema_" key is kept unchanged; confirm it is the
    # property name Soda's DuckDB data source expects.
    schema = getattr(server, "schema_", None)
    if schema:
        data_source["schema_"] = schema

    return yaml.dump({f"data_source {server.type}": data_source})
42+
1143
def get_duckdb_connection(
1244
data_contract: DataContractSpecification,
1345
server: Server,

datacontract/export/sql_type_converter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
2424
return convert_type_to_trino(field)
2525
elif server_type == "oracle":
2626
return convert_type_to_oracle(field)
27+
elif server_type == "duckdb":
28+
return convert_to_duckdb(field)
2729

2830
return field.type
2931

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Create the table
2+
CREATE TABLE my_table (
3+
field_one VARCHAR(10) primary key,
4+
field_two INT not null,
5+
field_three TIMESTAMPTZ
6+
);
7+
8+
-- Insert the data
9+
INSERT INTO my_table (field_one, field_two, field_three) VALUES
10+
('CX-263-DU', 50, '2023-06-16 13:12:56'),
11+
('IK-894-MN', 47, '2023-10-08 22:40:57'),
12+
('ER-399-JY', 22, '2023-05-16 01:08:22'),
13+
('MT-939-FH', 63, '2023-03-15 05:15:21'),
14+
('LV-849-MI', 33, '2023-09-08 20:08:43'),
15+
('VS-079-OH', 85, '2023-04-15 00:50:32'),
16+
('DN-297-XY', 79, '2023-11-08 12:55:42'),
17+
('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18+
('ID-840-EG', 89, '2023-10-02 17:17:58'),
19+
('FK-230-KZ', 64, '2023-11-27 15:21:48');
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
dataContractSpecification: 1.2.1
2+
id: duckdb
3+
info:
4+
title: duckdb
5+
version: 0.0.1
6+
owner: my-domain-team
7+
servers:
8+
my-dataproduct/duckdb:
9+
type: duckdb
10+
database: fixtures/duckdb/db.duckdb
11+
read_only: true
12+
models:
13+
my_table:
14+
type: table
15+
fields:
16+
field_one:
17+
type: varchar
18+
required: true
19+
unique: true
20+
pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
21+
field_two:
22+
type: integer
23+
minimum: 10
24+
field_three:
25+
type: timestamp_tz

tests/test_test_duckdb.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# import pytest
2+
3+
from pathlib import Path
4+
5+
import duckdb
6+
7+
from datacontract.data_contract import DataContract
8+
from datacontract.model.run import ResultEnum, Run
9+
10+
11+
def test_test_duckdb():
    """Run the DuckDB fixture data contract against a freshly built database.

    The fixture database is recreated from the SQL script first, then the
    contract is tested and both the overall result and every individual check
    are expected to have passed.
    """
    _init_sql("fixtures/duckdb/data/data.sql")

    contract_yaml = _setup_datacontract("fixtures/duckdb/datacontract.yaml")
    run: Run = DataContract(data_contract_str=contract_yaml).test()

    assert run.result == "passed"
    for check in run.checks:
        assert check.result == ResultEnum.passed
22+
23+
24+
def _setup_datacontract(file):
    """Return the full text of the data contract file at ``file``."""
    with open(file) as handle:
        return handle.read()
28+
29+
def _init_sql(file_path):
    """Recreate the DuckDB fixture database from a SQL script.

    Any existing ``fixtures/duckdb/db.duckdb`` file is removed, then the
    statements read from ``file_path`` are run against a new database at that
    location.

    Args:
        file_path: Path of the SQL script to run.
    """
    database_file = "fixtures/duckdb/db.duckdb"

    # Start from a clean file so the script's CREATE TABLE does not collide
    # with a table left behind by an earlier run.
    database_path = Path(database_file)
    if database_path.exists():
        database_path.unlink()

    # Read the script before opening the database, so a missing script does
    # not leave an open connection behind.
    with open(file_path, "r") as sql_file:
        sql_commands = sql_file.read()

    connection = duckdb.connect(database=database_file, read_only=False)
    try:
        connection.sql(sql_commands)
    finally:
        # Close even when the script fails; otherwise the database file stays
        # open for the rest of the test session.
        connection.close()

0 commit comments

Comments
 (0)