Skip to content

Commit 1a5110a

Browse files
author
Bertjan Broeksema
committed
Implement basic support for testing a datacontract on duckdb
1 parent 58b5f72 commit 1a5110a

File tree

6 files changed

+119
-0
lines changed

6 files changed

+119
-0
lines changed

datacontract/engines/soda/check_soda_execute.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ def check_soda_execute(
117117
soda_configuration_str = to_athena_soda_configuration(server)
118118
scan.add_configuration_yaml_str(soda_configuration_str)
119119
scan.set_data_source_name(server.type)
120+
elif server.type == "duckdb":
121+
soda_configuration_str = to_duckdb_soda_configuration(server)
122+
scan.add_configuration_yaml_str(soda_configuration_str)
123+
scan.set_data_source_name(server.type)
120124

121125
else:
122126
run.checks.append(

datacontract/engines/soda/connections/duckdb_connection.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,42 @@
22
from typing import Any, Dict
33

44
import duckdb
5+
import yaml
56

67
from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
78
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
9+
from datacontract.model.exceptions import DataContractException
810
from datacontract.model.run import Run
911

12+
def to_duckdb_soda_configuration(server):
    """Build the Soda data-source configuration (as YAML text) for a DuckDB server.

    Args:
        server: Server entry of the data contract. Reads ``database``,
            ``read_only``, ``schema_`` and ``type``.

    Returns:
        A YAML string with a single ``data_source <server.type>`` mapping.

    Raises:
        DataContractException: If ``database`` is missing/empty, or if
            ``read_only`` is not set at all.
    """
    if not getattr(server, "database", None):
        raise DataContractException(
            type="duckdb-connection",
            name="missing_database",
            reason="Database is required for DuckDB connection. Specify the database file in which your tables exist.",
            engine="datacontract",
        )

    # Only an absent value counts as "missing": ``read_only: false`` is a valid,
    # explicit choice and must not be rejected by a truthiness test.
    read_only = getattr(server, "read_only", None)
    if read_only is None:
        raise DataContractException(
            type="duckdb-connection",
            name="missing_read_only",
            reason="read_only is required for DuckDB connection. Specify if the database should be opened in read-only mode.",
            engine="datacontract",
        )

    data_source = {
        "type": "duckdb",
        "path": server.database,
        "read_only": read_only,
    }

    # Guard on the same attribute whose value is copied, so the key is only
    # emitted when a schema was actually configured.
    # NOTE(review): the "schema_" key is kept as originally written — confirm
    # against the Soda DuckDB data source documentation.
    schema = getattr(server, "schema_", None)
    if schema:
        data_source["schema_"] = schema

    soda_configuration = {f"data_source {server.type}": data_source}
    return yaml.dump(soda_configuration)
1041

1142
def get_duckdb_connection(
1243
data_contract: DataContractSpecification,

datacontract/export/sql_type_converter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
2424
return convert_type_to_trino(field)
2525
elif server_type == "oracle":
2626
return convert_type_to_oracle(field)
27+
elif server_type == "duckdb":
28+
return convert_to_duckdb(field)
2729

2830
return field.type
2931

fixtures/duckdb/data/data.sql (path as referenced from tests/test_test_duckdb.py)

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Create the table: field_one is the primary key, field_two is mandatory,
-- field_three is an optional timestamp with time zone.
2+
CREATE TABLE my_table (
3+
field_one VARCHAR(10) primary key,
4+
field_two INT not null,
5+
field_three TIMESTAMPTZ
6+
);
7+
8+
-- Insert ten sample rows. The timestamp literals carry no UTC offset.
9+
INSERT INTO my_table (field_one, field_two, field_three) VALUES
10+
('CX-263-DU', 50, '2023-06-16 13:12:56'),
11+
('IK-894-MN', 47, '2023-10-08 22:40:57'),
12+
('ER-399-JY', 22, '2023-05-16 01:08:22'),
13+
('MT-939-FH', 63, '2023-03-15 05:15:21'),
14+
('LV-849-MI', 33, '2023-09-08 20:08:43'),
15+
('VS-079-OH', 85, '2023-04-15 00:50:32'),
16+
('DN-297-XY', 79, '2023-11-08 12:55:42'),
17+
('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18+
('ID-840-EG', 89, '2023-10-02 17:17:58'),
19+
('FK-230-KZ', 64, '2023-11-27 15:21:48');
fixtures/duckdb/datacontract.yaml (path as referenced from tests/test_test_duckdb.py)

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
dataContractSpecification: 1.2.1
2+
id: duckdb
3+
info:
4+
title: duckdb
5+
version: 0.0.1
6+
owner: my-domain-team
7+
servers:
8+
my-dataproduct/duckdb:
9+
type: duckdb
10+
database: fixtures/duckdb/db.duckdb
11+
read_only: true
12+
models:
13+
my_table:
14+
type: table
15+
fields:
16+
field_one:
17+
type: varchar
18+
required: true
19+
unique: true
20+
pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
21+
field_two:
22+
type: integer
23+
minimum: 10
24+
field_three:
25+
type: timestamp_tz

tests/test_test_duckdb.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# import pytest
2+
3+
from pathlib import Path
4+
import duckdb
5+
6+
from datacontract.data_contract import DataContract
7+
from datacontract.model.run import ResultEnum, Run
8+
9+
10+
def test_test_duckdb():
    """Run the DuckDB fixture contract end to end and expect every check to pass."""
    # Rebuild the database file from the SQL script before testing the contract.
    _init_sql("fixtures/duckdb/data/data.sql")

    contract_text = _setup_datacontract("fixtures/duckdb/datacontract.yaml")
    run: Run = DataContract(data_contract_str=contract_text).test()

    assert run.result == "passed"
    for check in run.checks:
        assert check.result == ResultEnum.passed
21+
22+
23+
def _setup_datacontract(file):
    """Return the full text of the data contract file at ``file``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding of the machine running the tests.
    """
    with open(file, encoding="utf-8") as data_contract_file:
        return data_contract_file.read()
27+
28+
def _init_sql(file_path, database_path="fixtures/duckdb/db.duckdb"):
    """Recreate the DuckDB database file and populate it from a SQL script.

    Args:
        file_path: Path of the SQL script to execute.
        database_path: DuckDB database file to (re)create. Defaults to the
            fixture database used by the DuckDB test.
    """
    database = Path(database_path)
    # Drop any file left by a previous run so the script starts from scratch.
    database.unlink(missing_ok=True)

    with open(file_path, "r") as sql_file:
        sql_commands = sql_file.read()

    connection = duckdb.connect(database=str(database), read_only=False)
    try:
        connection.sql(sql_commands)
    finally:
        # Always release the file handle, even when the script fails; otherwise
        # the database stays open for the rest of the test process.
        connection.close()

0 commit comments

Comments
 (0)