Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datacontract/engines/soda/check_soda_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ def check_soda_execute(
soda_configuration_str = to_athena_soda_configuration(server)
scan.add_configuration_yaml_str(soda_configuration_str)
scan.set_data_source_name(server.type)
elif server.type == "duckdb":
soda_configuration_str = to_duckdb_soda_configuration(server)
scan.add_configuration_yaml_str(soda_configuration_str)
scan.set_data_source_name(server.type)

else:
run.checks.append(
Expand Down
32 changes: 32 additions & 0 deletions datacontract/engines/soda/connections/duckdb_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,44 @@
from typing import Any, Dict

import duckdb
import yaml

from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
from datacontract.model.exceptions import DataContractException
from datacontract.model.run import Run


def to_duckdb_soda_configuration(server):
    """Build the Soda data source configuration (as YAML) for a DuckDB server.

    Args:
        server: Server entry of the data contract. It must provide a non-empty
            ``database`` (the DuckDB database file) and an explicit
            ``read_only`` value; ``schema_`` is optional.

    Returns:
        A YAML string holding a single ``data_source <server.type>`` mapping
        with ``type``, ``path``, ``read_only`` and, when a schema is set,
        ``schema_``.

    Raises:
        DataContractException: If ``database`` is missing or empty, or if
            ``read_only`` has not been specified.
    """
    database = getattr(server, "database", None)
    if not database:
        raise DataContractException(
            type="duckdb-connection",
            name="missing_database",
            reason="Database is required for DuckDB connection. Specify the database file in which your tables exist.",
            engine="datacontract",
        )

    # Only an unspecified value is an error: an explicit ``read_only: false``
    # is a legitimate choice and must not be rejected by a truthiness test.
    read_only = getattr(server, "read_only", None)
    if read_only is None:
        raise DataContractException(
            type="duckdb-connection",
            name="missing_read_only",
            reason="read_only is required for DuckDB connection. Specify if the database should be opened in read-only mode.",
            engine="datacontract",
        )

    data_source = {
        "type": "duckdb",
        "path": database,
        "read_only": read_only,
    }

    # Test the same attribute that is written below (``schema_``), so the
    # guard and the emitted value cannot disagree.
    schema = getattr(server, "schema_", None)
    if schema:
        # NOTE(review): the key name is kept as in the original; confirm that
        # Soda's DuckDB data source actually reads ``schema_``.
        data_source["schema_"] = schema

    soda_configuration = {f"data_source {server.type}": data_source}
    return yaml.dump(soda_configuration)

def get_duckdb_connection(
data_contract: DataContractSpecification,
server: Server,
Expand Down
2 changes: 2 additions & 0 deletions datacontract/export/sql_type_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
return convert_type_to_trino(field)
elif server_type == "oracle":
return convert_type_to_oracle(field)
elif server_type == "duckdb":
return convert_to_duckdb(field)

return field.type

Expand Down
19 changes: 19 additions & 0 deletions tests/fixtures/duckdb/data/data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Fixture schema: a single table keyed by field_one.
CREATE TABLE my_table (
    field_one   VARCHAR(10) PRIMARY KEY,
    field_two   INT NOT NULL,
    field_three TIMESTAMPTZ
);

-- Fixture rows: ten sample records.
INSERT INTO my_table (field_one, field_two, field_three)
VALUES
    ('CX-263-DU', 50, '2023-06-16 13:12:56'),
    ('IK-894-MN', 47, '2023-10-08 22:40:57'),
    ('ER-399-JY', 22, '2023-05-16 01:08:22'),
    ('MT-939-FH', 63, '2023-03-15 05:15:21'),
    ('LV-849-MI', 33, '2023-09-08 20:08:43'),
    ('VS-079-OH', 85, '2023-04-15 00:50:32'),
    ('DN-297-XY', 79, '2023-11-08 12:55:42'),
    ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
    ('ID-840-EG', 89, '2023-10-02 17:17:58'),
    ('FK-230-KZ', 64, '2023-11-27 15:21:48');
25 changes: 25 additions & 0 deletions tests/fixtures/duckdb/datacontract.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Data contract fixture describing one DuckDB server and one table model.
dataContractSpecification: 1.2.1
id: duckdb
info:
title: duckdb
version: 0.0.1
owner: my-domain-team
servers:
# Single server entry of type duckdb.
my-dataproduct/duckdb:
type: duckdb
# DuckDB database file, given as a relative path.
database: fixtures/duckdb/db.duckdb
read_only: true
models:
# Model for the table named my_table.
my_table:
type: table
fields:
field_one:
type: varchar
required: true
unique: true
# Two letters, three digits, two letters, hyphen-separated; anchored at the end only.
pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
field_two:
type: integer
# Lower bound for field_two values.
minimum: 10
field_three:
type: timestamp_tz
39 changes: 39 additions & 0 deletions tests/test_test_duckdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# import pytest

from pathlib import Path

import duckdb

from datacontract.data_contract import DataContract
from datacontract.model.run import ResultEnum, Run


def test_test_duckdb():
    """Run the DuckDB fixture contract and expect the run and every check to pass."""
    # Build the fixture database from the SQL script before testing.
    _init_sql("fixtures/duckdb/data/data.sql")

    contract_text = _setup_datacontract("fixtures/duckdb/datacontract.yaml")
    contract = DataContract(data_contract_str=contract_text)
    run: Run = contract.test()

    assert run.result == "passed"
    # Each individual check must report a passing result as well.
    assert all(check.result == ResultEnum.passed for check in run.checks)


def _setup_datacontract(file):
    """Return the full text content of the data contract file at ``file``."""
    return Path(file).read_text()

def _init_sql(file_path, db_path="fixtures/duckdb/db.duckdb"):
    """Recreate the DuckDB fixture database from a SQL script.

    Any existing database file at ``db_path`` is removed first, so the script
    always runs against a freshly created database.

    Args:
        file_path: Path of the SQL script to execute.
        db_path: Path of the DuckDB database file to (re)create. Defaults to
            the fixture database location.
    """
    # missing_ok avoids the separate exists() check and its check-then-act gap.
    Path(db_path).unlink(missing_ok=True)

    # Read the script before connecting so a missing script does not leave a
    # freshly created, empty database file behind.
    with open(file_path, "r") as sql_file:
        sql_commands = sql_file.read()

    connection = duckdb.connect(database=str(db_path), read_only=False)
    try:
        connection.sql(sql_commands)
    finally:
        # Always release the file handle, even when the script fails.
        connection.close()