Skip to content

Commit acfd62a

Browse files
authored
chore: add adbc, duckdb examples (#76)
Adds examples of running substrait plans with adbc (duckdb driver) and plain duckdb
1 parent 52e3438 commit acfd62a

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

examples/adbc_example.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Install pyarrow, adbc-driver-manager and duckdb before running this example
2+
# This example currently runs only with duckdb<=1.1.3; later versions of duckdb no longer support substrait in adbc.
3+
# /// script
4+
# dependencies = [
5+
# "pyarrow==20.0.0",
6+
# "adbc-driver-manager==1.5.0",
7+
# "duckdb==1.1.3",
8+
# "substrait[extensions] @ file:///${PROJECT_ROOT}/"
9+
# ]
10+
# ///
11+
12+
13+
import adbc_driver_duckdb.dbapi
14+
import pyarrow
15+
from substrait.builders.plan import read_named_table, filter
16+
from substrait.builders.extended_expression import scalar_function, column, literal
17+
from substrait.builders.type import i64
18+
from substrait.extension_registry import ExtensionRegistry
19+
import pyarrow.substrait as pa_substrait
20+
21+
# Registry handed to the plan builder when the plan is materialized further down.
registry = ExtensionRegistry()

# Sample data: one integer column and one string column, four rows each.
_SAMPLE_COLUMNS = {
    "ints": [1, 2, 3, 4],
    "strs": ["a", "b", "c", "d"],
}
data = pyarrow.record_batch(
    list(_SAMPLE_COLUMNS.values()),
    names=list(_SAMPLE_COLUMNS),
)
27+
28+
def read_adbc_named_table(name: str, conn):
    """Build a Substrait named-table read for a table reachable through ADBC.

    The table's Arrow schema is fetched from ``conn``, serialized by pyarrow,
    converted with ``to_pysubstrait()``, and its ``base_schema`` is passed to
    ``read_named_table`` together with the table name.

    Args:
        name: Name of the table to read.
        conn: Connection object exposing ``adbc_get_table_schema``.

    Returns:
        The result of ``read_named_table(name, <substrait schema>)``.
    """
    arrow_schema = conn.adbc_get_table_schema(name)
    serialized = pa_substrait.serialize_schema(arrow_schema)
    named_struct = serialized.to_pysubstrait().base_schema
    return read_named_table(name, named_struct)
32+
33+
with adbc_driver_duckdb.dbapi.connect(":memory:") as conn, conn.cursor() as cur:
    # Load the sample batch into a table the plan can refer to by name.
    cur.adbc_ingest("AnswerToEverything", data)

    # Install and load the substrait extension before a plan is executed.
    for statement in ("INSTALL substrait;", "LOAD substrait;"):
        cur.executescript(statement)

    table = read_adbc_named_table("AnswerToEverything", conn)
    # `gte` comparison of column `ints` against the i64 literal 3.
    at_least_three = scalar_function(
        "functions_comparison.yaml", "gte", column("ints"), literal(3, i64())
    )
    table = filter(table, expression=at_least_three)

    # The serialized plan bytes are passed to execute() in place of a SQL string.
    serialized_plan = table(registry).SerializeToString()
    cur.execute(serialized_plan)
    print(cur.fetch_arrow_table())

examples/duckdb_example.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Install duckdb and pyarrow before running this example
2+
# /// script
3+
# dependencies = [
4+
# "pyarrow==20.0.0",
5+
# "duckdb==1.2.1",
6+
# "substrait[extensions] @ file:///${PROJECT_ROOT}/"
7+
# ]
8+
# ///
9+
10+
11+
import duckdb
12+
from substrait.builders.plan import read_named_table, project, filter
13+
from substrait.builders.extended_expression import column, scalar_function, literal
14+
from substrait.builders.type import i32
15+
from substrait.extension_registry import ExtensionRegistry
16+
from substrait.json import dump_json
17+
import pyarrow.substrait as pa_substrait
18+
19+
# Try the default extension repository first; if DuckDB reports an error,
# fall back to the community repository.
try:
    duckdb.install_extension("substrait")
except duckdb.Error:
    # Only DuckDB's own errors trigger the fallback, so KeyboardInterrupt and
    # SystemExit still propagate instead of being silently swallowed.
    duckdb.install_extension("substrait", repository="community")
duckdb.load_extension("substrait")

# The tpch extension provides the dbgen() procedure called below.
duckdb.install_extension("tpch")
duckdb.load_extension("tpch")

duckdb.sql("CALL dbgen(sf = 1);")

registry = ExtensionRegistry(load_default_extensions=True)
31+
32+
def read_duckdb_named_table(name: str, conn):
    """Build a Substrait named-table read for a DuckDB table.

    The schema is discovered by running a ``LIMIT 0`` select against the
    table and taking the Arrow schema of the result. That schema is then
    serialized by pyarrow, converted with ``to_pysubstrait()``, and its
    ``base_schema`` is passed to ``read_named_table``.

    Args:
        name: Table name; interpolated verbatim into the probing query.
        conn: Object exposing ``sql()`` (the ``duckdb`` module is passed
            by the caller in this script).

    Returns:
        The result of ``read_named_table(name, <substrait schema>)``.
    """
    probe = conn.sql(f"SELECT * FROM {name} LIMIT 0")
    arrow_schema = probe.arrow().schema
    serialized = pa_substrait.serialize_schema(arrow_schema)
    named_struct = serialized.to_pysubstrait().base_schema
    return read_named_table(name, named_struct)
36+
37+
# Build the plan: read `customer`, filter with `equal` on c_nationkey against
# the i32 literal 3, then project three columns.
table = read_duckdb_named_table("customer", duckdb)
table = filter(
    table,
    expression=scalar_function(
        "functions_comparison.yaml", "equal", column("c_nationkey"), literal(3, i32())
    ),
)
table = project(
    table,
    expressions=[column("c_name"), column("c_address"), column("c_nationkey")],
)

# The plan JSON is embedded in a single-quoted SQL string literal, so any
# single quote inside it must be doubled; otherwise it would end the literal
# early and break the statement (e.g. a plan with a string literal like "O'Neil").
plan_json = dump_json(table(registry)).replace("'", "''")
sql = f"CALL from_substrait_json('{plan_json}')"
print(duckdb.sql(sql))

0 commit comments

Comments
 (0)