Skip to content

Commit d202b0c

Browse files
[DBAPI2.0] Fix the description attribute (duckdb#43)
Fixes duckdblabs/duckdb-internal#1217, duckdb/duckdb#10495. This PR makes a better attempt at implementing https://peps.python.org/pep-0249/#type-objects-and-constructors. We add the required type object sentinels: - `STRING` - `NUMBER` - `DATETIME` - `BINARY` - `ROWID` (None, as DuckDB doesn't have a way to detect this on a LogicalType). The object returned as the `type_code` in each `description` entry is a `DuckDBPyType`. The sentinels are of type `DBAPITypeObject`, which overloads `__eq__` so that they can be compared with `DuckDBPyType` objects. The constructors listed by the PEP are not added; they could be added in the future.
2 parents bf9b439 + c29e9cb commit d202b0c

File tree

9 files changed

+84
-83
lines changed

9 files changed

+84
-83
lines changed

duckdb/__init__.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,49 @@ def version():
1818
"functional"
1919
])
2020

21+
class DBAPITypeObject:
22+
def __init__(self, types: list[typing.DuckDBPyType]) -> None:
23+
self.types = types
24+
25+
def __eq__(self, other):
26+
if isinstance(other, typing.DuckDBPyType):
27+
return other in self.types
28+
return False
29+
30+
def __repr__(self):
31+
return f"<DBAPITypeObject [{','.join(str(x) for x in self.types)}]>"
32+
33+
# Define the standard DBAPI sentinels
34+
STRING = DBAPITypeObject([typing.VARCHAR])
35+
NUMBER = DBAPITypeObject([
36+
typing.TINYINT,
37+
typing.UTINYINT,
38+
typing.SMALLINT,
39+
typing.USMALLINT,
40+
typing.INTEGER,
41+
typing.UINTEGER,
42+
typing.BIGINT,
43+
typing.UBIGINT,
44+
typing.HUGEINT,
45+
typing.UHUGEINT,
46+
typing.DuckDBPyType("BIGNUM"),
47+
typing.DuckDBPyType("DECIMAL"),
48+
typing.FLOAT,
49+
typing.DOUBLE
50+
])
51+
DATETIME = DBAPITypeObject([
52+
typing.DATE,
53+
typing.TIME,
54+
typing.TIME_TZ,
55+
typing.TIMESTAMP,
56+
typing.TIMESTAMP_TZ,
57+
typing.TIMESTAMP_NS,
58+
typing.TIMESTAMP_MS,
59+
typing.TIMESTAMP_S
60+
])
61+
BINARY = DBAPITypeObject([typing.BLOB])
62+
ROWID = None
63+
2164
# Classes
2265
from _duckdb import (
2366
DuckDBPyRelation,

src/duckdb_py/pyresult.cpp

Lines changed: 1 addition & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -514,72 +514,12 @@ py::object DuckDBPyResult::FetchArrowCapsule(idx_t rows_per_batch) {
514514
return py::capsule(stream, "arrow_array_stream", ArrowArrayStreamPyCapsuleDestructor);
515515
}
516516

517-
py::str GetTypeToPython(const LogicalType &type) {
518-
switch (type.id()) {
519-
case LogicalTypeId::BOOLEAN:
520-
return py::str("bool");
521-
case LogicalTypeId::TINYINT:
522-
case LogicalTypeId::SMALLINT:
523-
case LogicalTypeId::INTEGER:
524-
case LogicalTypeId::BIGINT:
525-
case LogicalTypeId::UTINYINT:
526-
case LogicalTypeId::USMALLINT:
527-
case LogicalTypeId::UINTEGER:
528-
case LogicalTypeId::UBIGINT:
529-
case LogicalTypeId::HUGEINT:
530-
case LogicalTypeId::UHUGEINT:
531-
case LogicalTypeId::FLOAT:
532-
case LogicalTypeId::DOUBLE:
533-
case LogicalTypeId::DECIMAL: {
534-
return py::str("NUMBER");
535-
}
536-
case LogicalTypeId::VARCHAR: {
537-
if (type.HasAlias() && type.GetAlias() == "JSON") {
538-
return py::str("JSON");
539-
} else {
540-
return py::str("STRING");
541-
}
542-
}
543-
case LogicalTypeId::BLOB:
544-
case LogicalTypeId::BIT:
545-
return py::str("BINARY");
546-
case LogicalTypeId::TIMESTAMP:
547-
case LogicalTypeId::TIMESTAMP_TZ:
548-
case LogicalTypeId::TIMESTAMP_MS:
549-
case LogicalTypeId::TIMESTAMP_NS:
550-
case LogicalTypeId::TIMESTAMP_SEC: {
551-
return py::str("DATETIME");
552-
}
553-
case LogicalTypeId::TIME:
554-
case LogicalTypeId::TIME_TZ: {
555-
return py::str("Time");
556-
}
557-
case LogicalTypeId::DATE: {
558-
return py::str("Date");
559-
}
560-
case LogicalTypeId::STRUCT:
561-
case LogicalTypeId::MAP:
562-
return py::str("dict");
563-
case LogicalTypeId::LIST: {
564-
return py::str("list");
565-
}
566-
case LogicalTypeId::INTERVAL: {
567-
return py::str("TIMEDELTA");
568-
}
569-
case LogicalTypeId::UUID: {
570-
return py::str("UUID");
571-
}
572-
default:
573-
return py::str(type.ToString());
574-
}
575-
}
576-
577517
py::list DuckDBPyResult::GetDescription(const vector<string> &names, const vector<LogicalType> &types) {
578518
py::list desc;
579519

580520
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
581521
auto py_name = py::str(names[col_idx]);
582-
auto py_type = GetTypeToPython(types[col_idx]);
522+
auto py_type = DuckDBPyType(types[col_idx]);
583523
desc.append(py::make_tuple(py_name, py_type, py::none(), py::none(), py::none(), py::none(), py::none()));
584524
}
585525
return desc;

src/duckdb_py/typing/pytype.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,8 @@ void DuckDBPyType::Initialize(py::handle &m) {
326326
auto type_module = py::class_<DuckDBPyType, shared_ptr<DuckDBPyType>>(m, "DuckDBPyType", py::module_local());
327327

328328
type_module.def("__repr__", &DuckDBPyType::ToString, "Stringified representation of the type object");
329-
type_module.def("__eq__", &DuckDBPyType::Equals, "Compare two types for equality", py::arg("other"));
330-
type_module.def("__eq__", &DuckDBPyType::EqualsString, "Compare two types for equality", py::arg("other"));
329+
type_module.def("__eq__", &DuckDBPyType::Equals, "Compare two types for equality", py::arg("other"), py::is_operator());
330+
type_module.def("__eq__", &DuckDBPyType::EqualsString, "Compare two types for equality", py::arg("other"), py::is_operator());
331331
type_module.def_property_readonly("id", &DuckDBPyType::GetId);
332332
type_module.def_property_readonly("children", &DuckDBPyType::Children);
333333
type_module.def(py::init<>([](const string &type_str, shared_ptr<DuckDBPyConnection> connection = nullptr) {
@@ -347,7 +347,7 @@ void DuckDBPyType::Initialize(py::handle &m) {
347347
return make_shared_ptr<DuckDBPyType>(ltype);
348348
}));
349349
type_module.def("__getattr__", &DuckDBPyType::GetAttribute, "Get the child type by 'name'", py::arg("name"));
350-
type_module.def("__getitem__", &DuckDBPyType::GetAttribute, "Get the child type by 'name'", py::arg("name"));
350+
type_module.def("__getitem__", &DuckDBPyType::GetAttribute, "Get the child type by 'name'", py::arg("name"), py::is_operator());
351351

352352
py::implicitly_convertible<py::object, DuckDBPyType>();
353353
py::implicitly_convertible<py::str, DuckDBPyType>();

tests/fast/api/test_3728.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ def test_3728_describe_enum(self, duckdb_cursor):
1414

1515
# This fails with "RuntimeError: Not implemented Error: unsupported type: mood"
1616
assert cursor.table("person").execute().description == [
17-
('name', 'STRING', None, None, None, None, None),
17+
('name', 'VARCHAR', None, None, None, None, None),
1818
('current_mood', "ENUM('sad', 'ok', 'happy')", None, None, None, None, None),
1919
]

tests/fast/api/test_dbapi10.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
# cursor description
22
from datetime import datetime, date
33
from pytest import mark
4+
import duckdb
45

56

67
class TestCursorDescription(object):
78
@mark.parametrize(
89
"query,column_name,string_type,real_type",
910
[
10-
["SELECT * FROM integers", "i", "NUMBER", int],
11-
["SELECT * FROM timestamps", "t", "DATETIME", datetime],
12-
["SELECT DATE '1992-09-20' AS date_col;", "date_col", "Date", date],
13-
["SELECT '\\xAA'::BLOB AS blob_col;", "blob_col", "BINARY", bytes],
14-
["SELECT {'x': 1, 'y': 2, 'z': 3} AS struct_col", "struct_col", "dict", dict],
15-
["SELECT [1, 2, 3] AS list_col", "list_col", "list", list],
16-
["SELECT 'Frank' AS str_col", "str_col", "STRING", str],
11+
["SELECT * FROM integers", "i", "INTEGER", int],
12+
["SELECT * FROM timestamps", "t", "TIMESTAMP", datetime],
13+
["SELECT DATE '1992-09-20' AS date_col;", "date_col", "DATE", date],
14+
["SELECT '\\xAA'::BLOB AS blob_col;", "blob_col", "BLOB", bytes],
15+
["SELECT {'x': 1, 'y': 2, 'z': 3} AS struct_col", "struct_col", "STRUCT(x INTEGER, y INTEGER, z INTEGER)", dict],
16+
["SELECT [1, 2, 3] AS list_col", "list_col", "INTEGER[]", list],
17+
["SELECT 'Frank' AS str_col", "str_col", "VARCHAR", str],
1718
["SELECT [1, 2, 3]::JSON AS json_col", "json_col", "JSON", str],
1819
["SELECT union_value(tag := 1) AS union_col", "union_col", "UNION(tag INTEGER)", int],
1920
],
@@ -23,6 +24,24 @@ def test_description(self, query, column_name, string_type, real_type, duckdb_cu
2324
assert duckdb_cursor.description == [(column_name, string_type, None, None, None, None, None)]
2425
assert isinstance(duckdb_cursor.fetchone()[0], real_type)
2526

27+
def test_description_comparisons(self):
28+
duckdb.execute("select 42 a, 'test' b, true c")
29+
types = [x[1] for x in duckdb.description()]
30+
31+
STRING = duckdb.STRING
32+
NUMBER = duckdb.NUMBER
33+
DATETIME = duckdb.DATETIME
34+
35+
assert(types[1] == STRING)
36+
assert(STRING == types[1])
37+
assert(types[0] != STRING)
38+
assert((types[1] != STRING) == False)
39+
assert((STRING != types[1]) == False)
40+
41+
assert(types[1] in [STRING])
42+
assert(types[1] in [STRING, NUMBER])
43+
assert(types[1] not in [NUMBER, DATETIME])
44+
2645
def test_none_description(self, duckdb_empty_cursor):
2746
assert duckdb_empty_cursor.description is None
2847

tests/fast/api/test_duckdb_connection.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import duckdb
2+
import duckdb.typing
23
import pytest
34
from conftest import NumpyPandas, ArrowPandas
45

@@ -113,7 +114,7 @@ def test_readonly_properties(self):
113114
duckdb.execute("select 42")
114115
description = duckdb.description()
115116
rowcount = duckdb.rowcount()
116-
assert description == [('42', 'NUMBER', None, None, None, None, None)]
117+
assert description == [('42', 'INTEGER', None, None, None, None, None)]
117118
assert rowcount == -1
118119

119120
def test_execute(self):
@@ -349,9 +350,6 @@ def test_view(self):
349350
assert [([0, 1, 2, 3, 4],)] == duckdb.view("vw").fetchall()
350351
duckdb.execute("drop view vw")
351352

352-
def test_description(self):
353-
assert None != duckdb.description
354-
355353
def test_close(self):
356354
assert None != duckdb.close
357355

tests/fast/relational_api/test_rapi_description.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ def test_rapi_description(self, duckdb_cursor):
99
names = [x[0] for x in desc]
1010
types = [x[1] for x in desc]
1111
assert names == ['a', 'b']
12-
assert types == ['NUMBER', 'NUMBER']
12+
assert types == ['INTEGER', 'BIGINT']
13+
assert (all([x == duckdb.NUMBER for x in types]))
1314

1415
def test_rapi_describe(self, duckdb_cursor):
1516
np = pytest.importorskip("numpy")

tests/fast/test_map.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ def cast_to_string(df):
154154

155155
con = duckdb.connect()
156156
rel = con.sql('select i from range (10) tbl(i)')
157-
assert rel.types[0] == int
157+
assert rel.types[0] == duckdb.NUMBER
158158
mapped_rel = rel.map(cast_to_string, schema={'i': str})
159-
assert mapped_rel.types[0] == str
159+
assert mapped_rel.types[0] == duckdb.STRING
160160

161161
def test_explicit_schema_returntype_mismatch(self):
162162
def does_nothing(df):

tests/fast/test_result.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ def test_result_describe_types(self, duckdb_cursor):
3131
rel = connection.table("test")
3232
res = rel.execute()
3333
assert res.description == [
34-
('i', 'bool', None, None, None, None, None),
35-
('j', 'Time', None, None, None, None, None),
36-
('k', 'STRING', None, None, None, None, None),
34+
('i', 'BOOLEAN', None, None, None, None, None),
35+
('j', 'TIME', None, None, None, None, None),
36+
('k', 'VARCHAR', None, None, None, None, None),
3737
]
3838

3939
def test_result_timestamps(self, duckdb_cursor):
@@ -64,7 +64,7 @@ def test_result_interval(self):
6464

6565
rel = connection.table("intervals")
6666
res = rel.execute()
67-
assert res.description == [('ivals', 'TIMEDELTA', None, None, None, None, None)]
67+
assert res.description == [('ivals', 'INTERVAL', None, None, None, None, None)]
6868
assert res.fetchall() == [
6969
(datetime.timedelta(days=1.0),),
7070
(datetime.timedelta(seconds=2.0),),

0 commit comments

Comments
 (0)