Skip to content

Commit b7ae1f7

Browse files
[c++] Move ManagedQuery to separate object library (#4409)
* Migrate ManagedQuery * Remove external dependencies from ColumnBuffer * Remove old ManagedQuery * Use new ManagedQuery * Add missing methods * Switch python API to use new ManagedQuery * Fix geometry type handling * Correctly handle array offset and extension semantics * Add extra tests * Remove unused header * Fix types * Replace `select_columns` with lambda to fix pybind11 span incompatibility * Update R bindings * Rename `ResultOrder` values to lowercase
1 parent 523cdd7 commit b7ae1f7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+2520
-3305
lines changed

apis/python/src/tiledbsoma/managed_query.cc

Lines changed: 101 additions & 76 deletions
Large diffs are not rendered by default.

apis/python/src/tiledbsoma/pytiledbsoma.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ PYBIND11_MODULE(pytiledbsoma, m) {
6666
.value("soma_write", OpenMode::soma_write)
6767
.value("soma_delete", OpenMode::soma_delete);
6868

69-
py::enum_<ResultOrder>(m, "ResultOrder")
70-
.value("automatic", ResultOrder::automatic)
71-
.value("rowmajor", ResultOrder::rowmajor)
72-
.value("colmajor", ResultOrder::colmajor)
73-
.value("unordered", ResultOrder::unordered)
74-
.value("globalorder", ResultOrder::global);
69+
py::enum_<common::ResultOrder>(m, "ResultOrder")
70+
.value("automatic", common::ResultOrder::automatic)
71+
.value("rowmajor", common::ResultOrder::rowmajor)
72+
.value("colmajor", common::ResultOrder::colmajor)
73+
.value("unordered", common::ResultOrder::unordered)
74+
.value("globalorder", common::ResultOrder::global);
7575

7676
py::enum_<URIType>(m, "URIType")
7777
.value("automatic", URIType::automatic)

apis/python/src/tiledbsoma/soma_column.cc

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -31,145 +31,145 @@ void load_soma_column(py::module& m) {
3131
py::class_<SOMAColumn, std::shared_ptr<SOMAColumn>>(m.attr("SOMAColumn"))
3232
.def(
3333
"select_columns",
34-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& query) { column->select_columns(query); })
34+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& query) { column->select_columns(query); })
3535
.def(
3636
"set_dim_points_string_or_bytes",
37-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<std::string>& points) {
37+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<std::string>& points) {
3838
column->set_dim_points<std::string>(mq, points);
3939
})
4040
.def(
4141
"set_dim_points_double",
42-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<double_t>& points) {
42+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<double_t>& points) {
4343
column->set_dim_points<double_t>(mq, points);
4444
})
4545
.def(
4646
"set_dim_points_float",
47-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<float_t>& points) {
47+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<float_t>& points) {
4848
column->set_dim_points<float_t>(mq, points);
4949
})
5050
.def(
5151
"set_dim_points_int64",
52-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<int64_t>& points) {
52+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<int64_t>& points) {
5353
column->set_dim_points<int64_t>(mq, points);
5454
})
5555
.def(
5656
"set_dim_points_int32",
57-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<int32_t>& points) {
57+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<int32_t>& points) {
5858
column->set_dim_points<int32_t>(mq, points);
5959
})
6060
.def(
6161
"set_dim_points_int16",
62-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<int16_t>& points) {
62+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<int16_t>& points) {
6363
column->set_dim_points<int16_t>(mq, points);
6464
})
6565
.def(
6666
"set_dim_points_int8",
67-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<int8_t>& points) {
67+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<int8_t>& points) {
6868
column->set_dim_points<int8_t>(mq, points);
6969
})
7070
.def(
7171
"set_dim_points_uint64",
72-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<uint64_t>& points) {
72+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<uint64_t>& points) {
7373
column->set_dim_points<uint64_t>(mq, points);
7474
})
7575
.def(
7676
"set_dim_points_uint32",
77-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<uint32_t>& points) {
77+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<uint32_t>& points) {
7878
column->set_dim_points<uint32_t>(mq, points);
7979
})
8080
.def(
8181
"set_dim_points_uint16",
82-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<uint16_t>& points) {
82+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<uint16_t>& points) {
8383
column->set_dim_points<uint16_t>(mq, points);
8484
})
8585
.def(
8686
"set_dim_points_uint8",
87-
[](std::shared_ptr<SOMAColumn>& column, ManagedQuery& mq, const std::vector<uint8_t>& points) {
87+
[](std::shared_ptr<SOMAColumn>& column, common::ManagedQuery& mq, const std::vector<uint8_t>& points) {
8888
column->set_dim_points<uint8_t>(mq, points);
8989
})
9090
.def(
9191
"set_dim_points_double_array",
9292
[](std::shared_ptr<SOMAColumn>& column,
93-
ManagedQuery& mq,
93+
common::ManagedQuery& mq,
9494
const std::vector<std::vector<double_t>>& points) {
9595
column->set_dim_points<std::vector<double_t>>(mq, points);
9696
})
9797
.def(
9898
"set_dim_ranges_string_or_bytes",
9999
[](std::shared_ptr<SOMAColumn>& column,
100-
ManagedQuery& mq,
100+
common::ManagedQuery& mq,
101101
const std::vector<std::pair<std::string, std::string>>& ranges) {
102102
column->set_dim_ranges<std::string>(mq, ranges);
103103
})
104104
.def(
105105
"set_dim_ranges_double",
106106
[](std::shared_ptr<SOMAColumn>& column,
107-
ManagedQuery& mq,
107+
common::ManagedQuery& mq,
108108
const std::vector<std::pair<double_t, double_t>>& ranges) {
109109
column->set_dim_ranges<double_t>(mq, ranges);
110110
})
111111
.def(
112112
"set_dim_ranges_float",
113113
[](std::shared_ptr<SOMAColumn>& column,
114-
ManagedQuery& mq,
114+
common::ManagedQuery& mq,
115115
const std::vector<std::pair<float_t, float_t>>& ranges) { column->set_dim_ranges<float_t>(mq, ranges); })
116116
.def(
117117
"set_dim_ranges_int64",
118118
[](std::shared_ptr<SOMAColumn>& column,
119-
ManagedQuery& mq,
119+
common::ManagedQuery& mq,
120120
const std::vector<std::pair<int64_t, int64_t>>& ranges) { column->set_dim_ranges<int64_t>(mq, ranges); })
121121
.def(
122122
"set_dim_ranges_int32",
123123
[](std::shared_ptr<SOMAColumn>& column,
124-
ManagedQuery& mq,
124+
common::ManagedQuery& mq,
125125
const std::vector<std::pair<int32_t, int32_t>>& ranges) { column->set_dim_ranges<int32_t>(mq, ranges); })
126126
.def(
127127
"set_dim_ranges_int16",
128128
[](std::shared_ptr<SOMAColumn>& column,
129-
ManagedQuery& mq,
129+
common::ManagedQuery& mq,
130130
const std::vector<std::pair<int16_t, int16_t>>& ranges) { column->set_dim_ranges<int16_t>(mq, ranges); })
131131
.def(
132132
"set_dim_ranges_int8",
133133
[](std::shared_ptr<SOMAColumn>& column,
134-
ManagedQuery& mq,
134+
common::ManagedQuery& mq,
135135
const std::vector<std::pair<int8_t, int8_t>>& ranges) { column->set_dim_ranges<int8_t>(mq, ranges); })
136136
.def(
137137
"set_dim_ranges_uint64",
138138
[](std::shared_ptr<SOMAColumn>& column,
139-
ManagedQuery& mq,
139+
common::ManagedQuery& mq,
140140
const std::vector<std::pair<uint64_t, uint64_t>>& ranges) {
141141
column->set_dim_ranges<uint64_t>(mq, ranges);
142142
})
143143
.def(
144144
"set_dim_ranges_uint32",
145145
[](std::shared_ptr<SOMAColumn>& column,
146-
ManagedQuery& mq,
146+
common::ManagedQuery& mq,
147147
const std::vector<std::pair<uint32_t, uint32_t>>& ranges) {
148148
column->set_dim_ranges<uint32_t>(mq, ranges);
149149
})
150150
.def(
151151
"set_dim_ranges_uint16",
152152
[](std::shared_ptr<SOMAColumn>& column,
153-
ManagedQuery& mq,
153+
common::ManagedQuery& mq,
154154
const std::vector<std::pair<uint16_t, uint16_t>>& ranges) {
155155
column->set_dim_ranges<uint16_t>(mq, ranges);
156156
})
157157
.def(
158158
"set_dim_ranges_uint8",
159159
[](std::shared_ptr<SOMAColumn>& column,
160-
ManagedQuery& mq,
160+
common::ManagedQuery& mq,
161161
const std::vector<std::pair<uint8_t, uint8_t>>& ranges) { column->set_dim_ranges<uint8_t>(mq, ranges); })
162162
.def(
163163
"set_dim_ranges_double_array",
164164
[](std::shared_ptr<SOMAColumn>& column,
165-
ManagedQuery& mq,
165+
common::ManagedQuery& mq,
166166
const std::vector<std::pair<std::vector<double_t>, std::vector<double_t>>>& ranges) {
167167
column->set_dim_ranges<std::vector<double_t>>(mq, ranges);
168168
})
169169
.def(
170170
"set_dim_points_arrow",
171171
[](std::shared_ptr<SOMAColumn>& column,
172-
ManagedQuery& mq,
172+
common::ManagedQuery& mq,
173173
py::object py_arrow_array,
174174
int partition_index,
175175
int partition_count) {

apis/python/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,7 @@ def test_extend_enumeration_values(tmp_path, extend_not_write, ordered):
869869
]:
870870
with pytest.raises(
871871
soma.SOMAError,
872-
match=r"null values are not supported",
872+
match=r"Null values are not supported",
873873
):
874874
sdf.extend_enumeration_values(nvalues)
875875

@@ -1006,7 +1006,7 @@ def test_extend_enumeration_values_deduplication(tmp_path, deduplicate, ordered,
10061006
values = {
10071007
"float32_enum": pa.array([quiet_nan, quiet_nan], type=pa.float32()),
10081008
}
1009-
with pytest.raises(soma.SOMAError):
1009+
with pytest.raises(ValueError):
10101010
sdf.extend_enumeration_values(values)
10111011

10121012
# Core treats these as distinct so we do too

apis/python/tests/test_io.py

Lines changed: 93 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,32 +25,113 @@ def src_matrix(request):
2525
"cap_nbytes",
2626
[1, 100, 1_000],
2727
)
28-
def test_write_arrow_table(tmp_path, num_rows, cap_nbytes):
28+
@pytest.mark.parametrize(
29+
"schema",
30+
[
31+
pa.schema(
32+
[
33+
("bool", pa.bool_()),
34+
("int32", pa.int32()),
35+
("float64", pa.float64()),
36+
("string", pa.string()),
37+
],
38+
),
39+
pa.schema(
40+
[
41+
("bool_enum", pa.dictionary(pa.int8(), pa.bool_())),
42+
("int32_enum", pa.dictionary(pa.int8(), pa.int32())),
43+
("float64_enum", pa.dictionary(pa.int8(), pa.float64())),
44+
("string_enum", pa.dictionary(pa.int8(), pa.string())),
45+
],
46+
),
47+
],
48+
)
49+
def test_write_arrow_table(tmp_path, num_rows, cap_nbytes, schema):
50+
"""
51+
Additional focus-testing for tiledbsoma.io._write_arrow_table
52+
"""
53+
54+
pydict = {}
55+
pydict["soma_joinid"] = list(range(num_rows))
56+
pydict["bool"] = [bool(e % 2) for e in range(num_rows)]
57+
pydict["int32"] = [(e + 1) * 10 for e in range(num_rows)]
58+
pydict["float64"] = [(e + 1) / 25 for e in range(num_rows)]
59+
pydict["string"] = [str((e + 1) / 25) for e in range(num_rows)]
60+
pydict["bool_enum"] = [bool(e % 2) for e in range(num_rows)]
61+
pydict["int32_enum"] = [((e + 1) % 24) * 10 for e in range(num_rows)]
62+
pydict["float64_enum"] = [((e + 1) % 24) / 25 for e in range(num_rows)]
63+
pydict["string_enum"] = [str(((e + 1) % 24) / 25) for e in range(num_rows)]
64+
65+
tcopt = soma.TileDBCreateOptions(remote_cap_nbytes=cap_nbytes)
66+
twopt = soma.TileDBWriteOptions()
67+
uri = tmp_path.as_posix()
68+
69+
table = pa.Table.from_pydict(pydict, schema=schema.insert(0, pa.field("soma_joinid", pa.int64())))
70+
expect_error = cap_nbytes < table[:1].nbytes # Not enough room for even one row
71+
domain = [[0, max(1, len(table) - 1)]]
72+
73+
with soma.DataFrame.create(uri, schema=schema, domain=domain) as sdf:
74+
if expect_error:
75+
with pytest.raises(soma.SOMAError):
76+
somaio.ingest._write_arrow_table(table, sdf, tcopt, twopt)
77+
else:
78+
somaio.ingest._write_arrow_table(table, sdf, tcopt, twopt)
79+
80+
if not expect_error:
81+
with soma.DataFrame.open(uri) as sdf:
82+
pdf = sdf.read().concat().to_pandas()
83+
84+
for column in pdf.columns.to_list():
85+
assert list(pdf[column]) == pydict[column]
86+
87+
88+
@pytest.mark.parametrize(
89+
"num_rows",
90+
[0, 1, 2, 3, 4, 10, 100, 1_000],
91+
)
92+
@pytest.mark.parametrize(
93+
"cap_nbytes",
94+
[1, 100, 1_000],
95+
)
96+
def test_write_arrow_table_enum_to_values(tmp_path, num_rows, cap_nbytes):
2997
"""
3098
Additional focus-testing for tiledbsoma.io._write_arrow_table
3199
"""
32100

33-
schema = pa.schema(
101+
array_schema = pa.schema(
34102
[
35-
("foo", pa.int32()),
36-
("bar", pa.float64()),
103+
("bool", pa.bool_()),
104+
("int32", pa.int32()),
105+
("float64", pa.float64()),
106+
("string", pa.string()),
107+
],
108+
)
109+
110+
data_schema = pa.schema(
111+
[
112+
("bool", pa.dictionary(pa.int8(), pa.bool_())),
113+
("int32", pa.dictionary(pa.int8(), pa.int32())),
114+
("float64", pa.dictionary(pa.int8(), pa.float64())),
115+
("string", pa.dictionary(pa.int8(), pa.string())),
37116
],
38117
)
39118

40119
pydict = {}
41120
pydict["soma_joinid"] = list(range(num_rows))
42-
pydict["foo"] = [(e + 1) * 10 for e in range(num_rows)]
43-
pydict["bar"] = [(e + 1) / 25 for e in range(num_rows)]
121+
pydict["bool"] = [bool(e % 2) for e in range(num_rows)]
122+
pydict["int32"] = [((e + 1) % 24) * 10 for e in range(num_rows)]
123+
pydict["float64"] = [((e + 1) % 24) / 25 for e in range(num_rows)]
124+
pydict["string"] = [str(((e + 1) % 24) / 25) for e in range(num_rows)]
44125

45126
tcopt = soma.TileDBCreateOptions(remote_cap_nbytes=cap_nbytes)
46127
twopt = soma.TileDBWriteOptions()
47128
uri = tmp_path.as_posix()
48-
expect_error = cap_nbytes == 1 and num_rows > 0 # Not enough room for even one row
49129

50-
table = pa.Table.from_pydict(pydict, schema=schema.insert(0, pa.field("soma_joinid", pa.int64())))
130+
table = pa.Table.from_pydict(pydict, schema=data_schema.insert(0, pa.field("soma_joinid", pa.int64())))
131+
expect_error = cap_nbytes < table[:1].nbytes # Not enough room for even one row
51132
domain = [[0, max(1, len(table) - 1)]]
52133

53-
with soma.DataFrame.create(uri, schema=schema, domain=domain) as sdf:
134+
with soma.DataFrame.create(uri, schema=array_schema, domain=domain) as sdf:
54135
if expect_error:
55136
with pytest.raises(soma.SOMAError):
56137
somaio.ingest._write_arrow_table(table, sdf, tcopt, twopt)
@@ -60,7 +141,9 @@ def test_write_arrow_table(tmp_path, num_rows, cap_nbytes):
60141
if not expect_error:
61142
with soma.DataFrame.open(uri) as sdf:
62143
pdf = sdf.read().concat().to_pandas()
63-
assert list(pdf["foo"]) == pydict["foo"]
144+
145+
for column in pdf.columns.to_list():
146+
assert list(pdf[column]) == pydict[column]
64147

65148

66149
def test_add_matrices(tmp_path, conftest_pbmc_small_h5ad_path):

0 commit comments

Comments
 (0)