Skip to content

Commit 522bb86

Browse files
authored
Fix Bug For QueryCondition With Unselected Boolean Type (#1291)
1 parent dda1d30 commit 522bb86

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

HISTORY.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# In Progress
2+
3+
## Bug Fixes
4+
* Fix issue where querying an array that has a Boolean attribute with `arrow=True`, while leaving that attribute unselected in `.query(attrs=...)`, results in the error `pyarrow.lib.ArrowInvalid: Invalid column index to set field.` [#1291](https://github.com/TileDB-Inc/TileDB-Py/pull/1291)
5+
16
# TileDB-Py 0.17.1 Release Notes
27

38
## API Changes

tiledb/multirange_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,12 +349,16 @@ def _run_query(self) -> Union[DataFrame, Table]:
349349
# this is a workaround to cast TILEDB_BOOL types from uint8
350350
# representation in Arrow to Boolean
351351
schema = table.schema
352-
for n in range(self.array.nattr):
353-
attr = self.array.attr(n)
352+
for attr_or_dim in schema:
353+
if not self.array.schema.has_attr(attr_or_dim.name):
354+
continue
355+
356+
attr = self.array.attr(attr_or_dim.name)
354357
if attr.dtype == bool:
355358
field_idx = schema.get_field_index(attr.name)
356359
field = pyarrow.field(attr.name, pyarrow.bool_())
357360
schema = schema.set(field_idx, field)
361+
358362
table = table.cast(schema)
359363

360364
if self.query.return_arrow:

tiledb/tests/test_query_condition.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import pytest
22

3-
import math
43
import numpy as np
5-
from numpy.testing import assert_array_equal
64
import string
75

86
import tiledb
@@ -638,3 +636,32 @@ def test_dense_datetime(self):
638636
result = A.query(attr_cond=qc).df[:]
639637

640638
assert all(self.filter_dense(result["dates"], dt_mask) == A[idx]["dates"])
639+
640+
def test_array_with_bool_but_unused(self):
641+
path = self.path("test_array_with_bool_but_unused")
642+
643+
dom = tiledb.Domain(
644+
tiledb.Dim(name="d", domain=(1, 3), tile=1, dtype=np.uint32)
645+
)
646+
attrs = [
647+
tiledb.Attr(name="myint", dtype=int),
648+
tiledb.Attr(name="mystr", dtype=str),
649+
tiledb.Attr(name="mybool", dtype=bool),
650+
]
651+
652+
schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True)
653+
tiledb.Array.create(path, schema)
654+
655+
data = {
656+
"myint": np.asarray([10, 20, 30]),
657+
"mystr": np.asarray(["apple", "ball", "cat"]),
658+
"mybool": np.asarray([True, False, True]),
659+
}
660+
661+
with tiledb.open(path, "w") as A:
662+
A[np.arange(1, 4)] = data
663+
664+
with tiledb.open(path) as A:
665+
qc = tiledb.QueryCondition("myint > 10")
666+
result = A.query(attr_cond=qc, attrs=["myint"])[:]
667+
assert all(result["myint"] > 10)

0 commit comments

Comments
 (0)