Skip to content

Commit f54118e

Browse files
committed
Support datetime64 For QueryCondition
* The datetime64 dtypes are now internally cast to UNIX seconds
1 parent 02b7ac5 commit f54118e

File tree

3 files changed

+76
-36
lines changed

3 files changed

+76
-36
lines changed

HISTORY.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# In Progress
2+
3+
## API Changes
4+
* Support `datetime64` for `QueryCondition` [#1279](https://github.com/TileDB-Inc/TileDB-Py/pull/1279)
5+
16
# TileDB-Py 0.17.0 Release Notes
27

38
## TileDB Embedded updates:

tiledb/query_condition.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ class QueryCondition:
8383
8484
``attr ::= <variable> | attr(<str>)``
8585
86-
Values are any Python-valid number or string. They may also be casted with ``val()``.
86+
Values are any Python-valid number or string. datetime64 values should first be
87+
cast to UNIX seconds. Values may also be cast with ``val()``.
8788
8889
``val ::= <num> | <str> | val(val)``
8990
@@ -362,14 +363,20 @@ def cast_val_to_dtype(
362363
# casted to numeric types
363364
if isinstance(val, str):
364365
raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.")
365-
cast = getattr(np, dtype)
366+
if np.issubdtype(dtype, np.datetime64):
367+
cast = getattr(np, "uint64")
368+
else:
369+
cast = getattr(np, dtype)
366370
val = cast(val)
367371
except ValueError:
368372
raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.")
369373

370374
return val
371375

372376
def init_pyqc(self, pyqc: PyQueryCondition, dtype: str) -> Callable:
377+
if np.issubdtype(dtype, np.datetime64):
378+
dtype = "uint64"
379+
373380
init_fn_name = f"init_{dtype}"
374381

375382
if not hasattr(pyqc, init_fn_name):

tiledb/tests/test_query_condition.py

Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import math
44
import numpy as np
55
from numpy.testing import assert_array_equal
6+
import pandas as pd
67
import string
78

89
import tiledb
@@ -11,12 +12,14 @@
1112

1213

1314
class QueryConditionTest(DiskTestCase):
14-
def filter_sparse(self, data, mask):
15+
def filter_dense(self, data, mask):
1516
if isinstance(mask, np.ndarray):
1617
mask = mask[0]
1718

1819
if isinstance(mask, float) and np.isnan(mask):
1920
return data[np.invert(np.isnan(data))]
21+
elif np.isnat(mask):
22+
return data[np.invert(np.isnat(data))]
2023
else:
2124
return data[data != mask]
2225

@@ -122,7 +125,7 @@ def test_unsigned_dense(self):
122125

123126
qc = tiledb.QueryCondition("U < 5")
124127
result = A.query(attr_cond=qc, attrs=["U"])[:]
125-
assert all(self.filter_sparse(result["U"], mask) < 5)
128+
assert all(self.filter_dense(result["U"], mask) < 5)
126129

127130
def test_signed_sparse(self):
128131
uri = self.create_input_array_UIDSA(sparse=True)
@@ -153,20 +156,20 @@ def test_signed_dense(self):
153156

154157
qc = tiledb.QueryCondition("I < 1")
155158
result = A.query(attr_cond=qc, attrs=["I"])[:]
156-
assert all(self.filter_sparse(result["I"], mask) < 1)
159+
assert all(self.filter_dense(result["I"], mask) < 1)
157160

158161
qc = tiledb.QueryCondition("I < +1")
159162
result = A.query(attr_cond=qc, attrs=["I"])[:]
160-
assert all(self.filter_sparse(result["I"], mask) < +1)
163+
assert all(self.filter_dense(result["I"], mask) < +1)
161164

162165
qc = tiledb.QueryCondition("I < ---1")
163166
result = A.query(attr_cond=qc, attrs=["I"])[:]
164-
assert all(self.filter_sparse(result["I"], mask) < ---1)
167+
assert all(self.filter_dense(result["I"], mask) < ---1)
165168

166169
qc = tiledb.QueryCondition("-5 < I < 5")
167170
result = A.query(attr_cond=qc, attrs=["I"])[:]
168-
assert all(-5 < self.filter_sparse(result["I"], mask))
169-
assert all(self.filter_sparse(result["I"], mask) < 5)
171+
assert all(-5 < self.filter_dense(result["I"], mask))
172+
assert all(self.filter_dense(result["I"], mask) < 5)
170173

171174
def test_floats_sparse(self):
172175
with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A:
@@ -189,17 +192,17 @@ def test_floats_dense(self):
189192

190193
qc = tiledb.QueryCondition("D > 5.0")
191194
result = A.query(attr_cond=qc, attrs=["D"])[:]
192-
assert all(self.filter_sparse(result["D"], mask) > 5.0)
195+
assert all(self.filter_dense(result["D"], mask) > 5.0)
193196

194197
qc = tiledb.QueryCondition("(D > 0.7) & (D < 3.5)")
195198
result = A.query(attr_cond=qc, attrs=["D"])[:]
196-
assert all(self.filter_sparse(result["D"], mask) > 0.7)
197-
assert all(self.filter_sparse(result["D"], mask) < 3.5)
199+
assert all(self.filter_dense(result["D"], mask) > 0.7)
200+
assert all(self.filter_dense(result["D"], mask) < 3.5)
198201

199202
qc = tiledb.QueryCondition("0.2 < D < 0.75")
200203
result = A.query(attr_cond=qc, attrs=["D", "D"])[:]
201-
assert all(0.2 < self.filter_sparse(result["D"], mask))
202-
assert all(self.filter_sparse(result["D"], mask) < 0.75)
204+
assert all(0.2 < self.filter_dense(result["D"], mask))
205+
assert all(self.filter_dense(result["D"], mask) < 0.75)
203206

204207
def test_string_sparse(self):
205208
with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A:
@@ -217,11 +220,11 @@ def test_string_dense(self):
217220
with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A:
218221
qc = tiledb.QueryCondition("S == 'c'")
219222
result = A.query(attr_cond=qc, attrs=["S"])[:]
220-
assert all(self.filter_sparse(result["S"], A.attr("S").fill) == b"c")
223+
assert all(self.filter_dense(result["S"], A.attr("S").fill) == b"c")
221224

222225
qc = tiledb.QueryCondition("A == 'a'")
223226
result = A.query(attr_cond=qc, attrs=["A"])[:]
224-
assert all(self.filter_sparse(result["A"], A.attr("A").fill) == b"a")
227+
assert all(self.filter_dense(result["A"], A.attr("A").fill) == b"a")
225228

226229
def test_combined_types_sparse(self):
227230
with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A:
@@ -256,34 +259,34 @@ def test_combined_types_dense(self):
256259

257260
qc = tiledb.QueryCondition("(I > 0) & ((-3 < D) & (D < 3.0))")
258261
result = A.query(attr_cond=qc, attrs=["I", "D"])[:]
259-
res_I = self.filter_sparse(result["I"], mask_I)
260-
res_D = self.filter_sparse(result["D"], mask_D)
262+
res_I = self.filter_dense(result["I"], mask_I)
263+
res_D = self.filter_dense(result["D"], mask_D)
261264
assert all(res_I > 0) & all(-3 < res_D) & all(res_D < 3.0)
262265

263266
qc = tiledb.QueryCondition("U >= 3 and 0.7 < D")
264267
result = A.query(attr_cond=qc, attrs=["U", "D"])[:]
265-
res_U = self.filter_sparse(result["U"], mask_U)
266-
res_D = self.filter_sparse(result["D"], mask_D)
268+
res_U = self.filter_dense(result["U"], mask_U)
269+
res_D = self.filter_dense(result["D"], mask_D)
267270
assert all(res_U >= 3) & all(0.7 < res_D)
268271

269272
qc = tiledb.QueryCondition("(0.2 < D and D < 0.75) and (-5 < I < 5)")
270273
result = A.query(attr_cond=qc, attrs=["D", "I"])[:]
271-
res_D = self.filter_sparse(result["D"], mask_D)
272-
res_I = self.filter_sparse(result["I"], mask_I)
274+
res_D = self.filter_dense(result["D"], mask_D)
275+
res_I = self.filter_dense(result["I"], mask_I)
273276
assert all((0.2 < res_D) & (res_D < 0.75))
274277
assert all((-5 < res_I) & (res_I < 5))
275278

276279
qc = tiledb.QueryCondition("(-5 < I <= -1) and (0.2 < D < 0.75)")
277280
result = A.query(attr_cond=qc, attrs=["D", "I"])[:]
278-
res_D = self.filter_sparse(result["D"], mask_D)
279-
res_I = self.filter_sparse(result["I"], mask_I)
281+
res_D = self.filter_dense(result["D"], mask_D)
282+
res_I = self.filter_dense(result["I"], mask_I)
280283
assert all((0.2 < res_D) & (res_D < 0.75))
281284
assert all((-5 < res_I) & (res_I <= -1))
282285

283286
qc = tiledb.QueryCondition("(0.2 < D < 0.75) and (-5 < I < 5)")
284287
result = A.query(attr_cond=qc, attrs=["D", "I"])[:]
285-
res_D = self.filter_sparse(result["D"], mask_D)
286-
res_I = self.filter_sparse(result["I"], mask_I)
288+
res_D = self.filter_dense(result["D"], mask_D)
289+
res_I = self.filter_dense(result["I"], mask_I)
287290
assert all((0.2 < res_D) & (res_D < 0.75))
288291
assert all((-5 < res_I) & (res_I < 5))
289292

@@ -315,11 +318,11 @@ def test_check_attrs_dense(self):
315318

316319
qc = tiledb.QueryCondition("U < 0.1")
317320
result = A.query(attr_cond=qc, attrs=["U"])[:]
318-
assert all(self.filter_sparse(result["U"], mask) < 0.1)
321+
assert all(self.filter_dense(result["U"], mask) < 0.1)
319322

320323
qc = tiledb.QueryCondition("U < 1.0")
321324
result = A.query(attr_cond=qc, attrs=["U"])[:]
322-
assert all(self.filter_sparse(result["U"], mask) < 1.0)
325+
assert all(self.filter_dense(result["U"], mask) < 1.0)
323326

324327
with self.assertRaises(tiledb.TileDBError):
325328
qc = tiledb.QueryCondition("U < '1'")
@@ -485,12 +488,12 @@ def test_or_dense(self):
485488

486489
qc = tiledb.QueryCondition("(D < 0.25) | (D > 0.75)")
487490
result = A.query(attr_cond=qc, attrs=["D"])[:]
488-
res = self.filter_sparse(result["D"], mask)
491+
res = self.filter_dense(result["D"], mask)
489492
assert all((res < 0.25) | (res > 0.75))
490493

491494
qc = tiledb.QueryCondition("(D < 0.25) or (D > 0.75)")
492495
result = A.query(attr_cond=qc, attrs=["D"])[:]
493-
res = self.filter_sparse(result["D"], mask)
496+
res = self.filter_dense(result["D"], mask)
494497
assert all((res < 0.25) | (res > 0.75))
495498

496499
@pytest.mark.skipif(
@@ -584,26 +587,51 @@ def test_in_operator_dense(self):
584587

585588
qc = tiledb.QueryCondition("U in [1, 2, 3]")
586589
result = A.query(attr_cond=qc, attrs=["U"])[:]
587-
for val in self.filter_sparse(result["U"], U_mask):
590+
for val in self.filter_dense(result["U"], U_mask):
588591
assert val in [1, 2, 3]
589592

590593
qc = tiledb.QueryCondition("S in ['a', 'e', 'i', 'o', 'u']")
591594
result = A.query(attr_cond=qc, attrs=["S"])[:]
592-
for val in self.filter_sparse(result["S"], S_mask):
595+
for val in self.filter_dense(result["S"], S_mask):
593596
assert val in [b"a", b"e", b"i", b"o", b"u"]
594597

595598
qc = tiledb.QueryCondition(
596599
"S in ['a', 'e', 'i', 'o', 'u'] and U in [5, 6, 7]"
597600
)
598601
result = A.query(attr_cond=qc)[:]
599-
for val in self.filter_sparse(result["U"], U_mask):
602+
for val in self.filter_dense(result["U"], U_mask):
600603
assert val in [5, 6, 7]
601-
for val in self.filter_sparse(result["S"], S_mask):
604+
for val in self.filter_dense(result["S"], S_mask):
602605
assert val in [b"a", b"e", b"i", b"o", b"u"]
603606

604607
result = A.query(attr_cond=tiledb.QueryCondition("U in [8]"))[:]
605-
for val in self.filter_sparse(result["U"], U_mask):
608+
for val in self.filter_dense(result["U"], U_mask):
606609
assert val == 8
607610

608611
result = A.query(attr_cond=tiledb.QueryCondition("S in ['8']"))[:]
609-
assert len(self.filter_sparse(result["S"], S_mask)) == 0
612+
assert len(self.filter_dense(result["S"], S_mask)) == 0
613+
614+
def test_dense_datetime(self):
615+
uri = self.path("query-filter-dense-datetime.tdb")
616+
617+
data = pd.DataFrame(
618+
np.random.randint(438923600, 243892360000, 20),
619+
columns=["dates"],
620+
)
621+
622+
tiledb.from_pandas(
623+
uri,
624+
data,
625+
column_types={"dates": "datetime64[ns]"},
626+
)
627+
628+
with tiledb.open(uri) as A:
629+
idx = 5
630+
631+
dt_mask = A.attr("dates").fill
632+
search_date = data["dates"][idx]
633+
634+
qc = tiledb.QueryCondition(f"dates == {search_date}")
635+
result = A.query(attr_cond=qc).df[:]
636+
637+
assert all(self.filter_dense(result["dates"], dt_mask) == A[idx]["dates"])

0 commit comments

Comments
 (0)