Skip to content

Commit 5f96293

Browse files
committed
[?] Correct Subarray With Negative Domains on Dense Arrays
* We currently support Python negative indexing, i.e., where -1 returns the last element in the sequence. However, this conflicts with reading and writing subarrays that use negative coordinate values -- indexing with negative values errors out. * This PR deprecates the usage of Python negative indexing so that dense arrays with negative domains can be indexed by TileDB dimensions correctly.
1 parent 656f54f commit 5f96293

File tree

2 files changed

+68
-26
lines changed

2 files changed

+68
-26
lines changed

tiledb/libtiledb.pyx

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,8 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
589589
Return a numpy array representation of the tiledb subarray buffer
590590
for a given domain and tuple of index slices
591591
"""
592+
offsetted = False
593+
592594
ndim = dom.ndim
593595
if len(idx) != ndim:
594596
raise IndexError("number of indices does not match domain rank: "
@@ -646,14 +648,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
646648
raise IndexError("cannot index integral domain dimension with floating point slice")
647649
elif not isinstance(start, _inttypes):
648650
raise IndexError("cannot index integral domain dimension with non-integral slice (dtype: {})".format(type(start)))
649-
# apply negative indexing (wrap-around semantics)
650-
if not is_datetime and start < 0:
651-
start += int(dim_ub) + 1
652651
if start < dim_lb:
653652
# numpy allows start value < the array dimension shape,
654653
# clamp to lower bound of dimension domain
655-
#start = dim_lb
656-
raise IndexError("index out of bounds <todo>")
654+
start = dim_lb
655+
# raise IndexError("index out of bounds <todo>")
657656
else:
658657
start = dim_lb
659658
if stop is not None:
@@ -665,8 +664,6 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
665664
raise IndexError("cannot index integral domain dimension with floating point slice")
666665
elif not isinstance(start, _inttypes):
667666
raise IndexError("cannot index integral domain dimension with non-integral slice (dtype: {})".format(type(start)))
668-
if not is_datetime and stop < 0:
669-
stop += dim_ub
670667
if stop > dim_ub:
671668
# numpy allows stop value > than the array dimension shape,
672669
# clamp to upper bound of dimension domain

tiledb/tests/test_libtiledb.py

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -464,19 +464,19 @@ def test_array_1d(self):
464464
assert_array_equal(A, T[slice(None)])
465465
assert_array_equal(A[:10], T[:10])
466466
assert_array_equal(A[10:20], T[10:20])
467-
assert_array_equal(A[-10:], T[-10:])
467+
assert_array_equal(A[-10:], T[1050-10:])
468468

469469
# ellipsis
470470
assert_array_equal(A[:10, ...], T[:10, ...])
471471
assert_array_equal(A[10:50, ...], T[10:50, ...])
472-
assert_array_equal(A[-50:, ...], T[-50:, ...])
472+
assert_array_equal(A[-50:, ...], T[1050-50:, ...])
473473
assert_array_equal(A[..., :10], T[..., :10])
474474
assert_array_equal(A[..., 10:20], T[..., 10:20])
475-
assert_array_equal(A[..., -50:], T[..., -50:])
475+
assert_array_equal(A[..., -50:], T[..., 1050-50:])
476476

477477
# across tiles
478478
assert_array_equal(A[:150], T[:150])
479-
assert_array_equal(A[-250:], T[-250:])
479+
assert_array_equal(A[-250:], T[1050-250:])
480480

481481
# point index
482482
self.assertEqual(A[0], T[0])
@@ -503,7 +503,7 @@ def test_array_1d(self):
503503
# basic step
504504
assert_array_equal(A[:50:2], T[:50:2])
505505
assert_array_equal(A[:2:50], T[:2:50])
506-
assert_array_equal(A[10:-1:50], T[10:-1:50])
506+
assert_array_equal(A[10:-1:50], T[10:1050:50])
507507

508508
# indexing errors
509509
with self.assertRaises(IndexError):
@@ -637,46 +637,46 @@ def test_array_2d(self):
637637
assert_array_equal(A[:10], T[:10])
638638
assert_array_equal(A[:10], T[:10])
639639
assert_array_equal(A[10:20], T[10:20])
640-
assert_array_equal(A[-10:], T[-10:])
640+
assert_array_equal(A[-10:], T[1000-10:])
641641
assert_array_equal(A[:10, :], T[:10, :])
642642
assert_array_equal(A[10:20, :], T[10:20, :])
643-
assert_array_equal(A[-10:, :], T[-10:, :])
643+
assert_array_equal(A[-10:, :], T[1000-10:, :])
644644
assert_array_equal(A[:10, ...], T[:10, ...])
645645
assert_array_equal(A[10:20, ...], T[10:20, ...])
646-
assert_array_equal(A[-10:, ...], T[-10:, ...])
646+
assert_array_equal(A[-10:, ...], T[1000-10:, ...])
647647
assert_array_equal(A[:10, :, ...], T[:10, :, ...])
648648
assert_array_equal(A[10:20, :, ...], T[10:20, :, ...])
649-
assert_array_equal(A[-10:, :, ...], T[-10:, :, ...])
649+
assert_array_equal(A[-10:, :, ...], T[1000-10:, :, ...])
650650

651651
# slice second dimension
652652
assert_array_equal(A[:, :2], T[:, :2])
653653
assert_array_equal(A[:, 2:4], T[:, 2:4])
654-
assert_array_equal(A[:, -2:], T[:, -2:])
654+
assert_array_equal(A[:, -2:], T[:, 10-2:])
655655
assert_array_equal(A[..., :2], T[..., :2])
656656
assert_array_equal(A[..., 2:4], T[..., 2:4])
657-
assert_array_equal(A[..., -2:], T[..., -2:])
657+
assert_array_equal(A[..., -2:], T[..., 10-2:])
658658
assert_array_equal(A[:, ..., :2], T[:, ..., :2])
659659
assert_array_equal(A[:, ..., 2:4], T[:, ..., 2:4])
660-
assert_array_equal(A[:, ..., -2:], T[:, ..., -2:])
660+
assert_array_equal(A[:, ..., -2:], T[:, ..., 10-2:])
661661

662662
# slice both dimensions
663663
assert_array_equal(A[:10, :2], T[:10, :2])
664664
assert_array_equal(A[10:20, 2:4], T[10:20, 2:4])
665-
assert_array_equal(A[-10:, -2:], T[-10:, -2:])
665+
assert_array_equal(A[-10:, -2:], T[1000-10:, 10-2:])
666666

667667
# slice across tile boundries
668668
assert_array_equal(A[:110], T[:110])
669669
assert_array_equal(A[190:310], T[190:310])
670-
assert_array_equal(A[-110:], T[-110:])
670+
assert_array_equal(A[-110:], T[1000-110:])
671671
assert_array_equal(A[:110, :], T[:110, :])
672672
assert_array_equal(A[190:310, :], T[190:310, :])
673-
assert_array_equal(A[-110:, :], T[-110:, :])
673+
assert_array_equal(A[-110:, :], T[1000-110:, :])
674674
assert_array_equal(A[:, :3], T[:, :3])
675675
assert_array_equal(A[:, 3:7], T[:, 3:7])
676-
assert_array_equal(A[:, -3:], T[:, -3:])
676+
assert_array_equal(A[:, -3:], T[:, 10-3:])
677677
assert_array_equal(A[:110, :3], T[:110, :3])
678678
assert_array_equal(A[190:310, 3:7], T[190:310, 3:7])
679-
assert_array_equal(A[-110:, -3:], T[-110:, -3:])
679+
assert_array_equal(A[-110:, -3:], T[1000-110:, 10-3:])
680680

681681
# single row/col/item
682682
assert_array_equal(A[0], T[0])
@@ -1247,7 +1247,7 @@ def test_varlen_sparse_all_empty_strings(self):
12471247

12481248
with tiledb.open(uri, mode="r") as T:
12491249
# check interior range
1250-
assert_array_equal(A[1:-1], T[2:-1]["a1"])
1250+
assert_array_equal(A[1:-1], T[1 : dim_len - 1]["a1"])
12511251
assert_array_equal(A[1:-1], T.multi_index[2 : dim_len - 1]["a1"])
12521252

12531253
def test_varlen_write_unicode(self):
@@ -1521,6 +1521,51 @@ def test_array_varlen_2d_s_fixed(self):
15211521
with tiledb.DenseArray(uri) as T:
15221522
assert_array_equal(A, T)
15231523

1524+
@pytest.mark.parametrize("lo,hi", [(-20, -10), (-10, 0), (0, 10), (10, 20)])
1525+
def test_dense_array_with_negative_positive_domain(self, lo, hi):
1526+
path = self.path("test_dense_array_with_negative_domain")
1527+
attr = tiledb.Attr(dtype=np.uint8)
1528+
dom = tiledb.Domain(tiledb.Dim("X", domain=(-20, 20), dtype=np.int64))
1529+
schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=[attr])
1530+
tiledb.Array.create(path, schema)
1531+
data = np.random.randint(10, size=(hi - lo))
1532+
1533+
with tiledb.open(path, "w") as A:
1534+
A[lo:hi] = data
1535+
1536+
with tiledb.open(path, "r") as A:
1537+
assert_array_equal(A[lo:hi], data[:])
1538+
1539+
@pytest.mark.parametrize("lo,hi", [(-20, -10), (-20, -15), (-15, -10), (-17, -13)])
1540+
def test_dense_array_with_negative_negative_domain(self, lo, hi):
1541+
path = self.path("test_dense_array_with_negative_negative_domain")
1542+
attr = tiledb.Attr(dtype=np.uint8)
1543+
dom = tiledb.Domain(tiledb.Dim("X", domain=(-20, -10), dtype=np.int64))
1544+
schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=[attr])
1545+
tiledb.Array.create(path, schema)
1546+
data = np.random.randint(10, size=(hi - lo))
1547+
1548+
with tiledb.open(path, "w") as A:
1549+
A[lo:hi] = data
1550+
1551+
with tiledb.open(path, "r") as A:
1552+
assert_array_equal(A[lo:hi], data[:])
1553+
1554+
@pytest.mark.parametrize("lo,hi", [(10, 20), (10, 15), (12, 15), (17, 20)])
1555+
def test_dense_array_with_offset_domain(self, lo, hi):
1556+
path = self.path("test_dense_array_with_offset_domain")
1557+
attr = tiledb.Attr(dtype=np.uint8)
1558+
dom = tiledb.Domain(tiledb.Dim("X", domain=(10, 20), dtype=np.int64))
1559+
schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=[attr])
1560+
tiledb.Array.create(path, schema)
1561+
data = np.random.randint(10, size=(hi - lo))
1562+
1563+
with tiledb.open(path, "w") as A:
1564+
A[lo:hi] = data
1565+
1566+
with tiledb.open(path, "r") as A:
1567+
assert_array_equal(A[lo:hi], data[:])
1568+
15241569

15251570
class TestSparseArray(DiskTestCase):
15261571
@pytest.mark.xfail
@@ -2345,7 +2390,7 @@ def _test_index(self, A, T, idx):
23452390
slice(0, 1050),
23462391
slice(50, 150),
23472392
slice(0, 2000),
2348-
slice(-150, -50),
2393+
slice(1050-150, 1050-50),
23492394
# TODO: indexing failures
23502395
# slice(-2000, 2000),
23512396
# slice(0, 0), # empty result

0 commit comments

Comments
 (0)