Skip to content

Commit f4fb64f

Browse files
committed
Fix default validity for non-pandas write to nullable attribute
1 parent 287e6df commit f4fb64f

File tree

3 files changed

+44
-15
lines changed

3 files changed

+44
-15
lines changed

HISTORY.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
# TileDB-Py 0.14.0 Release Notes
1+
# In-progress
2+
3+
## Bug Fixes
4+
* Fix default validity for write to nullable attribute [#994](https://github.com/TileDB-Inc/TileDB-Py/pull/994)
25

36
## API Changes
47
* Addition of `ArraySchema.version` to get version of array schema [#949](https://github.com/TileDB-Inc/TileDB-Py/pull/949)

tiledb/libtiledb.pyx

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,7 +1629,7 @@ cdef class Attr(object):
16291629
check_error(self.ctx,
16301630
tiledb_attribute_get_filter_list(self.ctx.ptr, self.ptr, &filter_list_ptr))
16311631

1632-
return FilterList(PyCapsule_New(filter_list_ptr, "fl", NULL),
1632+
return FilterList(PyCapsule_New(filter_list_ptr, "fl", NULL),
16331633
is_capsule=True, ctx=self.ctx)
16341634

16351635
@property
@@ -1867,7 +1867,7 @@ cdef class Dim(object):
18671867
if tile_size_array.size != 1:
18681868
raise ValueError("tile extent must be a scalar")
18691869
tile_size_ptr = np.PyArray_DATA(tile_size_array)
1870-
1870+
18711871
cdef tiledb_filter_list_t* filter_list_ptr = NULL
18721872
try:
18731873
check_error(ctx,
@@ -2028,7 +2028,7 @@ cdef class Dim(object):
20282028
check_error(self.ctx,
20292029
tiledb_dimension_get_filter_list(self.ctx.ptr, self.ptr, &filter_list_ptr))
20302030

2031-
return FilterList(PyCapsule_New(filter_list_ptr, "fl", NULL),
2031+
return FilterList(PyCapsule_New(filter_list_ptr, "fl", NULL),
20322032
is_capsule=True, ctx=self.ctx)
20332033

20342034
cdef unsigned int _cell_val_num(Dim self) except? 0:
@@ -2664,7 +2664,7 @@ cdef class ArraySchema(object):
26642664
if allows_duplicates:
26652665
ballows_dups = 1
26662666
tiledb_array_schema_set_allows_dups(ctx.ptr, schema_ptr, ballows_dups)
2667-
2667+
26682668
if not isinstance(domain, Domain):
26692669
raise TypeError("'domain' must be an instance of Domain (domain is: '{}')".format(domain))
26702670
cdef tiledb_domain_t* domain_ptr = (<Domain> domain).ptr
@@ -2694,10 +2694,10 @@ cdef class ArraySchema(object):
26942694
filter_list.__capsule__(), "fl")
26952695
check_error(ctx,
26962696
tiledb_array_schema_set_coords_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
2697-
2697+
26982698
check_error(self.ctx,
26992699
tiledb_domain_get_ndim(ctx.ptr, domain_ptr, &ndim))
2700-
2700+
27012701
if not isinstance(coords_filters, FilterList):
27022702
coords_filters = FilterList(coords_filters, ctx=ctx)
27032703
filter_list = coords_filters
@@ -2733,7 +2733,7 @@ cdef class ArraySchema(object):
27332733
if rc != TILEDB_OK:
27342734
tiledb_array_schema_free(&schema_ptr)
27352735
_raise_ctx_err(ctx.ptr, rc)
2736-
2736+
27372737
cdef tiledb_attribute_t* attr_ptr = NULL
27382738
cdef Attr attribute
27392739
for attr in attrs:
@@ -2946,7 +2946,7 @@ cdef class ArraySchema(object):
29462946
tiledb_array_schema_get_offsets_filter_list(
29472947
self.ctx.ptr, self.ptr, &filter_list_ptr))
29482948
return FilterList(
2949-
PyCapsule_New(filter_list_ptr, "fl", NULL),
2949+
PyCapsule_New(filter_list_ptr, "fl", NULL),
29502950
is_capsule=True, ctx=self.ctx)
29512951

29522952
@property
@@ -2961,20 +2961,20 @@ cdef class ArraySchema(object):
29612961
tiledb_array_schema_get_coords_filter_list(
29622962
self.ctx.ptr, self.ptr, &filter_list_ptr))
29632963
return FilterList(
2964-
PyCapsule_New(filter_list_ptr, "fl", NULL),
2964+
PyCapsule_New(filter_list_ptr, "fl", NULL),
29652965
is_capsule=True, ctx=self.ctx)
2966-
2966+
29672967
@coords_filters.setter
29682968
def coords_filters(self, value):
29692969
warnings.warn(
29702970
"coords_filters is deprecated; "
29712971
"set the FilterList for each dimension",
29722972
DeprecationWarning,
29732973
)
2974-
2974+
29752975
@property
29762976
def validity_filters(self):
2977-
"""The FilterList for the array's validity
2977+
"""The FilterList for the array's validity
29782978
29792979
:rtype: tiledb.FilterList
29802980
:raises: :py:exc:`tiledb.TileDBError`
@@ -2984,7 +2984,7 @@ cdef class ArraySchema(object):
29842984
tiledb_array_schema_get_validity_filter_list(
29852985
self.ctx.ptr, self.ptr, &validity_list_ptr))
29862986
return FilterList(
2987-
PyCapsule_New(validity_list_ptr, "fl", NULL),
2987+
PyCapsule_New(validity_list_ptr, "fl", NULL),
29882988
is_capsule=True, ctx=self.ctx)
29892989

29902990
@property
@@ -4811,7 +4811,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
48114811
attr_val = np.ascontiguousarray(attr_val, dtype=attr.dtype)
48124812

48134813
if attr.isnullable and attr.name not in nullmaps:
4814-
nullmaps[attr.name] = np.array([int(v is None) for v in attr_val], dtype=np.uint8)
4814+
nullmaps[attr.name] = np.array([int(v is not None) for v in attr_val], dtype=np.uint8)
48154815

48164816
except Exception as exc:
48174817
raise ValueError(f"NumPy array conversion check failed for attr '{name}'") from exc

tiledb/tests/test_libtiledb.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2874,6 +2874,32 @@ def test_sparse_write_for_zero_attrs(self):
28742874
assert list(output.keys()) == ["dim"]
28752875
assert_array_equal(output["dim"][:], coords)
28762876

2877+
def test_sparse_write_nullable_default(self):
2878+
uri = self.path("test_sparse_write_nullable_default")
2879+
2880+
dim1 = tiledb.Dim(name="d1", dtype="|S0", var=True)
2881+
att = tiledb.Attr(name="a1", dtype="<U0", var=True, nullable=True)
2882+
2883+
schema = tiledb.ArraySchema(
2884+
domain=tiledb.Domain(dim1),
2885+
attrs=(att,),
2886+
sparse=True,
2887+
allows_duplicates=False,
2888+
)
2889+
tiledb.Array.create(uri, schema)
2890+
2891+
with tiledb.open(uri, "w") as A:
2892+
A[["a", "b", "c"]] = np.array(["aaa", "bb", "c"])
2893+
2894+
if has_pandas():
2895+
import pandas as pd
2896+
2897+
with tiledb.open(uri) as A:
2898+
pd._testing.assert_frame_equal(
2899+
A.query(dims=False).df[:],
2900+
pd.DataFrame({"a1": pd.Series(["aaa", "bb", "c"])}),
2901+
)
2902+
28772903

28782904
class TestDenseIndexing(DiskTestCase):
28792905
def _test_index(self, A, T, idx):

0 commit comments

Comments
 (0)