Skip to content

Commit 287e6df

Browse files
authored
Deprecate ArraySchema.coords_filters (#993)
* Attempting to set `coords_filters` will emit a `DeprecationWarning` and apply the given `FilterList` to each dimension rather than setting `coords_filters`
* `coords_filters` now defaults to an empty `FilterList`
1 parent 1f797ba commit 287e6df

File tree

5 files changed

+238
-146
lines changed

5 files changed

+238
-146
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## API Changes
44
* Addition of `ArraySchema.version` to get version of array schema [#949](https://github.com/TileDB-Inc/TileDB-Py/pull/949)
5+
* Deprecate `coords_filters` from `ArraySchema` [#993](https://github.com/TileDB-Inc/TileDB-Py/pull/993)
56

67
# TileDB-Py 0.13.1 Release Notes
78

tiledb/dataframe_.py

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,6 @@ def from_pandas(uri: str, dataframe: "pd.DataFrame", **kwargs):
411411
* **mode** - (default ``ingest``), Ingestion mode: ``ingest``, ``schema_only``, ``append``
412412
* **attr_filters** - FilterList to apply to Attributes: FilterList or Dict[str -> FilterList] for any attribute(s). Unspecified attributes will use default.
413413
* **dim_filters** - FilterList to apply to Dimensions: FilterList or Dict[str -> FilterList] for any dimension(s). Unspecified dimensions will use default.
414-
* **coords_filters** - FilterList to apply to all coordinates (Dimensions)
415414
* **offsets_filters** - FilterList to apply to all offsets
416415
* **full_domain** - Dimensions should be created with full range of the dtype
417416
* **tile** - Dimension tiling: accepts either an int that applies the tiling to all dimensions or a dict("dim_name": int) to specifically assign tiling to a given dimension
@@ -490,15 +489,17 @@ def _from_pandas(uri, dataframe, tiledb_args):
490489

491490
with tiledb.scope_ctx(tiledb_args.get("ctx")):
492491
if create_array:
493-
_create_array(
494-
uri,
495-
dataframe,
496-
sparse,
497-
full_domain,
498-
index_dims,
499-
column_infos,
500-
tiledb_args,
501-
)
492+
with warnings.catch_warnings() as w:
493+
warnings.simplefilter("always")
494+
_create_array(
495+
uri,
496+
dataframe,
497+
sparse,
498+
full_domain,
499+
index_dims,
500+
column_infos,
501+
tiledb_args,
502+
)
502503

503504
if write:
504505
if tiledb_args.get("debug", True):
@@ -536,19 +537,23 @@ def _create_array(uri, df, sparse, full_domain, index_dims, column_infos, tiledb
536537
)
537538

538539
# create the ArraySchema
539-
schema = tiledb.ArraySchema(
540-
sparse=sparse,
541-
domain=tiledb.Domain(*dims),
542-
attrs=attrs,
543-
cell_order=tiledb_args["cell_order"],
544-
tile_order=tiledb_args["tile_order"],
545-
coords_filters=_get_schema_filters(tiledb_args.get("coords_filters", True)),
546-
offsets_filters=_get_schema_filters(tiledb_args.get("offsets_filters", True)),
547-
# 0 will use the libtiledb internal default
548-
capacity=tiledb_args.get("capacity") or 0,
549-
# don't set allows_duplicates=True for dense
550-
allows_duplicates=sparse and tiledb_args.get("allows_duplicates", False),
551-
)
540+
with warnings.catch_warnings() as w:
541+
warnings.simplefilter("always")
542+
schema = tiledb.ArraySchema(
543+
sparse=sparse,
544+
domain=tiledb.Domain(*dims),
545+
attrs=attrs,
546+
cell_order=tiledb_args["cell_order"],
547+
tile_order=tiledb_args["tile_order"],
548+
coords_filters=_get_schema_filters(tiledb_args.get("coords_filters", True)),
549+
offsets_filters=_get_schema_filters(
550+
tiledb_args.get("offsets_filters", True)
551+
),
552+
# 0 will use the libtiledb internal default
553+
capacity=tiledb_args.get("capacity") or 0,
554+
# don't set allows_duplicates=True for dense
555+
allows_duplicates=sparse and tiledb_args.get("allows_duplicates", False),
556+
)
552557

553558
tiledb.Array.create(uri, schema)
554559

@@ -679,7 +684,6 @@ def from_csv(uri: str, csv_file: Union[str, List[str]], **kwargs):
679684
* **mode** - (default ``ingest``), Ingestion mode: ``ingest``, ``schema_only``, ``append``
680685
* **attr_filters** - FilterList to apply to Attributes: FilterList or Dict[str -> FilterList] for any attribute(s). Unspecified attributes will use default.
681686
* **dim_filters** - FilterList to apply to Dimensions: FilterList or Dict[str -> FilterList] for any dimension(s). Unspecified dimensions will use default.
682-
* **coords_filters** - FilterList to apply to all coordinates (Dimensions)
683687
* **offsets_filters** - FilterList to apply to all offsets
684688
* **full_domain** - Dimensions should be created with full range of the dtype
685689
* **tile** - Dimension tiling: accepts either an int that applies the tiling to all dimensions or a dict("dim_name": int) to specifically assign tiling to a given dimension

tiledb/libtiledb.pyx

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2610,8 +2610,6 @@ cdef class ArraySchema(object):
26102610
:param tile_order: TileDB label for tile layout
26112611
:type tile_order: 'row-major' (default) or 'C', 'col-major' or 'F'
26122612
:param int capacity: tile cell capacity
2613-
:param coords_filters: (default None) coordinate filter list
2614-
:type coords_filters: tiledb.FilterList
26152613
:param offsets_filters: (default None) offsets filter list
26162614
:type offsets_filters: tiledb.FilterList
26172615
:param validity_filters: (default None) validity filter list
@@ -2666,8 +2664,14 @@ cdef class ArraySchema(object):
26662664
if allows_duplicates:
26672665
ballows_dups = 1
26682666
tiledb_array_schema_set_allows_dups(ctx.ptr, schema_ptr, ballows_dups)
2667+
2668+
if not isinstance(domain, Domain):
2669+
raise TypeError("'domain' must be an instance of Domain (domain is: '{}')".format(domain))
2670+
cdef tiledb_domain_t* domain_ptr = (<Domain> domain).ptr
26692671

2670-
cdef tiledb_filter_list_t* filter_list_ptr = NULL
2672+
cdef tiledb_domain_t* dom_with_coords_filters_ptr = NULL;
2673+
cdef unsigned int ndim = 0
2674+
cdef tiledb_dimension_t* dim_ptr = NULL
26712675
try:
26722676
if offsets_filters is not None:
26732677
if not isinstance(offsets_filters, FilterList):
@@ -2677,14 +2681,42 @@ cdef class ArraySchema(object):
26772681
filter_list.__capsule__(), "fl")
26782682
check_error(ctx,
26792683
tiledb_array_schema_set_offsets_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
2684+
26802685
if coords_filters is not None:
2681-
if not isinstance(coords_filters, FilterList):
2682-
coords_filters = FilterList(coords_filters, ctx=ctx)
2683-
filter_list = coords_filters
2686+
warnings.warn(
2687+
"coords_filters is deprecated; "
2688+
"set the FilterList for each dimension",
2689+
DeprecationWarning,
2690+
)
2691+
2692+
filter_list = FilterList()
26842693
filter_list_ptr = <tiledb_filter_list_t *>PyCapsule_GetPointer(
26852694
filter_list.__capsule__(), "fl")
26862695
check_error(ctx,
26872696
tiledb_array_schema_set_coords_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
2697+
2698+
check_error(self.ctx,
2699+
tiledb_domain_get_ndim(ctx.ptr, domain_ptr, &ndim))
2700+
2701+
if not isinstance(coords_filters, FilterList):
2702+
coords_filters = FilterList(coords_filters, ctx=ctx)
2703+
filter_list = coords_filters
2704+
filter_list_ptr = <tiledb_filter_list_t *>PyCapsule_GetPointer(
2705+
filter_list.__capsule__(), "fl")
2706+
2707+
tiledb_domain_alloc(ctx.ptr, &dom_with_coords_filters_ptr)
2708+
for dim_id in range(ndim):
2709+
check_error(self.ctx,
2710+
tiledb_domain_get_dimension_from_index(
2711+
ctx.ptr, domain_ptr, dim_id, &dim_ptr))
2712+
check_error(self.ctx,
2713+
tiledb_dimension_set_filter_list(
2714+
ctx.ptr, dim_ptr, filter_list_ptr))
2715+
check_error(self.ctx,
2716+
tiledb_domain_add_dimension(
2717+
ctx.ptr, dom_with_coords_filters_ptr, dim_ptr))
2718+
domain_ptr = dom_with_coords_filters_ptr
2719+
26882720
if validity_filters is not None:
26892721
if not isinstance(validity_filters, FilterList):
26902722
validity_filters = FilterList(validity_filters, ctx=ctx)
@@ -2697,13 +2729,11 @@ cdef class ArraySchema(object):
26972729
tiledb_array_schema_free(&schema_ptr)
26982730
raise
26992731

2700-
if not isinstance(domain, Domain):
2701-
raise TypeError("'domain' must be an instance of Domain (domain is: '{}')".format(domain))
2702-
cdef tiledb_domain_t* domain_ptr = (<Domain> domain).ptr
27032732
rc = tiledb_array_schema_set_domain(ctx.ptr, schema_ptr, domain_ptr)
27042733
if rc != TILEDB_OK:
27052734
tiledb_array_schema_free(&schema_ptr)
27062735
_raise_ctx_err(ctx.ptr, rc)
2736+
27072737
cdef tiledb_attribute_t* attr_ptr = NULL
27082738
cdef Attr attribute
27092739
for attr in attrs:
@@ -2719,6 +2749,7 @@ cdef class ArraySchema(object):
27192749
if rc != TILEDB_OK:
27202750
tiledb_array_schema_free(&schema_ptr)
27212751
_raise_ctx_err(ctx.ptr, rc)
2752+
27222753
self.ctx = ctx
27232754
self.ptr = schema_ptr
27242755

@@ -2933,6 +2964,14 @@ cdef class ArraySchema(object):
29332964
PyCapsule_New(filter_list_ptr, "fl", NULL),
29342965
is_capsule=True, ctx=self.ctx)
29352966

2967+
@coords_filters.setter
2968+
def coords_filters(self, value):
2969+
warnings.warn(
2970+
"coords_filters is deprecated; "
2971+
"set the FilterList for each dimension",
2972+
DeprecationWarning,
2973+
)
2974+
29362975
@property
29372976
def validity_filters(self):
29382977
"""The FilterList for the array's validity
@@ -3121,9 +3160,6 @@ cdef class ArraySchema(object):
31213160
if self.sparse:
31223161
output.write(f" allows_duplicates={self.allows_duplicates},\n")
31233162

3124-
if self.sparse and self.coords_filters is not None:
3125-
output.write(f" coords_filters={self.coords_filters},\n")
3126-
31273163
output.write(")\n")
31283164

31293165
return output.getvalue()
@@ -3180,12 +3216,6 @@ cdef class ArraySchema(object):
31803216
output.write(f"{self.sparse}\n")
31813217
output.write("</details>\n")
31823218

3183-
if self.sparse and self.coords_filters is not None:
3184-
output.write("<details>\n")
3185-
output.write(f"<summary>coords_filters</summary>\n")
3186-
output.write(f"{self.coords_filters}\n")
3187-
output.write("</details>\n")
3188-
31893219
output.write("</section>\n")
31903220

31913221
return output.getvalue()

0 commit comments

Comments
 (0)