Skip to content

Commit e4bb8d8

Browse files
nguyenvihnorton
authored and committed
Bind Enumerated Data Type
1 parent 04cb673 commit e4bb8d8

30 files changed

+548
-47
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
# - this is for builds-from-source
2525
# - release builds are controlled by `misc/azure-release.yml`
2626
# - this should be set to the current core release, not `dev`
27-
TILEDB_VERSION = "2.16.3"
27+
TILEDB_VERSION = "dev"
2828

2929
# allow overriding w/ environment variable
3030
TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION

tiledb/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
from .dimension_label import DimLabel
4646
from .dimension_label_schema import DimLabelSchema
4747
from .domain import Domain
48+
from .enumeration import Enumeration
4849
from .filestore import Filestore
4950
from .filter import (
5051
BitShuffleFilter,

tiledb/array_schema.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,15 @@ def __init__(
5151
allows_duplicates: bool = False,
5252
sparse: bool = False,
5353
dim_labels={},
54+
enums=None,
5455
ctx: Ctx = None,
5556
):
5657
super().__init__(ctx, lt.ArrayType.SPARSE if sparse else lt.ArrayType.DENSE)
58+
59+
if enums is not None:
60+
for enum_name in enums:
61+
self._add_enumeration(self._ctx, enum_name)
62+
5763
if attrs is not None:
5864
for att in attrs:
5965
if not isinstance(att, Attr):

tiledb/attribute.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ def __init__(
2424
var: bool = None,
2525
nullable: bool = False,
2626
filters: Union[FilterList, Sequence[Filter]] = None,
27+
enum_label: str = None,
2728
ctx: Optional[Ctx] = None,
2829
):
2930
"""Class representing a TileDB array attribute.
@@ -88,6 +89,9 @@ def __init__(
8889
if nullable is not None:
8990
self._nullable = nullable
9091

92+
if enum_label is not None:
93+
self._set_enumeration_name(self._ctx, enum_label)
94+
9195
def __eq__(self, other):
9296
if not isinstance(other, Attr):
9397
return False
@@ -202,6 +206,10 @@ def isascii(self) -> bool:
202206
"""
203207
return self._tiledb_dtype == lt.DataType.STRING_ASCII
204208

209+
@property
210+
def enum_label(self):
211+
return self._get_enumeration_name(self._ctx)
212+
205213
def __repr__(self):
206214
filters_str = ""
207215
if self.filters:
@@ -217,11 +225,16 @@ def __repr__(self):
217225
else:
218226
attr_dtype = self.dtype
219227

228+
if self.enum_label is None:
229+
enum_label = None
230+
else:
231+
enum_label = f"'{self.enum_label!s}'"
232+
220233
# filters_str must be last with no spaces
221234
return (
222235
f"""Attr(name={repr(self.name)}, dtype='{attr_dtype!s}', """
223-
f"""var={self.isvar!s}, nullable={self.isnullable!s}"""
224-
f"""{filters_str})"""
236+
f"""var={self.isvar!s}, nullable={self.isnullable!s}, """
237+
f"""enum_label={enum_label}{filters_str})"""
225238
)
226239

227240
def _repr_html_(self):

tiledb/cc/array.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ void init_array(py::module &m) {
3434
.def("uri", &Array::uri)
3535
.def("schema", &Array::schema)
3636
//.def("ptr", [](Array& arr){ return py::capsule(arr.ptr()); } )
37+
.def("open", (void (Array::*)(tiledb_query_type_t)) & Array::open)
3738
// open with encryption key
3839
.def("open",
3940
(void (Array::*)(tiledb_query_type_t, tiledb_encryption_type_t,

tiledb/cc/attribute.cc

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include <tiledb/tiledb>
2+
#include <tiledb/tiledb_experimental>
23

34
#include <pybind11/numpy.h>
45
#include <pybind11/pybind11.h>
@@ -39,6 +40,16 @@ py::array get_fill_value(Attribute &attr) {
3940
return py::array(value_type, value_num, value);
4041
}
4142

43+
void set_enumeration_name(Attribute &attr, const Context &ctx,
44+
const std::string &enumeration_name) {
45+
AttributeExperimental::set_enumeration_name(ctx, attr, enumeration_name);
46+
}
47+
48+
std::optional<std::string> get_enumeration_name(Attribute &attr,
49+
const Context &ctx) {
50+
return AttributeExperimental::get_enumeration_name(ctx, attr);
51+
}
52+
4253
void init_attribute(py::module &m) {
4354
py::class_<tiledb::Attribute>(m, "Attribute")
4455
.def(py::init<Attribute>())
@@ -73,8 +84,11 @@ void init_attribute(py::module &m) {
7384

7485
.def_property("_fill", get_fill_value, set_fill_value)
7586

87+
.def("_get_enumeration_name", get_enumeration_name)
88+
89+
.def("_set_enumeration_name", set_enumeration_name)
90+
7691
.def("_dump", [](Attribute &attr) { attr.dump(); });
77-
;
7892
}
7993

8094
} // namespace libtiledbcpp

tiledb/cc/enumeration.cc

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,77 @@
1+
#include <tiledb/tiledb>
2+
#include <tiledb/tiledb_experimental>
3+
4+
#include <pybind11/numpy.h>
5+
#include <pybind11/pybind11.h>
6+
#include <pybind11/pytypes.h>
7+
#include <pybind11/stl.h>
8+
9+
#include "common.h"
10+
11+
namespace libtiledbcpp {
12+
13+
using namespace tiledb;
14+
using namespace tiledbpy::common;
15+
namespace py = pybind11;
16+
17+
void init_enumeration(py::module &m) {
18+
py::class_<Enumeration>(m, "Enumeration")
19+
.def(py::init<Enumeration>())
20+
21+
.def(py::init([](const Context &ctx, const std::string &name,
22+
std::vector<std::string> &values, bool ordered,
23+
tiledb_datatype_t type) {
24+
return Enumeration::create(ctx, name, values, ordered, type);
25+
}))
26+
27+
.def(py::init([](const Context &ctx, const std::string &name,
28+
bool ordered, py::array data, py::array offsets) {
29+
tiledb_datatype_t data_type;
30+
try {
31+
data_type = np_to_tdb_dtype(data.dtype());
32+
} catch (const TileDBPyError &e) {
33+
throw py::type_error(e.what());
34+
}
35+
36+
py::buffer_info data_buffer = data.request();
37+
if (data_buffer.ndim != 1)
38+
throw py::type_error("Only 1D Numpy arrays can be stored as "
39+
"enumeration values");
40+
41+
py::size_t cell_val_num =
42+
offsets.size() == 0 ? get_ncells(data.dtype()) : TILEDB_VAR_NUM;
43+
44+
return Enumeration::create(
45+
ctx, name, data_type, cell_val_num, ordered, data.data(),
46+
data.nbytes(), offsets.size() == 0 ? nullptr : offsets.data(),
47+
offsets.nbytes());
48+
}))
49+
50+
.def(py::init<const Context &, py::capsule>(), py::keep_alive<1, 2>())
51+
52+
.def("__capsule__",
53+
[](Enumeration &enmr) {
54+
return py::capsule(enmr.ptr().get(), "enmr", nullptr);
55+
})
56+
57+
.def_property_readonly("name", &Enumeration::name)
58+
59+
.def_property_readonly("type", &Enumeration::type)
60+
61+
.def_property_readonly("cell_val_num", &Enumeration::cell_val_num)
62+
63+
.def_property_readonly("ordered", &Enumeration::ordered)
64+
65+
.def("values",
66+
[](Enumeration &enmr) {
67+
auto data = enmr.as_vector<std::byte>();
68+
auto dtype = tdb_to_np_dtype(enmr.type(), enmr.cell_val_num());
69+
return py::array(dtype, data.size() / dtype.itemsize(),
70+
data.data());
71+
})
72+
73+
.def("str_values",
74+
[](Enumeration &enmr) { return enmr.as_vector<std::string>(); });
75+
}
76+
77+
} // namespace libtiledbcpp

tiledb/cc/schema.cc

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -278,16 +278,23 @@ void init_schema(py::module &m) {
278278
.def("_has_attribute", &ArraySchema::has_attribute)
279279

280280
#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
281-
.def("_has_dim_label", [](const ArraySchema &schema, const Context &ctx,
282-
const std::string &name) {
283-
return ArraySchemaExperimental::has_dimension_label(ctx, schema, name);
284-
});
281+
.def("_has_dim_label",
282+
[](const ArraySchema &schema, const Context &ctx,
283+
const std::string &name) {
284+
return ArraySchemaExperimental::has_dimension_label(ctx, schema,
285+
name);
286+
})
285287
#else
286288
.def("_has_dim_label", [](const ArraySchema &, const Context &,
287289
const std::string &) {
288290
return false;
289-
});
291+
})
290292
#endif
293+
294+
.def("_add_enumeration", [](const ArraySchema &schema, const Context &ctx,
295+
const Enumeration &enmr) {
296+
ArraySchemaExperimental::add_enumeration(ctx, schema, enmr);
297+
});
291298
}
292299

293300
} // namespace libtiledbcpp

tiledb/cc/tiledbcpp.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ void init_attribute(py::module &);
1919
void init_context(py::module &);
2020
void init_config(py::module &);
2121
void init_enums(py::module &);
22+
void init_enumeration(py::module &);
2223
void init_dimension_label(py::module &m);
2324
void init_domain(py::module &m);
2425
void init_file_handle(py::module &);
@@ -40,6 +41,7 @@ PYBIND11_MODULE(cc, m) {
4041
init_dimension_label(m);
4142
init_domain(m);
4243
init_enums(m);
44+
init_enumeration(m);
4345
init_file_handle(m);
4446
init_filestore(m);
4547
init_filter(m);

tiledb/core.cc

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,7 @@ class PyQuery {
316316
// label buffer list
317317
std::unordered_map<string, uint64_t> label_input_buffer_data_;
318318

319-
py::object pyschema_;
319+
std::string uri_;
320320

321321
public:
322322
tiledb_ctx_t *c_ctx_;
@@ -349,7 +349,7 @@ class PyQuery {
349349
domain_ =
350350
std::shared_ptr<tiledb::Domain>(new Domain(array_schema_->domain()));
351351

352-
pyschema_ = array.attr("schema");
352+
uri_ = array.attr("uri").cast<std::string>();
353353

354354
bool issparse = array_->schema().array_type() == TILEDB_SPARSE;
355355

@@ -450,7 +450,7 @@ class PyQuery {
450450
py::object init_pyqc = cond.attr("init_query_condition");
451451

452452
try {
453-
init_pyqc(pyschema_, attrs_);
453+
init_pyqc(uri_, attrs_);
454454
} catch (tiledb::TileDBError &e) {
455455
TPY_ERROR_LOC(e.what());
456456
} catch (py::error_already_set &e) {

0 commit comments

Comments
 (0)