Skip to content

Commit c0e126a

Browse files
Factor write_direct_dense out of Cython (#2109)
Co-authored-by: nguyenv <[email protected]>
1 parent 4674f14 commit c0e126a

File tree

4 files changed

+108
-159
lines changed

4 files changed

+108
-159
lines changed

tiledb/cc/query.cc

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,13 @@ void init_query(py::module &m) {
7171
// uint64_t))&Query::set_data_buffer);
7272

7373
.def("set_data_buffer",
74-
[](Query &q, std::string name, py::array a) {
75-
// TODO check_type(a.dtype)
76-
// size_t item_size = a.itemsize();
77-
q.set_data_buffer(name, const_cast<void *>(a.data()), a.size());
74+
[](Query &q, std::string name, py::array a, uint32_t buff_size) {
75+
q.set_data_buffer(name, const_cast<void *>(a.data()), buff_size);
7876
})
7977

8078
.def("set_offsets_buffer",
81-
[](Query &q, std::string name, py::array a) {
82-
// TODO check_type(a.dtype)
83-
// size_t item_size = a.itemsize();
84-
q.set_offsets_buffer(name, (uint64_t *)(a.data()), a.size());
79+
[](Query &q, std::string name, py::array a, uint32_t buff_size) {
80+
q.set_offsets_buffer(name, (uint64_t *)(a.data()), buff_size);
8581
})
8682

8783
.def("set_subarray",
@@ -90,10 +86,8 @@ void init_query(py::module &m) {
9086
})
9187

9288
.def("set_validity_buffer",
93-
[](Query &q, std::string name, py::array a) {
94-
// TODO check_type(a.dtype)
95-
// size_t item_size = a.itemsize();
96-
q.set_validity_buffer(name, (uint8_t *)(a.data()), a.size());
89+
[](Query &q, std::string name, py::array a, uint32_t buff_size) {
90+
q.set_validity_buffer(name, (uint8_t *)(a.data()), buff_size);
9791
})
9892

9993
.def("_submit", &Query::submit, py::call_guard<py::gil_scoped_release>())

tiledb/dense_array.py

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import warnings
21
from collections import OrderedDict
32

43
import numpy as np
@@ -14,6 +13,7 @@
1413
replace_ellipsis,
1514
replace_scalars_slice,
1615
)
16+
from .datatypes import DataType
1717
from .query import Query
1818
from .subarray import Subarray
1919

@@ -601,9 +601,101 @@ def __array__(self, dtype=None, **kw):
601601
return array
602602

603603
def write_direct(self, array: np.ndarray, **kw):
    """
    Write directly to given array attribute with minimal checks,
    assumes that the numpy array is the same shape as the array's domain

    :param np.ndarray array: Numpy contiguous dense array of the same dtype \
        and shape and layout of the DenseArray instance
    :param kw: optional keyword arguments. ``mode`` is ``"ingest"`` \
        (default) or ``"append"``. ``append_dim`` is the index of the \
        dimension to append along and is required when ``mode="append"``. \
        ``start_idx`` is the first coordinate to write at; when omitted, \
        writing starts at 0, or in append mode one past the upper bound \
        of the non-empty domain along ``append_dim``.
    :raises ValueError: cannot write to multi-attribute DenseArray
    :raises ValueError: array is not contiguous
    :raises ValueError: in append mode, the number of dimensions or the \
        shape (excluding ``append_dim``) does not match the non-empty domain
    :raises TypeError: in append mode, ``append_dim`` was not given
    :raises IndexError: in append mode, ``append_dim`` is out of range
    :raises: :py:exc:`tiledb.TileDBError`
    """
    append_dim = kw.pop("append_dim", None)
    mode = kw.pop("mode", "ingest")
    start_idx = kw.pop("start_idx", None)

    if not self.isopen or self.mode != "w":
        raise tiledb.TileDBError("DenseArray is not opened for writing")
    if self.schema.nattr != 1:
        raise ValueError("cannot write_direct to a multi-attribute DenseArray")
    if not array.flags.c_contiguous and not array.flags.f_contiguous:
        raise ValueError("array is not contiguous")

    use_global_order = (
        self.ctx.config().get("py.use_global_order_1d_write", False) == "true"
    )

    layout = lt.LayoutType.ROW_MAJOR
    if array.ndim == 1 and use_global_order:
        layout = lt.LayoutType.GLOBAL_ORDER
    elif array.flags.f_contiguous:
        layout = lt.LayoutType.COL_MAJOR

    range_start_idx = start_idx or 0

    # Flat [start_0, end_0, start_1, end_1, ...] list of inclusive ranges,
    # one pair per dimension of the input array.
    subarray_ranges = np.zeros(2 * array.ndim, np.uint64)
    for n in range(array.ndim):
        subarray_ranges[n * 2] = range_start_idx
        subarray_ranges[n * 2 + 1] = array.shape[n] + range_start_idx - 1

    if mode == "append":
        # Validate `append_dim` up front: None would otherwise fail with an
        # opaque comparison error, and a negative value would match no
        # dimension below so nothing would actually be appended.
        if append_dim is None:
            raise TypeError("`append_dim` is required when mode is 'append'")
        if not 0 <= append_dim < array.ndim:
            raise IndexError("`append_dim` out of range")

        with Array.load_typed(self.uri) as A:
            ned = A.nonempty_domain()

        if array.ndim != len(ned):
            raise ValueError(
                "The number of dimension of the TileDB array and "
                "Numpy array to append do not match"
            )

        for n in range(array.ndim):
            if n == append_dim:
                if start_idx is not None:
                    range_start_idx = start_idx
                    range_end_idx = array.shape[n] + start_idx - 1
                else:
                    # Continue right after the last written coordinate.
                    range_start_idx = ned[n][1] + 1
                    range_end_idx = array.shape[n] + ned[n][1]

                subarray_ranges[n * 2] = range_start_idx
                subarray_ranges[n * 2 + 1] = range_end_idx
            else:
                # Bounds are inclusive, hence the +1; the same value is
                # reported in the error message so it matches the check.
                ned_extent = ned[n][1] - ned[n][0] + 1
                if array.shape[n] != ned_extent:
                    raise ValueError(
                        "The input Numpy array must be of the same "
                        "shape as the TileDB array, excluding the "
                        "`append_dim`, but the Numpy array at index "
                        f"{n} has {array.shape[n]} dimension(s) and "
                        f"the TileDB array has {ned_extent}."
                    )

    ctx = lt.Context(self.ctx)
    q = lt.Query(ctx, self.array, lt.QueryType.WRITE)
    q.layout = layout

    subarray = lt.Subarray(ctx, self.array)
    for n in range(array.ndim):
        subarray._add_dim_range(
            n, (subarray_ranges[n * 2], subarray_ranges[n * 2 + 1])
        )
    q.set_subarray(subarray)

    attr = self.schema.attr(0)
    battr_name = attr._internal_name.encode("UTF-8")

    tiledb_type = DataType.from_numpy(array.dtype)

    # Byte/character types pass the total byte count as the buffer size;
    # every other type passes cells-per-value times the element count.
    bytewise_types = (
        lt.DataType.BLOB,
        lt.DataType.CHAR,
        lt.DataType.STRING_UTF8,
    )
    if tiledb_type in bytewise_types:
        q.set_data_buffer(battr_name, array, array.nbytes)
    else:
        q.set_data_buffer(battr_name, array, tiledb_type.ncells * array.size)

    q._submit()
    q.finalize()
607699

608700
def read_direct(self, name=None):
609701
"""Read attribute directly with minimal overhead, returns a numpy ndarray over the entire domain

tiledb/libtiledb.pyx

Lines changed: 0 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -340,140 +340,3 @@ cdef _raise_ctx_err(tiledb_ctx_t* ctx_ptr, int rc):
340340
raise MemoryError()
341341
raise TileDBError("error retrieving error object from ctx")
342342
_raise_tiledb_error(err_ptr)
343-
344-
345-
def write_direct_dense(self: Array, np.ndarray array not None, **kw):
346-
"""
347-
Write directly to given array attribute with minimal checks,
348-
assumes that the numpy array is the same shape as the array's domain
349-
350-
:param np.ndarray array: Numpy contiguous dense array of the same dtype \
351-
and shape and layout of the DenseArray instance
352-
:raises ValueError: array is not contiguous
353-
:raises: :py:exc:`tiledb.TileDBError`
354-
355-
"""
356-
append_dim = kw.pop("append_dim", None)
357-
mode = kw.pop("mode", "ingest")
358-
start_idx = kw.pop("start_idx", None)
359-
360-
if not self.isopen or self.mode != 'w':
361-
raise TileDBError("DenseArray is not opened for writing")
362-
if self.schema.nattr != 1:
363-
raise ValueError("cannot write_direct to a multi-attribute DenseArray")
364-
if not array.flags.c_contiguous and not array.flags.f_contiguous:
365-
raise ValueError("array is not contiguous")
366-
367-
cdef tiledb_ctx_t* ctx_ptr = safe_ctx_ptr(self.ctx)
368-
cdef tiledb_array_t* array_ptr = <tiledb_array_t*>PyCapsule_GetPointer(self.array.__capsule__(), "array")
369-
370-
# attr name
371-
attr = self.schema.attr(0)
372-
cdef bytes battr_name = attr._internal_name.encode('UTF-8')
373-
cdef const char* attr_name_ptr = PyBytes_AS_STRING(battr_name)
374-
375-
cdef void* buff_ptr = np.PyArray_DATA(array)
376-
cdef uint64_t buff_size = array.nbytes
377-
cdef np.ndarray subarray = np.zeros(2*array.ndim, np.uint64)
378-
379-
try:
380-
use_global_order = self.ctx.config().get(
381-
"py.use_global_order_1d_write") == "true"
382-
except KeyError:
383-
use_global_order = False
384-
385-
cdef tiledb_layout_t layout = TILEDB_ROW_MAJOR
386-
if array.ndim == 1 and use_global_order:
387-
layout = TILEDB_GLOBAL_ORDER
388-
elif array.flags.f_contiguous:
389-
layout = TILEDB_COL_MAJOR
390-
391-
cdef tiledb_query_t* query_ptr = NULL
392-
cdef tiledb_subarray_t* subarray_ptr = NULL
393-
cdef int rc = TILEDB_OK
394-
rc = tiledb_query_alloc(ctx_ptr, array_ptr, TILEDB_WRITE, &query_ptr)
395-
if rc != TILEDB_OK:
396-
tiledb_query_free(&query_ptr)
397-
_raise_ctx_err(ctx_ptr, rc)
398-
try:
399-
rc = tiledb_query_set_layout(ctx_ptr, query_ptr, layout)
400-
if rc != TILEDB_OK:
401-
_raise_ctx_err(ctx_ptr, rc)
402-
403-
range_start_idx = start_idx or 0
404-
for n in range(array.ndim):
405-
subarray[n*2] = range_start_idx
406-
subarray[n*2 + 1] = array.shape[n] + range_start_idx - 1
407-
408-
if mode == "append":
409-
with Array.load_typed(self.uri) as A:
410-
ned = A.nonempty_domain()
411-
412-
if array.ndim <= append_dim:
413-
raise IndexError("`append_dim` out of range")
414-
415-
if array.ndim != len(ned):
416-
raise ValueError(
417-
"The number of dimension of the TileDB array and "
418-
"Numpy array to append do not match"
419-
)
420-
421-
for n in range(array.ndim):
422-
if n == append_dim:
423-
if start_idx is not None:
424-
range_start_idx = start_idx
425-
range_end_idx = array.shape[n] + start_idx -1
426-
else:
427-
range_start_idx = ned[n][1] + 1
428-
range_end_idx = array.shape[n] + ned[n][1]
429-
430-
subarray[n*2] = range_start_idx
431-
subarray[n*2 + 1] = range_end_idx
432-
else:
433-
if array.shape[n] != ned[n][1] - ned[n][0] + 1:
434-
raise ValueError(
435-
"The input Numpy array must be of the same "
436-
"shape as the TileDB array, exluding the "
437-
"`append_dim`, but the Numpy array at index "
438-
f"{n} has {array.shape[n]} dimension(s) and "
439-
f"the TileDB array has {ned[n][1]-ned[n][0]}."
440-
)
441-
442-
rc = tiledb_subarray_alloc(ctx_ptr, array_ptr, &subarray_ptr)
443-
if rc != TILEDB_OK:
444-
_raise_ctx_err(ctx_ptr, rc)
445-
rc = tiledb_subarray_set_subarray(
446-
ctx_ptr,
447-
subarray_ptr,
448-
<void*>np.PyArray_DATA(subarray)
449-
)
450-
if rc != TILEDB_OK:
451-
_raise_ctx_err(ctx_ptr, rc)
452-
453-
rc = tiledb_query_set_subarray_t(ctx_ptr, query_ptr, subarray_ptr)
454-
if rc != TILEDB_OK:
455-
_raise_ctx_err(ctx_ptr, rc)
456-
457-
rc = tiledb_query_set_data_buffer(
458-
ctx_ptr,
459-
query_ptr,
460-
attr_name_ptr,
461-
buff_ptr,
462-
&buff_size
463-
)
464-
if rc != TILEDB_OK:
465-
_raise_ctx_err(ctx_ptr, rc)
466-
467-
with nogil:
468-
rc = tiledb_query_submit(ctx_ptr, query_ptr)
469-
if rc != TILEDB_OK:
470-
_raise_ctx_err(ctx_ptr, rc)
471-
472-
with nogil:
473-
rc = tiledb_query_finalize(ctx_ptr, query_ptr)
474-
if rc != TILEDB_OK:
475-
_raise_ctx_err(ctx_ptr, rc)
476-
finally:
477-
tiledb_subarray_free(&subarray_ptr)
478-
tiledb_query_free(&query_ptr)
479-
return

tiledb/tests/cc/test_cc.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -340,8 +340,8 @@ def write():
340340
q.layout = lt.LayoutType.UNORDERED
341341
assert q.query_type == lt.QueryType.WRITE
342342

343-
q.set_data_buffer("a", data)
344-
q.set_data_buffer("x", coords)
343+
q.set_data_buffer("a", data, len(data))
344+
q.set_data_buffer("x", coords, len(coords))
345345

346346
assert q._submit() == lt.QueryStatus.COMPLETE
347347

@@ -358,8 +358,8 @@ def read(uri):
358358
rcoords = np.zeros(10).astype(np.int32)
359359
rdata = np.zeros(10).astype(np.int32)
360360

361-
q.set_data_buffer("a", rdata)
362-
q.set_data_buffer("x", rcoords)
361+
q.set_data_buffer("a", rdata, len(rdata))
362+
q.set_data_buffer("x", rcoords, len(rcoords))
363363

364364
assert q._submit() == lt.QueryStatus.COMPLETE
365365
assert np.all(rcoords == coords)
@@ -404,8 +404,8 @@ def write():
404404

405405
q.set_subarray(subarray)
406406

407-
q.set_data_buffer("a", data)
408-
# q.set_data_buffer("x", coords)
407+
q.set_data_buffer("a", data, len(data))
408+
# q.set_data_buffer("x", coords, len(coords))
409409

410410
assert q._submit() == lt.QueryStatus.COMPLETE
411411

@@ -426,7 +426,7 @@ def read(uri):
426426

427427
rdata = np.zeros(10).astype(np.float32)
428428

429-
q.set_data_buffer("a", rdata)
429+
q.set_data_buffer("a", rdata, len(rdata))
430430

431431
assert q._submit() == lt.QueryStatus.COMPLETE
432432
assert np.all(rdata == data)

0 commit comments

Comments
 (0)