@@ -46,7 +46,7 @@ namespace py = pybind11;
4646namespace {
4747struct CPPArrayFx {
4848 public:
49- CPPArrayFx (std::string uri, const uint64_t col_size)
49+ CPPArrayFx (std::string uri, const uint64_t col_size, const uint8_t offset )
5050 : vfs(ctx)
5151 , uri(uri) {
5252 if (vfs.is_dir (uri))
@@ -59,41 +59,47 @@ struct CPPArrayFx {
5959 domain.add_dimensions (d1);
6060
6161 std::vector<Attribute> attrs;
62- attrs.insert (
63- attrs.end (),
64- {Attribute::create<int8_t >(ctx, " int8" ),
65- Attribute::create<int16_t >(ctx, " int16" ),
66- Attribute::create<int32_t >(ctx, " int32" ),
67- Attribute::create<int64_t >(ctx, " int64" ),
68-
69- Attribute::create<uint8_t >(ctx, " uint8" ),
70- Attribute::create<uint16_t >(ctx, " uint16" ),
71- Attribute::create<uint32_t >(ctx, " uint32" ),
72- Attribute::create<uint64_t >(ctx, " uint64" ),
73-
74- Attribute::create<float >(ctx, " float32" ),
75- Attribute::create<double >(ctx, " float64" )});
76-
77- // must be constructed manually to get TILEDB_STRING_UTF8 type
78- {
79- auto str_attr = Attribute (ctx, " utf_string1" , TILEDB_STRING_UTF8);
62+ if (offset == 64 ) {
63+ auto str_attr = Attribute (ctx, " utf_big_string" , TILEDB_STRING_UTF8);
8064 str_attr.set_cell_val_num (TILEDB_VAR_NUM);
8165 attrs.push_back (str_attr);
82- }
83- {
84- auto str_attr = Attribute (ctx, " utf_string2" , TILEDB_STRING_UTF8);
85- str_attr.set_cell_val_num (TILEDB_VAR_NUM);
86- attrs.push_back (str_attr);
87- }
88- {
89- auto str_attr = Attribute (ctx, " tiledb_char" , TILEDB_CHAR);
90- str_attr.set_cell_val_num (TILEDB_VAR_NUM);
91- attrs.push_back (str_attr);
92- }
66+ } else if (offset == 32 ) {
67+ attrs.insert (
68+ attrs.end (),
69+ {Attribute::create<int8_t >(ctx, " int8" ),
70+ Attribute::create<int16_t >(ctx, " int16" ),
71+ Attribute::create<int32_t >(ctx, " int32" ),
72+ Attribute::create<int64_t >(ctx, " int64" ),
73+
74+ Attribute::create<uint8_t >(ctx, " uint8" ),
75+ Attribute::create<uint16_t >(ctx, " uint16" ),
76+ Attribute::create<uint32_t >(ctx, " uint32" ),
77+ Attribute::create<uint64_t >(ctx, " uint64" ),
78+
79+ Attribute::create<float >(ctx, " float32" ),
80+ Attribute::create<double >(ctx, " float64" )});
81+
82+ // must be constructed manually to get TILEDB_STRING_UTF8 type
83+ {
84+ auto str_attr = Attribute (ctx, " utf_string1" , TILEDB_STRING_UTF8);
85+ str_attr.set_cell_val_num (TILEDB_VAR_NUM);
86+ attrs.push_back (str_attr);
87+ }
88+ {
89+ auto str_attr = Attribute (ctx, " utf_string2" , TILEDB_STRING_UTF8);
90+ str_attr.set_cell_val_num (TILEDB_VAR_NUM);
91+ attrs.push_back (str_attr);
92+ }
93+ {
94+ auto str_attr = Attribute (ctx, " tiledb_char" , TILEDB_CHAR);
95+ str_attr.set_cell_val_num (TILEDB_VAR_NUM);
96+ attrs.push_back (str_attr);
97+ }
9398
94- // must be constructed manually to get TILEDB_DATETIME_NS type
95- auto datetimens_attr = Attribute (ctx, " datetime_ns" , TILEDB_DATETIME_NS);
96- attrs.push_back (datetimens_attr);
99+ // must be constructed manually to get TILEDB_DATETIME_NS type
100+ auto datetimens_attr = Attribute (ctx, " datetime_ns" , TILEDB_DATETIME_NS);
101+ attrs.push_back (datetimens_attr);
102+ }
97103
98104 FilterList filters (ctx);
99105 filters.add_filter ({ctx, TILEDB_FILTER_LZ4});
@@ -217,9 +223,11 @@ void allocate_query_buffers(tiledb::Query* const query) {
217223
218224}; // namespace
219225
220- void test_for_column_size (size_t col_size) {
221- std::string uri (" test_arrow_io_" + std::to_string (col_size));
222- CPPArrayFx _fx (uri, col_size);
226+ void test_for_column_size (const size_t col_size, const uint8_t offset) {
227+ std::string uri (
228+ " test_arrow_io_" + std::to_string (col_size) + " _" +
229+ std::to_string (offset));
230+ CPPArrayFx _fx (uri, col_size, offset);
223231
224232 py::object py_data_source;
225233 py::object py_data_arrays;
@@ -234,7 +242,8 @@ void test_for_column_size(size_t col_size) {
234242 unit_arrow = py::module::import (" unit_arrow" );
235243
236244 // this class generates random test data for each attribute
237- auto h_data_source = unit_arrow.attr (" DataFactory" );
245+ auto class_name = " DataFactory" + std::to_string (offset);
246+ auto h_data_source = unit_arrow.attr (class_name.c_str ());
238247 py_data_source = h_data_source (py::int_ (col_size));
239248 py_data_names = py_data_source.attr (" names" );
240249 py_data_arrays = py_data_source.attr (" arrays" );
@@ -248,7 +257,7 @@ void test_for_column_size(size_t col_size) {
248257 * Test write
249258 */
250259 Config config;
251- config[" sm.var_offsets.bitsize" ] = 32 ;
260+ config[" sm.var_offsets.bitsize" ] = offset ;
252261 config[" sm.var_offsets.mode" ] = " elements" ;
253262 config[" sm.var_offsets.extra_element" ] = " true" ;
254263 Context ctx (config);
@@ -303,14 +312,12 @@ void test_for_column_size(size_t col_size) {
303312 // However, there is an unexplained crash due to an early destructor
304313 // when both brace scopes are converted to SECTIONs.
305314 // SECTION("Test reading data back via ArrowAdapter into pyarrow arrays")
306-
307- // test both bitsize read modes
308- for (auto bitsize : {32 , 64 }) {
315+ {
309316 /*
310317 * Test read
311318 */
312319 Config config;
313- config[" sm.var_offsets.bitsize" ] = bitsize ;
320+ config[" sm.var_offsets.bitsize" ] = 64 ;
314321 config[" sm.var_offsets.mode" ] = " elements" ;
315322 config[" sm.var_offsets.extra_element" ] = " true" ;
316323 Context ctx (config);
@@ -437,8 +444,9 @@ TEST_CASE("Arrow IO integration tests", "[arrow]") {
437444#endif
438445
439446 // do not use catch2 GENERATE here: it causes bad things to happen w/ python
440- uint64_t col_sizes[] = {0 }; // ,1,2,3,4, 11,103};
447+ uint64_t col_sizes[] = {0 , 1 , 2 , 3 , 4 , 11 , 103 };
441448 for (auto sz : col_sizes) {
442- test_for_column_size (sz);
449+ test_for_column_size (sz, 32 );
450+ test_for_column_size (sz, 64 );
443451 }
444452}
0 commit comments