@@ -29,143 +29,63 @@ bool ArrayBuffers::use_memory_pool(const std::shared_ptr<tiledb::Array>& array)
2929 return use_memory_pool;
3030}
3131
32- ArrayBuffers::ArrayBuffers (const std::vector<std::string>& names, const tiledb::Array& array) {
33- size_t memory_budget = DEFAULT_ALLOC_BYTES;
34- auto config = array.config ();
35- if (config.contains (CONFIG_KEY_MEMORY_BUDGET)) {
36- auto value_str = config.get (CONFIG_KEY_MEMORY_BUDGET);
37- try {
38- memory_budget = std::stoull (value_str);
39- } catch (const std::exception& e) {
40- throw TileDBSOMAError (
41- fmt::format (
42- " [ArrayBuffers] Error parsing {}: '{}' ({})" , CONFIG_KEY_MEMORY_BUDGET, value_str, e.what ()));
43- }
44- }
45-
46- size_t factor = 1 ;
47- if (config.contains (CONFIG_KEY_VAR_SIZED_FACTOR)) {
48- auto value_str = config.get (CONFIG_KEY_VAR_SIZED_FACTOR);
49- try {
50- factor = std::stoull (value_str);
51- } catch (const std::exception& e) {
52- throw TileDBSOMAError (
53- fmt::format (
54- " [ArrayBuffers] Error parsing {}: '{}' ({})" , CONFIG_KEY_VAR_SIZED_FACTOR, value_str, e.what ()));
55- }
32+ ArrayBuffers::ArrayBuffers (
33+ const std::vector<std::string>& names,
34+ const tiledb::Array& array,
35+ std::unique_ptr<ColumnBufferAllocationStrategy> strategy)
36+ : names_(names)
37+ , strategy_(std::move(strategy)) {
38+ if (!strategy_) {
39+ strategy_ = std::make_unique<BasicAllocationStrategy>(array);
5640 }
5741
58- MemoryMode mode = ColumnBuffer::memory_mode (config);
59-
60- ArraySchema schema = array.schema ();
42+ MemoryMode mode = ColumnBuffer::memory_mode (array. config () );
43+ const tiledb::ArraySchema schema = array. schema ();
44+ const tiledb::Context& context = array.context ();
6145 // Split memory budget to each column depending on the byte size of each columns element
6246 // Var sized columns will be allocated the same as an 8 byte datatype
6347
64- // Ensure minimum buffer size is multiple of 8
65- size_t memory_budget_unit = (memory_budget /
66- std::transform_reduce (
67- names.begin (),
68- names.end (),
69- 0L ,
70- std::plus{},
71- [&](auto name) {
72- size_t weight = 0 ;
73-
74- // Check if column is a TileDB attribute
75- if (schema.has_attribute (name)) {
76- Attribute attr = schema.attribute (name);
77-
78- if (!attr.variable_sized () && attr.cell_val_num () != 1 ) {
79- throw TileDBSOMAError (
80- " [ArrayBuffers] Values per cell > 1 is not supported: " + name);
81- }
82-
83- weight += attr.nullable () ? 1 : 0 ;
84- // If column has variable size add the offset array in the column budget
85- weight += attr.variable_sized () ? sizeof (uint64_t ) * (1 + 2 * factor) :
86- tiledb::impl::type_size (attr.type ());
87-
88- return weight;
89- }
90- // Else check if column is a TileDB dimension
91- else if (schema.domain ().has_dimension (name)) {
92- Dimension dim = schema.domain ().dimension (name);
93-
94- bool is_var = dim.cell_val_num () == TILEDB_VAR_NUM ||
95- dim.type () == TILEDB_STRING_ASCII ||
96- dim.type () == TILEDB_STRING_UTF8;
97-
98- if (!is_var && dim.cell_val_num () != 1 ) {
99- throw TileDBSOMAError (
100- " [ArrayBuffers] Values per cell > 1 is not supported: " + name);
101- }
102-
103- weight += (dim.type () == TILEDB_STRING_ASCII ||
104- dim.type () == TILEDB_STRING_UTF8) ?
105- sizeof (uint64_t ) * (1 + 2 * factor) :
106- tiledb::impl::type_size (dim.type ());
107-
108- return weight;
109- }
110-
111- throw TileDBSOMAError (
112- fmt::format (" [ArrayBuffers] Missing column name '{}'" , name));
113- }) /
114- 8 ) *
115- 8 ;
116-
117- for (const auto & name : names) {
118- names_.push_back (name);
119-
48+ for (const auto & name : names_) {
12049 if (schema.has_attribute (name)) {
121- Attribute attr = schema.attribute (name);
50+ tiledb:: Attribute attribute = schema.attribute (name);
12251
123- size_t column_budget = (attr.variable_sized () ? sizeof (uint64_t ) * 2 * factor :
124- tiledb::impl::type_size (attr.type ())) *
125- memory_budget_unit;
126-
127- size_t num_cells = memory_budget_unit;
128-
129- auto enum_name = AttributeExperimental::get_enumeration_name (schema.context (), attr);
52+ auto [column_budget, num_cells] = strategy_->get_buffer_sizes (attribute);
53+ auto enum_name = AttributeExperimental::get_enumeration_name (context, attribute);
13054 std::optional<Enumeration> enumeration = std::nullopt ;
13155 bool is_ordered = false ;
13256 if (enum_name.has_value ()) {
133- auto enmr = ArrayExperimental::get_enumeration (schema. context (), array, *enum_name);
134- is_ordered = enmr. ordered ( );
135- enumeration = std::make_optional<Enumeration>(enmr );
57+ enumeration = std::make_optional<Enumeration>(
58+ ArrayExperimental::get_enumeration (context, array, *enum_name) );
59+ is_ordered = enumeration-> ordered ( );
13660 }
13761
13862 buffers_.insert (
13963 std::make_pair (
14064 name,
14165 std::make_shared<CArrayColumnBuffer>(
14266 name,
143- attr .type (),
67+ attribute .type (),
14468 num_cells,
14569 column_budget,
146- attr .variable_sized (),
147- attr .nullable (),
70+ attribute .variable_sized (),
71+ attribute .nullable (),
14872 enumeration,
14973 is_ordered,
15074 mode)));
15175 }
15276 // Else check if column is a TileDB dimension
15377 else if (schema.domain ().has_dimension (name)) {
154- Dimension dim = schema.domain ().dimension (name);
155-
156- bool is_var = dim.cell_val_num () == TILEDB_VAR_NUM || dim.type () == TILEDB_STRING_ASCII ||
157- dim.type () == TILEDB_STRING_UTF8;
78+ tiledb::Dimension dimension = schema.domain ().dimension (name);
15879
159- // Ensure buffer size is multiple of 8
160- size_t column_budget = (is_var ? sizeof (uint64_t ) * 2 * factor : tiledb::impl::type_size (dim.type ())) *
161- memory_budget_unit;
162- size_t num_cells = memory_budget_unit;
80+ auto [column_budget, num_cells] = strategy_->get_buffer_sizes (dimension);
81+ bool is_var = dimension.cell_val_num () == TILEDB_VAR_NUM || dimension.type () == TILEDB_STRING_ASCII ||
82+ dimension.type () == TILEDB_STRING_UTF8;
16383
16484 buffers_.insert (
16585 std::make_pair (
16686 name,
16787 std::make_shared<CArrayColumnBuffer>(
168- name, dim .type (), num_cells, column_budget, is_var, false , std::nullopt , false , mode)));
88+ name, dimension .type (), num_cells, column_budget, is_var, false , std::nullopt , false , mode)));
16989 }
17090 }
17191}
@@ -178,4 +98,12 @@ void ArrayBuffers::emplace(const std::string& name, std::shared_ptr<ColumnBuffer
17898 buffers_.emplace (name, buffer);
17999}
180100
101+ void ArrayBuffers::expand_buffers () {
102+ for (const auto & name : names_) {
103+ std::shared_ptr<ReadColumnBuffer> buffer = at<ReadColumnBuffer>(name);
104+ buffer->resize (
105+ buffer->max_size () * DEFAULT_BUFFER_EXPANSION_FACTOR,
106+ buffer->max_num_cells () * DEFAULT_BUFFER_EXPANSION_FACTOR);
107+ }
108+ }
181109} // namespace tiledbsoma
0 commit comments