Skip to content

Commit 7c9db9a

Browse files
authored
chore: Attribute::get_enumeration_name returns a reference rather than a copy in order to add Rust binding (#5567)
Differences in copy and move semantics between Rust and C++ mean that they are generally not very good at passing values across the boundary - most of the time it is necessary to pass references instead. #5566 intends to call `Attribute::get_enumeration_name` from Rust. Prior to these changes, that function returns `std::optional<std::string>`, which cannot be passed across the boundary (neither `std::optional` nor `std::string` can be). We can either pass a pointer to a string, or pass a string contained within a smart pointer. To avoid the additional memory allocations required for both copying the `std::string` and placing it in a `unique_ptr`, we choose the former. However, `std::optional` does not naturally support references, so we cannot do `std::optional<std::string&>`. Instead we change the return type to `std::optional<std::reference_wrapper<std::string>>`. In addition to enabling passing the result of this function to Rust without allocating additional memory, this also avoids making additional copies of the string. This is not likely to matter from a performance perspective, but is still nice! --- TYPE: NO_HISTORY DESC: Rust binding for `Attribute::get_enumeration_name`
1 parent 555916f commit 7c9db9a

File tree

15 files changed

+120
-31
lines changed

15 files changed

+120
-31
lines changed

cmake/oxidize.cmake

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,10 +306,9 @@ if (TILEDB_RUST)
306306
set(bridge_h_src "${bridge_generated_dir}/src/${bridge}.h")
307307
set(bridge_cc_src "${bridge_generated_dir}/src/${bridge}.cc")
308308

309+
string(REPLACE "-" "_" bridge_sanitize_relpath ${bridge_sanitize_relpath})
309310
set(bridge_h_dst "${OXIDIZE_INCLUDE_DIR}/tiledb/oxidize/${bridge_sanitize_relpath}.h")
310311
set(bridge_cc_dst "${OXIDIZE_INCLUDE_DIR}/tiledb/oxidize/${bridge_sanitize_relpath}.cc")
311-
string(REPLACE "-" "_" bridge_h_dst ${bridge_h_dst})
312-
string(REPLACE "-" "_" bridge_cc_dst ${bridge_cc_dst})
313312

314313
cmake_path(GET bridge_h_dst PARENT_PATH bridge_sanitize_dirname)
315314

test/src/unit-enumerations.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,7 +1244,7 @@ TEST_CASE_METHOD(
12441244
auto enmr_name = schema->attribute("attr1")->get_enumeration_name();
12451245
REQUIRE(enmr_name.has_value());
12461246

1247-
auto enmr_path = schema->get_enumeration_path_name(enmr_name.value());
1247+
auto enmr_path = schema->get_enumeration_path_name(enmr_name.value().get());
12481248

12491249
auto loaded =
12501250
ad->load_enumerations_from_paths({enmr_path}, enc_key_, memory_tracker_);
@@ -1292,7 +1292,7 @@ TEST_CASE_METHOD(
12921292
auto ad = get_array_directory();
12931293

12941294
auto enmr_name = schema->attribute("attr1")->get_enumeration_name();
1295-
auto enmr_path = schema->get_enumeration_path_name(enmr_name.value());
1295+
auto enmr_path = schema->get_enumeration_path_name(enmr_name.value().get());
12961296

12971297
memory_tracker_->set_budget(memory_tracker_->get_memory_usage() + 1);
12981298

test/support/src/helpers.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,7 +1636,13 @@ void schema_equiv(
16361636
CHECK(a->name() == b->name());
16371637
CHECK(a->type() == b->type());
16381638
CHECK(a->nullable() == b->nullable());
1639-
CHECK(a->get_enumeration_name() == b->get_enumeration_name());
1639+
1640+
const auto a_enmr = a->get_enumeration_name(),
1641+
b_enmr = b->get_enumeration_name();
1642+
CHECK(a_enmr.has_value() == b_enmr.has_value());
1643+
if (a_enmr.has_value() && b_enmr.has_value()) {
1644+
CHECK(a_enmr.value().get() == b_enmr.value().get());
1645+
}
16401646
}
16411647
CHECK(schema1.capacity() == schema2.capacity());
16421648
CHECK(schema1.cell_order() == schema2.cell_order());

tiledb/api/c_api/array_schema/array_schema_api.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,9 @@ capi_return_t tiledb_array_schema_get_enumeration_from_attribute_name(
250250
return TILEDB_OK;
251251
}
252252

253-
array_schema->load_enumeration(ctx, enumeration_name->c_str());
253+
array_schema->load_enumeration(ctx, enumeration_name->get().c_str());
254254

255-
auto ptr = array_schema->get_enumeration(enumeration_name->c_str());
255+
auto ptr = array_schema->get_enumeration(enumeration_name->get().c_str());
256256
*enumeration = tiledb_enumeration_handle_t::make_handle(ptr);
257257

258258
return TILEDB_OK;

tiledb/api/c_api/attribute/attribute_api_internal.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,12 @@ struct tiledb_attribute_handle_t
187187
* Facade for `Attribute` function
188188
*/
189189
[[nodiscard]] std::optional<std::string> get_enumeration_name() const {
190-
return attr_->get_enumeration_name();
190+
const auto eref = attr_->get_enumeration_name();
191+
if (eref.has_value()) {
192+
return eref.value().get();
193+
} else {
194+
return std::nullopt;
195+
}
191196
};
192197
/**
193198
* Facade for `Attribute` function
Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,25 @@
11
#include "tiledb/oxidize/cxx-interface/cc/array_schema.h"
22

3-
namespace tiledb::oxidize {
4-
53
using namespace tiledb::sm;
64

5+
namespace tiledb::oxidize::sm {
6+
7+
namespace attribute {
8+
9+
const std::string* enumeration_name_cxx(const Attribute& attribute) {
10+
std::optional<std::reference_wrapper<const std::string>> e =
11+
attribute.get_enumeration_name();
12+
if (e.has_value()) {
13+
return &e.value().get();
14+
} else {
15+
return nullptr;
16+
}
17+
}
18+
19+
} // namespace attribute
20+
21+
namespace dimension {
22+
723
void set_domain(Dimension& dimension, rust::Slice<const uint8_t> domain) {
824
dimension.set_domain(static_cast<const void*>(domain.data()));
925
}
@@ -12,4 +28,6 @@ void set_tile_extent(Dimension& dimension, rust::Slice<const uint8_t> domain) {
1228
dimension.set_tile_extent(static_cast<const void*>(domain.data()));
1329
}
1430

15-
} // namespace tiledb::oxidize
31+
} // namespace dimension
32+
33+
} // namespace tiledb::oxidize::sm

tiledb/oxidize/cxx-interface/cc/array_schema.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,27 @@
44
#include "tiledb/sm/array_schema/dimension.h"
55
#include "tiledb/sm/array_schema/domain.h"
66

7-
namespace tiledb::oxidize {
7+
namespace tiledb::oxidize::sm {
88

99
using namespace tiledb::sm;
1010

11+
namespace attribute {
12+
1113
using ConstAttribute = const Attribute;
12-
using ConstDimension = const Dimension;
14+
15+
const std::string* enumeration_name_cxx(const Attribute& attribute);
16+
17+
} // namespace attribute
18+
19+
namespace dimension {
20+
21+
using namespace tiledb::sm;
22+
23+
using ConstDimension = const tiledb::sm::Dimension;
1324

1425
void set_domain(Dimension& dimension, rust::Slice<const uint8_t> domain);
1526
void set_tile_extent(Dimension& dimension, rust::Slice<const uint8_t> domain);
1627

17-
} // namespace tiledb::oxidize
28+
} // namespace dimension
29+
30+
} // namespace tiledb::oxidize::sm

tiledb/oxidize/cxx-interface/src/sm/array_schema/mod.rs

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ mod ffi {
66
type Layout = crate::sm::enums::Layout;
77
}
88

9-
#[namespace = "tiledb::oxidize"]
9+
#[namespace = "tiledb::oxidize::sm::attribute"]
1010
unsafe extern "C++" {
1111
include!("tiledb/oxidize/cxx-interface/cc/array_schema.h");
1212
type ConstAttribute;
@@ -26,6 +26,9 @@ mod ffi {
2626
#[cxx_name = "type"]
2727
fn datatype(&self) -> Datatype;
2828

29+
#[namespace = "tiledb::oxidize::sm::attribute"]
30+
fn enumeration_name_cxx(attr: &Attribute) -> *const CxxString;
31+
2932
fn set_cell_val_num(self: Pin<&mut Attribute>, cell_val_num: u32);
3033
}
3134

@@ -42,10 +45,10 @@ mod ffi {
4245
#[cxx_name = "type"]
4346
fn datatype(&self) -> Datatype;
4447

45-
#[namespace = "tiledb::oxidize"]
48+
#[namespace = "tiledb::oxidize::sm::dimension"]
4649
fn set_domain(dimension: Pin<&mut Dimension>, domain: &[u8]) -> Result<()>;
4750

48-
#[namespace = "tiledb::oxidize"]
51+
#[namespace = "tiledb::oxidize::sm::dimension"]
4952
fn set_tile_extent(dimension: Pin<&mut Dimension>, extent: &[u8]) -> Result<()>;
5053
}
5154

@@ -63,6 +66,19 @@ mod ffi {
6366
fn add_dimension(self: Pin<&mut Domain>, dim: SharedPtr<Dimension>);
6467
}
6568

69+
#[namespace = "tiledb::sm"]
70+
unsafe extern "C++" {
71+
include!("tiledb/sm/array_schema/enumeration.h");
72+
73+
type Enumeration;
74+
75+
#[cxx_name = "cell_val_num"]
76+
fn cell_val_num_cxx(&self) -> u32;
77+
78+
#[cxx_name = "type"]
79+
fn datatype(&self) -> Datatype;
80+
}
81+
6682
#[namespace = "tiledb::sm"]
6783
unsafe extern "C++" {
6884
include!("tiledb/sm/array_schema/array_schema.h");
@@ -117,7 +133,7 @@ use std::str::Utf8Error;
117133

118134
use num_traits::ToBytes;
119135

120-
pub use ffi::{ArraySchema, Attribute, ConstAttribute, Datatype, Dimension, Domain};
136+
pub use ffi::{ArraySchema, Attribute, ConstAttribute, Datatype, Dimension, Domain, Enumeration};
121137

122138
#[derive(Debug)]
123139
pub enum CellValNum {
@@ -222,6 +238,19 @@ impl Attribute {
222238
// SAFETY: non-zero would have been validated by the ArraySchema
223239
CellValNum::from_cxx(cxx).unwrap()
224240
}
241+
242+
pub fn enumeration_name_cxx(&self) -> *const cxx::CxxString {
243+
ffi::enumeration_name_cxx(self)
244+
}
245+
246+
pub fn enumeration_name(&self) -> Option<Result<&str, Utf8Error>> {
247+
let ptr = self.enumeration_name_cxx();
248+
if ptr.is_null() {
249+
return None;
250+
}
251+
let cxx = unsafe { &*ptr };
252+
Some(cxx.to_str())
253+
}
225254
}
226255

227256
impl Debug for Attribute {
@@ -274,6 +303,22 @@ impl Field<'_> {
274303
Self::Dimension(_) => false,
275304
}
276305
}
306+
307+
pub fn enumeration_name(&self) -> Option<Result<&str, Utf8Error>> {
308+
match self {
309+
Self::Attribute(a) => a.enumeration_name(),
310+
Self::Dimension(_) => None,
311+
}
312+
}
313+
}
314+
315+
impl Enumeration {
316+
pub fn cell_val_num(&self) -> CellValNum {
317+
let cxx = self.cell_val_num_cxx();
318+
319+
// SAFETY: non-zero would have been validated by the ArraySchema
320+
CellValNum::from_cxx(cxx).unwrap()
321+
}
277322
}
278323

279324
impl ArraySchema {

tiledb/oxidize/test-support/cxx-interface/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ mod ffi {
3232
type ResultTile = tiledb_cxx_interface::sm::query::readers::ResultTile;
3333
}
3434

35-
#[namespace = "tiledb::oxidize"]
35+
#[namespace = "tiledb::oxidize::sm::attribute"]
3636
extern "C++" {
3737
type ConstAttribute = tiledb_cxx_interface::sm::array_schema::ConstAttribute;
3838
}

tiledb/sm/array_schema/array_schema.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -806,13 +806,13 @@ void ArraySchema::add_attribute(
806806

807807
auto enmr_name = attr->get_enumeration_name();
808808
if (enmr_name.has_value()) {
809-
// The referenced enumeration must exist when the attribut is added
810-
auto iter = enumeration_map_.find(enmr_name.value());
809+
// The referenced enumeration must exist when the attribute is added
810+
auto iter = enumeration_map_.find(enmr_name.value().get());
811811
if (iter == enumeration_map_.end()) {
812812
std::string msg =
813813
"Cannot add attribute; Attribute refers to an "
814814
"unknown enumeration named '" +
815-
enmr_name.value() + "'.";
815+
enmr_name.value().get() + "'.";
816816
throw ArraySchemaException(msg);
817817
}
818818

@@ -835,14 +835,15 @@ void ArraySchema::add_attribute(
835835
auto enmr = get_enumeration(enmr_name.value());
836836
if (enmr == nullptr) {
837837
throw ArraySchemaException(
838-
"Cannot add attribute referencing enumeration '" + enmr_name.value() +
838+
"Cannot add attribute referencing enumeration '" +
839+
enmr_name.value().get() +
839840
"' as the enumeration has not been loaded.");
840841
}
841842

842843
// The +1 here is because of 0 being a valid index into the enumeration.
843844
if (datatype_max_integral_value(attr->type()) <= enmr->elem_count()) {
844845
throw ArraySchemaException(
845-
"Unable to use enumeration '" + enmr_name.value() +
846+
"Unable to use enumeration '" + enmr_name.value().get() +
846847
"' for attribute '" + attr->name() +
847848
"' because the attribute's type is not large enough to represent "
848849
"all enumeration values.");
@@ -1147,7 +1148,7 @@ void ArraySchema::drop_enumeration(const std::string& enmr_name) {
11471148
if (!attr_enmr_name.has_value()) {
11481149
continue;
11491150
}
1150-
if (attr_enmr_name.value() == enmr_name) {
1151+
if (attr_enmr_name.value().get() == enmr_name) {
11511152
throw ArraySchemaException(
11521153
"Unable to drop enumeration '" + enmr_name + "' as it is used by" +
11531154
" attribute '" + attr->name() + "'.");

0 commit comments

Comments
 (0)