Skip to content

Commit ff7d11d

Browse files
Make the ArrowVTab module public (#259)
* Make the ArrowVTab module public
* chore: clippy lint fixes
* Add unsafe to HelloWithNamedVTab
* Update dependencies

---------

Co-authored-by: Mitch <[email protected]>
1 parent 34a6448 commit ff7d11d

File tree

5 files changed

+61
-23
lines changed

5 files changed

+61
-23
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ memchr = "2.3"
5252
uuid = { version = "1.0", optional = true }
5353
smallvec = "1.6.1"
5454
cast = { version = "0.3", features = ["std"] }
55-
arrow = { version = "49", default-features = false, features = ["prettyprint", "ffi"] }
55+
arrow = { version = "50", default-features = false, features = ["prettyprint", "ffi"] }
5656
rust_decimal = "1.14"
5757
strum = { version = "0.25", features = ["derive"] }
5858
r2d2 = { version = "0.8.9", optional = true }

examples/hello-ext/main.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ impl VTab for HelloVTab {
4242
type InitData = HelloInitData;
4343
type BindData = HelloBindData;
4444

45-
fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
45+
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
4646
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
4747
let param = bind.get_parameter(0).to_string();
4848
unsafe {
@@ -51,14 +51,14 @@ impl VTab for HelloVTab {
5151
Ok(())
5252
}
5353

54-
fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
54+
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
5555
unsafe {
5656
(*data).done = false;
5757
}
5858
Ok(())
5959
}
6060

61-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
61+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
6262
let init_info = func.get_init_data::<HelloInitData>();
6363
let bind_info = func.get_bind_data::<HelloBindData>();
6464

src/vtab/arrow.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ use arrow::{
1818

1919
use num::cast::AsPrimitive;
2020

21+
/// A pointer to the Arrow record batch for the table function.
2122
#[repr(C)]
22-
struct ArrowBindData {
23+
pub struct ArrowBindData {
2324
rb: *mut RecordBatch,
2425
}
2526

@@ -34,14 +35,16 @@ impl Free for ArrowBindData {
3435
}
3536
}
3637

38+
/// Keeps track of whether the Arrow record batch has been consumed.
3739
#[repr(C)]
38-
struct ArrowInitData {
40+
pub struct ArrowInitData {
3941
done: bool,
4042
}
4143

4244
impl Free for ArrowInitData {}
4345

44-
struct ArrowVTab;
46+
/// The Arrow table function.
47+
pub struct ArrowVTab;
4548

4649
unsafe fn address_to_arrow_schema(address: usize) -> FFI_ArrowSchema {
4750
let ptr = address as *mut FFI_ArrowSchema;
@@ -70,7 +73,7 @@ impl VTab for ArrowVTab {
7073
type BindData = ArrowBindData;
7174
type InitData = ArrowInitData;
7275

73-
fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
76+
unsafe fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
7477
let param_count = bind.get_parameter_count();
7578
assert!(param_count == 2);
7679
let array = bind.get_parameter(0).to_int64();
@@ -88,14 +91,14 @@ impl VTab for ArrowVTab {
8891
Ok(())
8992
}
9093

91-
fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
94+
unsafe fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
9295
unsafe {
9396
(*data).done = false;
9497
}
9598
Ok(())
9699
}
97100

98-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
101+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
99102
let init_info = func.get_init_data::<ArrowInitData>();
100103
let bind_info = func.get_bind_data::<ArrowBindData>();
101104
unsafe {

src/vtab/excel.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ impl VTab for ExcelVTab {
3333
type BindData = ExcelBindData;
3434
type InitData = ExcelInitData;
3535

36-
fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
36+
unsafe fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
3737
let param_count = bind.get_parameter_count();
3838
assert!(param_count == 2);
3939
let path = bind.get_parameter(0).to_string();
@@ -125,14 +125,14 @@ impl VTab for ExcelVTab {
125125
Ok(())
126126
}
127127

128-
fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
128+
unsafe fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
129129
unsafe {
130130
(*data).start = 1;
131131
}
132132
Ok(())
133133
}
134134

135-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
135+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
136136
let init_info = func.get_init_data::<ExcelInitData>();
137137
let bind_info = func.get_bind_data::<ExcelBindData>();
138138
unsafe {

src/vtab/mod.rs

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ mod logical_type;
99
mod value;
1010
mod vector;
1111

12+
/// The duckdb Arrow table function interface
1213
#[cfg(feature = "vtab-arrow")]
13-
mod arrow;
14+
pub mod arrow;
1415
#[cfg(feature = "vtab-arrow")]
1516
pub use self::arrow::{
1617
arrow_arraydata_to_query_params, arrow_ffi_to_query_params, arrow_recordbatch_to_query_params,
@@ -66,11 +67,45 @@ pub trait VTab: Sized {
6667
type BindData: Sized + Free;
6768

6869
/// Bind data to the table function
69-
fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
70+
///
71+
/// # Safety
72+
///
73+
/// This function is unsafe because it dereferences raw pointers (`data`) and manipulates the memory directly.
74+
/// The caller must ensure that:
75+
///
76+
/// - The `data` pointer is valid and points to a properly initialized `BindData` instance.
77+
/// - The lifetime of `data` must outlive the execution of `bind` to avoid dangling pointers, especially since
78+
/// `bind` does not take ownership of `data`.
79+
/// - Concurrent access to `data` (if applicable) must be properly synchronized.
80+
/// - The `bind` object must be valid and correctly initialized.
81+
unsafe fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
7082
/// Initialize the table function
71-
fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
83+
///
84+
/// # Safety
85+
///
86+
/// This function is unsafe because it performs raw pointer dereferencing on the `data` argument.
87+
/// The caller is responsible for ensuring that:
88+
///
89+
/// - The `data` pointer is non-null and points to a valid `InitData` instance.
90+
/// - There is no data race when accessing `data`, meaning if `data` is accessed from multiple threads,
91+
/// proper synchronization is required.
92+
/// - The lifetime of `data` extends beyond the scope of this call to avoid use-after-free errors.
93+
unsafe fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
7294
/// The actual function
73-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
95+
///
96+
/// # Safety
97+
///
98+
/// This function is unsafe because it:
99+
///
100+
/// - Dereferences multiple raw pointers (`func` to access `init_info` and `bind_info`).
101+
///
102+
/// The caller must ensure that:
103+
///
104+
/// - All pointers (`func`, `output`, internal `init_info`, and `bind_info`) are valid and point to the expected types of data structures.
105+
/// - The `init_info` and `bind_info` data pointed to remains valid and is not freed until after this function completes.
106+
/// - No other threads are concurrently mutating the data pointed to by `init_info` and `bind_info` without proper synchronization.
107+
/// - The `output` parameter is correctly initialized and can safely be written to.
108+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
74109
/// Does the table function support pushdown
75110
/// default is false
76111
fn supports_pushdown() -> bool {
@@ -197,7 +232,7 @@ mod test {
197232
type InitData = HelloInitData;
198233
type BindData = HelloBindData;
199234

200-
fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
235+
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
201236
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
202237
let param = bind.get_parameter(0).to_string();
203238
unsafe {
@@ -206,14 +241,14 @@ mod test {
206241
Ok(())
207242
}
208243

209-
fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
244+
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
210245
unsafe {
211246
(*data).done = false;
212247
}
213248
Ok(())
214249
}
215250

216-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
251+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
217252
let init_info = func.get_init_data::<HelloInitData>();
218253
let bind_info = func.get_bind_data::<HelloBindData>();
219254

@@ -244,7 +279,7 @@ mod test {
244279
type InitData = HelloInitData;
245280
type BindData = HelloBindData;
246281

247-
fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
282+
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
248283
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
249284
let param = bind.get_named_parameter("name").unwrap().to_string();
250285
assert!(bind.get_named_parameter("unknown_name").is_none());
@@ -254,11 +289,11 @@ mod test {
254289
Ok(())
255290
}
256291

257-
fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
292+
unsafe fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
258293
HelloVTab::init(init_info, data)
259294
}
260295

261-
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
296+
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
262297
HelloVTab::func(func, output)
263298
}
264299

0 commit comments

Comments
 (0)