Skip to content

Commit 1e29fc1

Browse files
Maxxen and sourcefrog authored
1.2.0 followup (#428)
* Use ptr::write when writing uninitialized memory
* Use smaller unsafe blocks
* Rust BindData/InitData can just use Rust types
* Use unsafe blocks inside unsafe fns generated by macro

  This will prevent the macro generating a warning in edition 2024

* Fix unused import
* Better safety docs for vtab methods

  Contrary to the previous docs, the instances passed to these functions are *not* initialized by the caller. Rather, the called function is responsible for writing into uninitialized memory.

* Similar safety fixes in vtab tests
* VTab::bind and init are now safe

  Rather than passing a pointer to a block of uninitialized memory, which can easily lead to UB, these functions now just return Rust objects. This improves #414 by reducing the amount of unsafe code needed from extensions.

* vtab::Free is no longer needed

  BindInfo and InitInfo will be dropped in the usual way when freed by duckdb core. Any necessary destructors can be in Drop impls.

* BindData and InitData should be Send+Sync

  It's not completely clear but it looks like the engine could run the table fn from multiple threads, so requiring this seems safer

* Add a safe & typed interface to get bind_data
* Also safely retrieve the init_data
* Add unsafe blocks, rm unnecessary cast
* clippy

---------

Co-authored-by: Martin Pool <[email protected]>
1 parent 7848ebb commit 1e29fc1

File tree

6 files changed

+227
-294
lines changed

6 files changed

+227
-294
lines changed

crates/duckdb/examples/hello-ext-capi/main.rs

Lines changed: 22 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,78 +4,55 @@ extern crate libduckdb_sys;
44

55
use duckdb::{
66
core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId},
7-
vtab::{BindInfo, Free, FunctionInfo, InitInfo, VTab},
7+
vtab::{BindInfo, FunctionInfo, InitInfo, VTab},
88
Connection, Result,
99
};
1010
use duckdb_loadable_macros::duckdb_entrypoint_c_api;
1111
use libduckdb_sys as ffi;
1212
use std::{
1313
error::Error,
14-
ffi::{c_char, CString},
14+
ffi::CString,
15+
sync::atomic::{AtomicBool, Ordering},
1516
};
1617

1718
#[repr(C)]
1819
struct HelloBindData {
19-
name: *mut c_char,
20-
}
21-
22-
impl Free for HelloBindData {
23-
fn free(&mut self) {
24-
unsafe {
25-
if self.name.is_null() {
26-
return;
27-
}
28-
drop(CString::from_raw(self.name));
29-
}
30-
}
20+
name: String,
3121
}
3222

3323
#[repr(C)]
3424
struct HelloInitData {
35-
done: bool,
25+
done: AtomicBool,
3626
}
3727

3828
struct HelloVTab;
3929

40-
impl Free for HelloInitData {}
41-
4230
impl VTab for HelloVTab {
4331
type InitData = HelloInitData;
4432
type BindData = HelloBindData;
4533

46-
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
34+
fn bind(bind: &BindInfo) -> Result<Self::BindData, Box<dyn std::error::Error>> {
4735
bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar));
48-
let param = bind.get_parameter(0).to_string();
49-
unsafe {
50-
(*data).name = CString::new(param).unwrap().into_raw();
51-
}
52-
Ok(())
36+
let name = bind.get_parameter(0).to_string();
37+
Ok(HelloBindData { name })
5338
}
5439

55-
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
56-
unsafe {
57-
(*data).done = false;
58-
}
59-
Ok(())
40+
fn init(_: &InitInfo) -> Result<Self::InitData, Box<dyn std::error::Error>> {
41+
Ok(HelloInitData {
42+
done: AtomicBool::new(false),
43+
})
6044
}
6145

62-
unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
63-
let init_info = func.get_init_data::<HelloInitData>();
64-
let bind_info = func.get_bind_data::<HelloBindData>();
65-
66-
unsafe {
67-
if (*init_info).done {
68-
output.set_len(0);
69-
} else {
70-
(*init_info).done = true;
71-
let vector = output.flat_vector(0);
72-
let name = CString::from_raw((*bind_info).name);
73-
let result = CString::new(format!("Hello {}", name.to_str()?))?;
74-
// Can't consume the CString
75-
(*bind_info).name = CString::into_raw(name);
76-
vector.insert(0, result);
77-
output.set_len(1);
78-
}
46+
fn func(func: &FunctionInfo<Self>, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
47+
let init_data = func.get_init_data();
48+
let bind_data = func.get_bind_data();
49+
if init_data.done.swap(true, Ordering::Relaxed) {
50+
output.set_len(0);
51+
} else {
52+
let vector = output.flat_vector(0);
53+
let result = CString::new(format!("Hello {}", bind_data.name))?;
54+
vector.insert(0, result);
55+
output.set_len(1);
7956
}
8057
Ok(())
8158
}

crates/duckdb/examples/hello-ext/main.rs

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,61 +6,51 @@ extern crate libduckdb_sys;
66

77
use duckdb::{
88
core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId},
9-
vtab::{BindInfo, Free, FunctionInfo, InitInfo, VTab},
9+
vtab::{BindInfo, FunctionInfo, InitInfo, VTab},
1010
Connection, Result,
1111
};
1212
use duckdb_loadable_macros::duckdb_entrypoint;
1313
use libduckdb_sys as ffi;
1414
use std::{
1515
error::Error,
1616
ffi::{c_char, c_void, CString},
17-
ptr,
17+
sync::atomic::{AtomicBool, Ordering},
1818
};
1919

2020
struct HelloBindData {
2121
name: String,
2222
}
2323

24-
impl Free for HelloBindData {}
25-
2624
struct HelloInitData {
27-
done: bool,
25+
done: AtomicBool,
2826
}
2927

3028
struct HelloVTab;
3129

32-
impl Free for HelloInitData {}
33-
3430
impl VTab for HelloVTab {
3531
type InitData = HelloInitData;
3632
type BindData = HelloBindData;
3733

38-
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
34+
fn bind(bind: &BindInfo) -> Result<Self::BindData, Box<dyn std::error::Error>> {
3935
bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar));
4036
let name = bind.get_parameter(0).to_string();
41-
unsafe {
42-
ptr::write(data, HelloBindData { name });
43-
}
44-
Ok(())
37+
Ok(HelloBindData { name })
4538
}
4639

47-
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
48-
unsafe {
49-
ptr::write(data, HelloInitData { done: false });
50-
}
51-
Ok(())
40+
fn init(_: &InitInfo) -> Result<Self::InitData, Box<dyn std::error::Error>> {
41+
Ok(HelloInitData {
42+
done: AtomicBool::new(false),
43+
})
5244
}
5345

54-
unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
55-
let init_info = unsafe { func.get_init_data::<HelloInitData>().as_mut().unwrap() };
56-
let bind_info = unsafe { func.get_bind_data::<HelloBindData>().as_mut().unwrap() };
57-
58-
if init_info.done {
46+
fn func(func: &FunctionInfo<Self>, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
47+
let init_data = func.get_init_data();
48+
let bind_data = func.get_bind_data();
49+
if init_data.done.swap(true, Ordering::Relaxed) {
5950
output.set_len(0);
6051
} else {
61-
init_info.done = true;
6252
let vector = output.flat_vector(0);
63-
let result = CString::new(format!("Hello {}", bind_info.name))?;
53+
let result = CString::new(format!("Hello {}", bind_data.name))?;
6454
vector.insert(0, result);
6555
output.set_len(1);
6656
}

crates/duckdb/src/vtab/arrow.rs

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
use super::{BindInfo, DataChunkHandle, Free, FunctionInfo, InitInfo, LogicalTypeHandle, LogicalTypeId, VTab};
2-
use std::ptr::null_mut;
1+
use super::{BindInfo, DataChunkHandle, FunctionInfo, InitInfo, LogicalTypeHandle, LogicalTypeId, VTab};
2+
use std::sync::{atomic::AtomicBool, Mutex};
33

44
use crate::core::{ArrayVector, FlatVector, Inserter, ListVector, StructVector, Vector};
55
use arrow::{
@@ -24,28 +24,15 @@ use num::{cast::AsPrimitive, ToPrimitive};
2424
/// A pointer to the Arrow record batch for the table function.
2525
#[repr(C)]
2626
pub struct ArrowBindData {
27-
rb: *mut RecordBatch,
28-
}
29-
30-
impl Free for ArrowBindData {
31-
fn free(&mut self) {
32-
unsafe {
33-
if self.rb.is_null() {
34-
return;
35-
}
36-
drop(Box::from_raw(self.rb));
37-
}
38-
}
27+
rb: Mutex<RecordBatch>,
3928
}
4029

4130
/// Keeps track of whether the Arrow record batch has been consumed.
4231
#[repr(C)]
4332
pub struct ArrowInitData {
44-
done: bool,
33+
done: AtomicBool,
4534
}
4635

47-
impl Free for ArrowInitData {}
48-
4936
/// The Arrow table function.
5037
pub struct ArrowVTab;
5138

@@ -76,14 +63,14 @@ impl VTab for ArrowVTab {
7663
type BindData = ArrowBindData;
7764
type InitData = ArrowInitData;
7865

79-
unsafe fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
80-
(*data).rb = null_mut();
66+
fn bind(bind: &BindInfo) -> Result<Self::BindData, Box<dyn std::error::Error>> {
8167
let param_count = bind.get_parameter_count();
8268
if param_count != 2 {
8369
return Err(format!("Bad param count: {param_count}, expected 2").into());
8470
}
8571
let array = bind.get_parameter(0).to_int64();
8672
let schema = bind.get_parameter(1).to_int64();
73+
8774
unsafe {
8875
let rb = address_to_arrow_record_batch(array as usize, schema as usize);
8976
for f in rb.schema().fields() {
@@ -92,32 +79,29 @@ impl VTab for ArrowVTab {
9279
let logical_type = to_duckdb_logical_type(data_type)?;
9380
bind.add_result_column(name, logical_type);
9481
}
95-
(*data).rb = Box::into_raw(Box::new(rb));
82+
83+
Ok(ArrowBindData { rb: Mutex::new(rb) })
9684
}
97-
Ok(())
9885
}
9986

100-
unsafe fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
101-
unsafe {
102-
(*data).done = false;
103-
}
104-
Ok(())
87+
fn init(_: &InitInfo) -> Result<Self::InitData, Box<dyn std::error::Error>> {
88+
Ok(ArrowInitData {
89+
done: AtomicBool::new(false),
90+
})
10591
}
10692

107-
unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
108-
let init_info = func.get_init_data::<ArrowInitData>();
109-
let bind_info = func.get_bind_data::<ArrowBindData>();
110-
unsafe {
111-
if (*init_info).done {
112-
output.set_len(0);
113-
} else {
114-
let rb = Box::from_raw((*bind_info).rb);
115-
(*bind_info).rb = null_mut(); // erase ref in case of failure in record_batch_to_duckdb_data_chunk
116-
record_batch_to_duckdb_data_chunk(&rb, output)?;
117-
(*bind_info).rb = Box::into_raw(rb);
118-
(*init_info).done = true;
119-
}
93+
fn func(func: &FunctionInfo<Self>, output: &mut DataChunkHandle) -> Result<(), Box<dyn std::error::Error>> {
94+
let init_info = func.get_init_data();
95+
let bind_info = func.get_bind_data();
96+
97+
if init_info.done.load(std::sync::atomic::Ordering::Relaxed) {
98+
output.set_len(0);
99+
} else {
100+
let rb = bind_info.rb.lock().unwrap();
101+
record_batch_to_duckdb_data_chunk(&rb, output)?;
102+
init_info.done.store(true, std::sync::atomic::Ordering::Relaxed);
120103
}
104+
121105
Ok(())
122106
}
123107

0 commit comments

Comments
 (0)