Skip to content

Commit cd544ce

Browse files
feat(rust): propagate SEA manifest metadata through Arrow schema (#353)
## Summary - Propagate SEA manifest column metadata (type_name, type_text, type_precision, type_scale, type_interval_type) through the Arrow C Data Interface FFI boundary as `databricks.*` field-level key-value metadata - Enables the C++ ODBC driver to read server-provided metadata instead of reverse-engineering it from Arrow type IDs — fixing systematic diffs in nullable, precision, scale, display_size, octet_length, and type_name - All paths that export Arrow streams (Statement::execute, metadata FFI functions) now carry the manifest through ## Changes | File | Change | |------|--------| | `src/types/sea.rs` | Add `type_precision`, `type_scale`, `type_interval_type` to `ColumnInfo` | | `src/reader/mod.rs` | Add `metadata_keys` constants + `augment_schema_with_manifest()` | | `src/reader/mod.rs` | Update `ResultReaderAdapter::new()` to accept optional manifest | | `src/client/mod.rs` | Add `manifest: Option<ResultManifest>` to `ExecuteResult` | | `src/client/sea.rs` | Pass `response.manifest` through to `ExecuteResult` | | `src/statement.rs` | Pass manifest to `ResultReaderAdapter::new()` | | `src/ffi/metadata.rs` | Pass manifest through `export_reader()` for all metadata FFI functions | ## Test plan - [x] Unit tests for `ColumnInfo` deserialization with/without optional fields - [x] Unit tests for `augment_schema_with_manifest` (basic types, DECIMAL precision/scale, INTERVAL, missing ColumnInfo, preserves existing metadata) - [x] Unit tests for `ResultReaderAdapter` with and without manifest - [x] E2E test (`metadata_propagation_test` example) verifying metadata flows through for INT, LONG, STRING, BOOLEAN, DOUBLE, FLOAT, SHORT, BYTE, DECIMAL(10,2), DECIMAL(18,5), DECIMAL(38,0), DATE, TIMESTAMP, ARRAY, MAP, STRUCT, BINARY - [ ] ODBC cross-driver comparator (after C++ consumer PR lands) This pull request was AI-assisted by Isaac.
1 parent 140868b commit cd544ce

File tree

10 files changed

+1111
-24
lines changed

10 files changed

+1111
-24
lines changed

rust/docs/designs/result-metadata-propagation.md

Lines changed: 462 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
// Copyright (c) 2025 ADBC Drivers Contributors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
//! E2E test for result metadata propagation from SEA manifest to Arrow schema.
16+
//!
17+
//! Verifies that `databricks.*` field-level metadata (type_name, type_text,
18+
//! type_precision, type_scale) is attached to Arrow fields after query execution.
19+
//!
20+
//! Run with:
21+
//! ```bash
22+
//! cargo run --example metadata_propagation_test
23+
//! ```
24+
25+
use adbc_core::options::{OptionDatabase, OptionValue};
26+
use adbc_core::Connection as ConnectionTrait;
27+
use adbc_core::Database as DatabaseTrait;
28+
use adbc_core::Driver as DriverTrait;
29+
use adbc_core::Optionable;
30+
use adbc_core::Statement as StatementTrait;
31+
use arrow_array::RecordBatchReader;
32+
use databricks_adbc::Driver;
33+
34+
// Arrow field-level metadata keys the driver attaches from the SEA manifest
// (see `augment_schema_with_manifest` in `src/reader/mod.rs`).

/// Server-reported logical type name, e.g. "INT" or "DECIMAL".
const DATABRICKS_TYPE_NAME: &str = "databricks.type_name";
/// Full type text as rendered by the server, e.g. "DECIMAL(10,2)".
const DATABRICKS_TYPE_TEXT: &str = "databricks.type_text";
/// Decimal precision; in these tests it is present only for DECIMAL columns.
const DATABRICKS_TYPE_PRECISION: &str = "databricks.type_precision";
/// Decimal scale; in these tests it is present only for DECIMAL columns.
const DATABRICKS_TYPE_SCALE: &str = "databricks.type_scale";
38+
39+
fn main() {
40+
let host =
41+
std::env::var("DATABRICKS_HOST").expect("DATABRICKS_HOST environment variable required");
42+
let http_path = std::env::var("DATABRICKS_HTTP_PATH")
43+
.expect("DATABRICKS_HTTP_PATH environment variable required");
44+
let token =
45+
std::env::var("DATABRICKS_TOKEN").expect("DATABRICKS_TOKEN environment variable required");
46+
47+
let mut driver = Driver::new();
48+
let mut db = driver.new_database().expect("Failed to create database");
49+
50+
db.set_option(OptionDatabase::Uri, OptionValue::String(host))
51+
.expect("Failed to set uri");
52+
db.set_option(
53+
OptionDatabase::Other("databricks.http_path".into()),
54+
OptionValue::String(http_path),
55+
)
56+
.expect("Failed to set http_path");
57+
db.set_option(
58+
OptionDatabase::Other("databricks.auth.type".into()),
59+
OptionValue::String("access_token".to_string()),
60+
)
61+
.expect("Failed to set auth type");
62+
db.set_option(
63+
OptionDatabase::Other("databricks.access_token".into()),
64+
OptionValue::String(token),
65+
)
66+
.expect("Failed to set access_token");
67+
68+
let mut conn = db.new_connection().expect("Failed to create connection");
69+
70+
let mut all_passed = true;
71+
72+
// Test 1: Basic types
73+
println!("=== Test 1: Basic Types ===");
74+
all_passed &= run_test(
75+
&mut conn,
76+
r#"SELECT
77+
CAST(1 AS INT) as int_col,
78+
CAST(100 AS BIGINT) as bigint_col,
79+
'hello' as string_col,
80+
TRUE as bool_col,
81+
CAST(3.14 AS DOUBLE) as double_col,
82+
CAST(1.5 AS FLOAT) as float_col,
83+
CAST(42 AS SMALLINT) as smallint_col,
84+
CAST(7 AS TINYINT) as tinyint_col
85+
"#,
86+
&[
87+
("int_col", "INT", "INT", None, None),
88+
("bigint_col", "LONG", "BIGINT", None, None),
89+
("string_col", "STRING", "STRING", None, None),
90+
("bool_col", "BOOLEAN", "BOOLEAN", None, None),
91+
("double_col", "DOUBLE", "DOUBLE", None, None),
92+
("float_col", "FLOAT", "FLOAT", None, None),
93+
("smallint_col", "SHORT", "SMALLINT", None, None),
94+
("tinyint_col", "BYTE", "TINYINT", None, None),
95+
],
96+
);
97+
98+
// Test 2: DECIMAL with precision and scale
99+
println!("\n=== Test 2: DECIMAL Types ===");
100+
all_passed &= run_test(
101+
&mut conn,
102+
r#"SELECT
103+
CAST(1.23 AS DECIMAL(10,2)) as dec_10_2,
104+
CAST(99999.99999 AS DECIMAL(18,5)) as dec_18_5,
105+
CAST(0 AS DECIMAL(38,0)) as dec_38_0
106+
"#,
107+
&[
108+
(
109+
"dec_10_2",
110+
"DECIMAL",
111+
"DECIMAL(10,2)",
112+
Some("10"),
113+
Some("2"),
114+
),
115+
(
116+
"dec_18_5",
117+
"DECIMAL",
118+
"DECIMAL(18,5)",
119+
Some("18"),
120+
Some("5"),
121+
),
122+
(
123+
"dec_38_0",
124+
"DECIMAL",
125+
"DECIMAL(38,0)",
126+
Some("38"),
127+
Some("0"),
128+
),
129+
],
130+
);
131+
132+
// Test 3: Date/Time types
133+
println!("\n=== Test 3: Date/Time Types ===");
134+
all_passed &= run_test(
135+
&mut conn,
136+
r#"SELECT
137+
CURRENT_DATE() as date_col,
138+
CURRENT_TIMESTAMP() as timestamp_col
139+
"#,
140+
&[
141+
("date_col", "DATE", "DATE", None, None),
142+
("timestamp_col", "TIMESTAMP", "TIMESTAMP", None, None),
143+
],
144+
);
145+
146+
// Test 4: Complex types
147+
println!("\n=== Test 4: Complex Types ===");
148+
all_passed &= run_test(
149+
&mut conn,
150+
r#"SELECT
151+
ARRAY(1, 2, 3) as array_col,
152+
MAP('a', 1, 'b', 2) as map_col,
153+
NAMED_STRUCT('x', 1, 'y', 'hello') as struct_col
154+
"#,
155+
&[
156+
("array_col", "ARRAY", "ARRAY<INT>", None, None),
157+
("map_col", "MAP", "MAP<STRING, INT>", None, None),
158+
(
159+
"struct_col",
160+
"STRUCT",
161+
"STRUCT<x: INT NOT NULL, y: STRING NOT NULL>",
162+
None,
163+
None,
164+
),
165+
],
166+
);
167+
168+
// Test 5: BINARY type
169+
println!("\n=== Test 5: BINARY Type ===");
170+
all_passed &= run_test(
171+
&mut conn,
172+
r#"SELECT CAST('bytes' AS BINARY) as binary_col"#,
173+
&[("binary_col", "BINARY", "BINARY", None, None)],
174+
);
175+
176+
println!("\n========================================");
177+
if all_passed {
178+
println!("ALL TESTS PASSED");
179+
} else {
180+
println!("SOME TESTS FAILED");
181+
std::process::exit(1);
182+
}
183+
}
184+
185+
/// Expected column metadata: (field_name, type_name, type_text, precision, scale)
186+
type ExpectedColumn<'a> = (&'a str, &'a str, &'a str, Option<&'a str>, Option<&'a str>);
187+
188+
/// Run a query and verify that the Arrow field metadata matches expectations.
189+
fn run_test(conn: &mut impl ConnectionTrait, sql: &str, expected: &[ExpectedColumn<'_>]) -> bool {
190+
let mut stmt = conn.new_statement().expect("Failed to create statement");
191+
stmt.set_sql_query(sql).expect("Failed to set query");
192+
let reader = stmt.execute().expect("Failed to execute query");
193+
let schema = reader.schema();
194+
195+
let mut all_ok = true;
196+
197+
for (i, (name, exp_type_name, exp_type_text, exp_precision, exp_scale)) in
198+
expected.iter().enumerate()
199+
{
200+
let field = schema.field(i);
201+
let meta = field.metadata();
202+
203+
let got_name = meta.get(DATABRICKS_TYPE_NAME);
204+
let got_text = meta.get(DATABRICKS_TYPE_TEXT);
205+
let got_prec = meta.get(DATABRICKS_TYPE_PRECISION);
206+
let got_scale = meta.get(DATABRICKS_TYPE_SCALE);
207+
208+
let name_ok = got_name.map(|s| s.as_str()) == Some(exp_type_name);
209+
let text_ok = got_text.map(|s| s.as_str()) == Some(exp_type_text);
210+
let prec_ok = got_prec.map(|s| s.as_str()) == *exp_precision;
211+
let scale_ok = got_scale.map(|s| s.as_str()) == *exp_scale;
212+
213+
let ok = name_ok && text_ok && prec_ok && scale_ok;
214+
let status = if ok { "PASS" } else { "FAIL" };
215+
216+
println!(
217+
" [{status}] {name}: type_name={} type_text={} precision={} scale={}",
218+
got_name.map(|s| s.as_str()).unwrap_or("<missing>"),
219+
got_text.map(|s| s.as_str()).unwrap_or("<missing>"),
220+
got_prec.map(|s| s.as_str()).unwrap_or("<none>"),
221+
got_scale.map(|s| s.as_str()).unwrap_or("<none>"),
222+
);
223+
224+
if !ok {
225+
all_ok = false;
226+
if !name_ok {
227+
println!(
228+
" expected type_name={exp_type_name}, got {:?}",
229+
got_name
230+
);
231+
}
232+
if !text_ok {
233+
println!(
234+
" expected type_text={exp_type_text}, got {:?}",
235+
got_text
236+
);
237+
}
238+
if !prec_ok {
239+
println!(
240+
" expected precision={:?}, got {:?}",
241+
exp_precision, got_prec
242+
);
243+
}
244+
if !scale_ok {
245+
println!(
246+
" expected scale={:?}, got {:?}",
247+
exp_scale, got_scale
248+
);
249+
}
250+
}
251+
}
252+
253+
all_ok
254+
}

rust/src/client/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,20 @@ pub struct SessionInfo {
6262
pub session_id: String,
6363
}
6464

65-
/// Result from `execute_statement`. Contains the statement ID (for cancellation/cleanup)
66-
/// and a reader over the result data.
65+
/// Result from `execute_statement`. Contains the statement ID (for cancellation/cleanup),
66+
/// a reader over the result data, and optionally the SEA manifest for metadata propagation.
6767
pub struct ExecuteResult {
6868
pub statement_id: String,
6969
pub reader: Box<dyn ResultReader + Send>,
70+
pub manifest: Option<ResultManifest>,
7071
}
7172

7273
impl std::fmt::Debug for ExecuteResult {
7374
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7475
f.debug_struct("ExecuteResult")
7576
.field("statement_id", &self.statement_id)
7677
.field("reader", &"<dyn ResultReader>")
78+
.field("manifest", &self.manifest)
7779
.finish()
7880
}
7981
}

rust/src/client/sea.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ impl DatabricksClient for SeaClient {
420420
Ok(ExecuteResult {
421421
statement_id: response.statement_id,
422422
reader,
423+
manifest: response.manifest,
423424
})
424425
}
425426

0 commit comments

Comments
 (0)