diff --git a/build.rs b/build.rs index 083898c..a5616d1 100644 --- a/build.rs +++ b/build.rs @@ -69,6 +69,8 @@ fn main() -> Result<(), Box> { // Blocklist raw deparse functions that use types from bindings_raw .blocklist_function("pg_query_deparse_raw") .blocklist_function("pg_query_deparse_raw_opts") + // Blocklist raw fingerprint function that uses types from bindings_raw + .blocklist_function("pg_query_fingerprint_raw") .generate() .map_err(|_| "Unable to generate bindings")? .write_to_file(out_dir.join("bindings.rs"))?; @@ -433,6 +435,15 @@ fn main() -> Result<(), Box> { .allowlist_function("pg_query_list_make1") .allowlist_function("pg_query_list_append") .allowlist_function("pg_query_deparse_nodes") + // Raw scan functions (bypasses protobuf) + .allowlist_type("PgQueryRawScanToken") + .allowlist_type("PgQueryRawScanResult") + .allowlist_function("pg_query_scan_raw") + .allowlist_function("pg_query_free_raw_scan_result") + // Raw fingerprint (works with raw parse result) + .allowlist_type("PgQueryFingerprintResult") + .allowlist_function("pg_query_fingerprint_raw") + .allowlist_function("pg_query_free_fingerprint_result") .generate() .map_err(|_| "Unable to generate raw bindings")? .write_to_file(out_dir.join("bindings_raw.rs"))?; diff --git a/libpg_query b/libpg_query index db02663..3393939 160000 --- a/libpg_query +++ b/libpg_query @@ -1 +1 @@ -Subproject commit db02663b0be81a8499ee8c0cc87081effb13d54e +Subproject commit 3393939ab11d1eec2d240baaf812a4295c0c5ade diff --git a/src/lib.rs b/src/lib.rs index 135f914..3c23ff8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,9 @@ mod parse_result; pub mod protobuf; mod query; mod raw_deparse; +mod raw_fingerprint; mod raw_parse; +mod raw_scan; mod summary; mod summary_result; mod truncate; @@ -62,7 +64,9 @@ pub use node_ref::*; pub use parse_result::*; pub use query::*; pub use raw_deparse::deparse_raw; +pub use raw_fingerprint::fingerprint_raw; pub use raw_parse::parse_raw; +pub use raw_scan::scan_raw; pub use summary::*; pub use summary_result::*; pub use truncate::*; diff --git a/src/node_enum.rs b/src/node_enum.rs index c4f39b2..96efe32 100644 --- a/src/node_enum.rs +++ b/src/node_enum.rs @@ -19,6 +19,13 @@ impl NodeEnum { }) } + pub fn deparse_raw(&self) -> Result { + crate::deparse_raw(&protobuf::ParseResult { + version: crate::bindings::PG_VERSION_NUM as i32, + stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(Node { node: Some(self.clone()) })), stmt_location: 0, stmt_len: 0 }], + }) + } + pub fn nodes(&self) -> Vec<(NodeRef<'_>, i32, Context, bool)> { let mut iter = vec![(self.to_ref(), 0, Context::None, false)]; let mut nodes = Vec::new(); diff --git a/src/node_mut.rs b/src/node_mut.rs index 615cebb..c293abb 100644 --- a/src/node_mut.rs +++ b/src/node_mut.rs @@ -280,6 +280,13 @@ impl NodeMut { }) } + pub fn deparse_raw(&self) -> Result { + crate::deparse_raw(&protobuf::ParseResult { + version: crate::bindings::PG_VERSION_NUM as i32, + stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(Node { node: Some(self.to_enum()?) })), stmt_location: 0, stmt_len: 0 }], + }) + } + pub fn to_enum(&self) -> Result { unsafe { let err = Error::InvalidPointer; diff --git a/src/node_structs.rs b/src/node_structs.rs index 49e06b6..1d6f8a4 100644 --- a/src/node_structs.rs +++ b/src/node_structs.rs @@ -7,6 +7,13 @@ impl Node { stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 }], }) } + + pub fn deparse_raw(&self) -> Result { + crate::deparse_raw(&protobuf::ParseResult { + version: crate::bindings::PG_VERSION_NUM as i32, + stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 }], + }) + } } impl protobuf::Alias { diff --git a/src/raw_fingerprint.rs b/src/raw_fingerprint.rs new file mode 100644 index 0000000..eb43213 --- /dev/null +++ b/src/raw_fingerprint.rs @@ -0,0 +1,83 @@ +//! Direct fingerprinting that bypasses protobuf serialization/deserialization. +//! +//! This module provides a faster alternative to the standard fingerprint function by +//! parsing directly into PostgreSQL's internal structures and fingerprinting them +//! without going through protobuf serialization. + +use crate::bindings_raw; +use crate::query::Fingerprint; +use crate::{Error, Result}; +use std::ffi::{CStr, CString}; + +/// Fingerprints a SQL statement without going through protobuf serialization. +/// +/// This function is faster than `fingerprint` because it skips the protobuf encode/decode step. +/// The SQL is parsed directly into PostgreSQL's internal structures and fingerprinted there. +/// +/// # Example +/// +/// ```rust +/// let result = pg_query::fingerprint_raw("SELECT * FROM contacts WHERE name='Paul'").unwrap(); +/// assert_eq!(result.hex, "0e2581a461ece536"); +/// ``` +pub fn fingerprint_raw(statement: &str) -> Result { + let input = CString::new(statement)?; + + // Parse the SQL into raw C structures + let parse_result = unsafe { bindings_raw::pg_query_parse_raw(input.as_ptr()) }; + + // Fingerprint the raw parse tree + let fingerprint_result = unsafe { bindings_raw::pg_query_fingerprint_raw(parse_result) }; + + // Free the parse result (the fingerprint result has its own copies of any needed data) + unsafe { bindings_raw::pg_query_free_raw_parse_result(parse_result) }; + + // Convert the fingerprint result to Rust types + let result = if !fingerprint_result.error.is_null() { + let message = unsafe { CStr::from_ptr((*fingerprint_result.error).message) }.to_string_lossy().to_string(); + Err(Error::Parse(message)) + } else { + let hex = unsafe { CStr::from_ptr(fingerprint_result.fingerprint_str) }; + Ok(Fingerprint { value: fingerprint_result.fingerprint, hex: hex.to_string_lossy().to_string() }) + }; + + unsafe { bindings_raw::pg_query_free_fingerprint_result(fingerprint_result) }; + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fingerprint_raw_basic() { + let result = fingerprint_raw("SELECT * FROM users").unwrap(); + assert!(!result.hex.is_empty()); + assert_eq!(result.hex.len(), 16); + } + + #[test] + fn test_fingerprint_raw_matches_fingerprint() { + let sql = "SELECT * FROM contacts WHERE name='Paul'"; + let raw_result = fingerprint_raw(sql).unwrap(); + let std_result = crate::fingerprint(sql).unwrap(); + + assert_eq!(raw_result.value, std_result.value); + assert_eq!(raw_result.hex, std_result.hex); + } + + #[test] + fn test_fingerprint_raw_normalizes_values() { + // These should have the same fingerprint since values are normalized + let fp1 = fingerprint_raw("SELECT * FROM users WHERE id = 1").unwrap(); + let fp2 = fingerprint_raw("SELECT * FROM users WHERE id = 999").unwrap(); + assert_eq!(fp1.value, fp2.value); + assert_eq!(fp1.hex, fp2.hex); + } + + #[test] + fn test_fingerprint_raw_error() { + let result = fingerprint_raw("NOT VALID SQL @#$"); + assert!(result.is_err()); + } +} diff --git a/src/raw_scan.rs b/src/raw_scan.rs new file mode 100644 index 0000000..20ea795 --- /dev/null +++ b/src/raw_scan.rs @@ -0,0 +1,98 @@ +//! Direct scanning that bypasses protobuf serialization/deserialization. +//! +//! This module provides a faster alternative to the protobuf-based scanning by +//! directly reading the scanner's token output and converting it to Rust protobuf types. + +use crate::bindings; +use crate::bindings_raw; +use crate::protobuf; +use crate::{Error, Result}; +use std::ffi::{CStr, CString}; + +/// Scans a SQL statement directly into protobuf types without going through protobuf serialization. +/// +/// This function is faster than `scan` because it skips the protobuf encode/decode step. +/// The tokens are read directly from the C scanner output. +/// +/// # Example +/// +/// ```rust +/// let result = pg_query::scan_raw("SELECT * FROM users").unwrap(); +/// assert!(!result.tokens.is_empty()); +/// ``` +pub fn scan_raw(sql: &str) -> Result { + let input = CString::new(sql)?; + let result = unsafe { bindings_raw::pg_query_scan_raw(input.as_ptr()) }; + + let scan_result = if !result.error.is_null() { + let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string(); + Err(Error::Scan(message)) + } else { + // Convert the C tokens to protobuf types + let tokens = unsafe { convert_tokens(result.tokens, result.n_tokens) }; + Ok(protobuf::ScanResult { version: bindings::PG_VERSION_NUM as i32, tokens }) + }; + + unsafe { bindings_raw::pg_query_free_raw_scan_result(result) }; + scan_result +} + +/// Converts C scan tokens to protobuf ScanToken vector. +unsafe fn convert_tokens(tokens: *mut bindings_raw::PgQueryRawScanToken, n_tokens: usize) -> Vec { + if tokens.is_null() || n_tokens == 0 { + return Vec::new(); + } + + let mut result = Vec::with_capacity(n_tokens); + + for i in 0..n_tokens { + let token = &*tokens.add(i); + result.push(protobuf::ScanToken { start: token.start, end: token.end, token: token.token, keyword_kind: token.keyword_kind }); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_scan_raw_basic() { + let result = scan_raw("SELECT * FROM users").unwrap(); + assert!(!result.tokens.is_empty()); + // First token should be SELECT + assert_eq!(result.tokens[0].start, 0); + assert_eq!(result.tokens[0].end, 6); + } + + #[test] + fn test_scan_raw_matches_scan() { + let sql = "SELECT id, name FROM users WHERE active = true"; + let raw_result = scan_raw(sql).unwrap(); + let prost_result = crate::scan(sql).unwrap(); + + assert_eq!(raw_result.version, prost_result.version); + assert_eq!(raw_result.tokens.len(), prost_result.tokens.len()); + + for (raw_token, prost_token) in raw_result.tokens.iter().zip(prost_result.tokens.iter()) { + assert_eq!(raw_token.start, prost_token.start); + assert_eq!(raw_token.end, prost_token.end); + assert_eq!(raw_token.token, prost_token.token); + assert_eq!(raw_token.keyword_kind, prost_token.keyword_kind); + } + } + + #[test] + fn test_scan_raw_empty() { + let result = scan_raw("").unwrap(); + assert!(result.tokens.is_empty()); + } + + #[test] + fn test_scan_raw_complex() { + let sql = r#"SELECT "column" AS left /* comment */ FROM between"#; + let result = scan_raw(sql).unwrap(); + assert!(!result.tokens.is_empty()); + } +}