Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Blocklist raw deparse functions that use types from bindings_raw
.blocklist_function("pg_query_deparse_raw")
.blocklist_function("pg_query_deparse_raw_opts")
// Blocklist raw fingerprint function that uses types from bindings_raw
.blocklist_function("pg_query_fingerprint_raw")
.generate()
.map_err(|_| "Unable to generate bindings")?
.write_to_file(out_dir.join("bindings.rs"))?;
Expand Down Expand Up @@ -433,6 +435,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.allowlist_function("pg_query_list_make1")
.allowlist_function("pg_query_list_append")
.allowlist_function("pg_query_deparse_nodes")
// Raw scan functions (bypass protobuf)
.allowlist_type("PgQueryRawScanToken")
.allowlist_type("PgQueryRawScanResult")
.allowlist_function("pg_query_scan_raw")
.allowlist_function("pg_query_free_raw_scan_result")
// Raw fingerprint (works with raw parse result)
.allowlist_type("PgQueryFingerprintResult")
.allowlist_function("pg_query_fingerprint_raw")
.allowlist_function("pg_query_free_fingerprint_result")
.generate()
.map_err(|_| "Unable to generate raw bindings")?
.write_to_file(out_dir.join("bindings_raw.rs"))?;
Expand Down
2 changes: 1 addition & 1 deletion libpg_query
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ mod parse_result;
pub mod protobuf;
mod query;
mod raw_deparse;
mod raw_fingerprint;
mod raw_parse;
mod raw_scan;
mod summary;
mod summary_result;
mod truncate;
Expand All @@ -62,7 +64,9 @@ pub use node_ref::*;
pub use parse_result::*;
pub use query::*;
pub use raw_deparse::deparse_raw;
pub use raw_fingerprint::fingerprint_raw;
pub use raw_parse::parse_raw;
pub use raw_scan::scan_raw;
pub use summary::*;
pub use summary_result::*;
pub use truncate::*;
Expand Down
7 changes: 7 additions & 0 deletions src/node_enum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ impl NodeEnum {
})
}

/// Deparses this node by handing it to [`crate::deparse_raw`].
///
/// The node is cloned and wrapped as the only statement of a temporary
/// `protobuf::ParseResult` (statement location and length both `0`) tagged
/// with `PG_VERSION_NUM`. The outcome of `crate::deparse_raw` is returned as-is.
pub fn deparse_raw(&self) -> Result<String> {
    let wrapped = Node { node: Some(self.clone()) };
    let only_stmt = protobuf::RawStmt { stmt: Some(Box::new(wrapped)), stmt_location: 0, stmt_len: 0 };
    let tree = protobuf::ParseResult { version: crate::bindings::PG_VERSION_NUM as i32, stmts: vec![only_stmt] };
    crate::deparse_raw(&tree)
}

pub fn nodes(&self) -> Vec<(NodeRef<'_>, i32, Context, bool)> {
let mut iter = vec![(self.to_ref(), 0, Context::None, false)];
let mut nodes = Vec::new();
Expand Down
7 changes: 7 additions & 0 deletions src/node_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ impl NodeMut {
})
}

/// Deparses the node behind this mutable handle via [`crate::deparse_raw`].
///
/// The handle is first converted with `to_enum`; if that conversion fails its
/// error is returned immediately. Otherwise the resulting node becomes the only
/// statement of a temporary `protobuf::ParseResult` (statement location and
/// length both `0`) tagged with `PG_VERSION_NUM`.
pub fn deparse_raw(&self) -> Result<String> {
    let wrapped = Node { node: Some(self.to_enum()?) };
    let only_stmt = protobuf::RawStmt { stmt: Some(Box::new(wrapped)), stmt_location: 0, stmt_len: 0 };
    let tree = protobuf::ParseResult { version: crate::bindings::PG_VERSION_NUM as i32, stmts: vec![only_stmt] };
    crate::deparse_raw(&tree)
}

pub fn to_enum(&self) -> Result<NodeEnum> {
unsafe {
let err = Error::InvalidPointer;
Expand Down
7 changes: 7 additions & 0 deletions src/node_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ impl Node {
stmts: vec![protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 }],
})
}

/// Deparses this node by handing it to [`crate::deparse_raw`].
///
/// A clone of the node is placed as the only statement of a temporary
/// `protobuf::ParseResult` (statement location and length both `0`) tagged
/// with `PG_VERSION_NUM`. The outcome of `crate::deparse_raw` is returned as-is.
pub fn deparse_raw(&self) -> Result<String> {
    let only_stmt = protobuf::RawStmt { stmt: Some(Box::new(self.clone())), stmt_location: 0, stmt_len: 0 };
    let tree = protobuf::ParseResult { version: crate::bindings::PG_VERSION_NUM as i32, stmts: vec![only_stmt] };
    crate::deparse_raw(&tree)
}
}

impl protobuf::Alias {
Expand Down
83 changes: 83 additions & 0 deletions src/raw_fingerprint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//! Direct fingerprinting that bypasses protobuf serialization/deserialization.
//!
//! This module provides a faster alternative to the standard fingerprint function by
//! parsing directly into PostgreSQL's internal structures and fingerprinting them
//! without going through protobuf serialization.

use crate::bindings_raw;
use crate::query::Fingerprint;
use crate::{Error, Result};
use std::ffi::{CStr, CString};

/// Computes the fingerprint of a SQL statement straight from the raw parse tree.
///
/// Unlike `fingerprint`, no protobuf encode/decode round trip is involved: the
/// statement is parsed with `pg_query_parse_raw` and the resulting C structure is
/// passed directly to `pg_query_fingerprint_raw`.
///
/// # Errors
///
/// * Propagates the `CString::new` failure when `statement` contains an interior NUL byte.
/// * Returns `Error::Parse` carrying the C-side message when the fingerprint result
///   reports an error.
///
/// # Example
///
/// ```rust
/// let result = pg_query::fingerprint_raw("SELECT * FROM contacts WHERE name='Paul'").unwrap();
/// assert_eq!(result.hex, "0e2581a461ece536");
/// ```
pub fn fingerprint_raw(statement: &str) -> Result<Fingerprint> {
    let c_sql = CString::new(statement)?;

    // Parse, fingerprint, then release the parse tree right away. The fingerprint
    // result is read only after the tree has been freed, so it must not borrow from
    // it (the original author notes it holds its own copies of any needed data).
    let raw = unsafe {
        let tree = bindings_raw::pg_query_parse_raw(c_sql.as_ptr());
        let fp = bindings_raw::pg_query_fingerprint_raw(tree);
        bindings_raw::pg_query_free_raw_parse_result(tree);
        fp
    };

    // Copy everything we need into owned Rust values before the C result is freed.
    let outcome = if raw.error.is_null() {
        let hex = unsafe { CStr::from_ptr(raw.fingerprint_str) }.to_string_lossy().into_owned();
        Ok(Fingerprint { value: raw.fingerprint, hex })
    } else {
        let message = unsafe { CStr::from_ptr((*raw.error).message) }.to_string_lossy().into_owned();
        Err(Error::Parse(message))
    };

    // Released on both the success and the error path.
    unsafe { bindings_raw::pg_query_free_fingerprint_result(raw) };
    outcome
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A plain SELECT produces a non-empty, 16-character hex fingerprint.
    #[test]
    fn test_fingerprint_raw_basic() {
        let fp = fingerprint_raw("SELECT * FROM users").unwrap();
        assert!(!fp.hex.is_empty());
        assert_eq!(fp.hex.len(), 16);
    }

    /// The raw path and the standard `fingerprint` path agree on value and hex.
    #[test]
    fn test_fingerprint_raw_matches_fingerprint() {
        let query = "SELECT * FROM contacts WHERE name='Paul'";
        let via_raw = fingerprint_raw(query).unwrap();
        let via_std = crate::fingerprint(query).unwrap();

        assert_eq!(via_raw.value, via_std.value);
        assert_eq!(via_raw.hex, via_std.hex);
    }

    /// Queries differing only in a literal value share one fingerprint.
    #[test]
    fn test_fingerprint_raw_normalizes_values() {
        let with_small_id = fingerprint_raw("SELECT * FROM users WHERE id = 1").unwrap();
        let with_large_id = fingerprint_raw("SELECT * FROM users WHERE id = 999").unwrap();
        assert_eq!(with_small_id.value, with_large_id.value);
        assert_eq!(with_small_id.hex, with_large_id.hex);
    }

    /// Unparseable input surfaces as an `Err`.
    #[test]
    fn test_fingerprint_raw_error() {
        let outcome = fingerprint_raw("NOT VALID SQL @#$");
        assert!(outcome.is_err());
    }
}
98 changes: 98 additions & 0 deletions src/raw_scan.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//! Direct scanning that bypasses protobuf serialization/deserialization.
//!
//! This module provides a faster alternative to the protobuf-based scanning by
//! directly reading the scanner's token output and converting it to Rust protobuf types.

use crate::bindings;
use crate::bindings_raw;
use crate::protobuf;
use crate::{Error, Result};
use std::ffi::{CStr, CString};

/// Tokenizes a SQL statement and returns the tokens as a `protobuf::ScanResult`.
///
/// Unlike `scan`, no protobuf encode/decode round trip is involved: the token
/// array produced by `pg_query_scan_raw` is read directly and copied into Rust
/// protobuf types. The result's `version` is set to `PG_VERSION_NUM`.
///
/// # Errors
///
/// * Propagates the `CString::new` failure when `sql` contains an interior NUL byte.
/// * Returns `Error::Scan` carrying the C-side message when the scan result reports an error.
///
/// # Example
///
/// ```rust
/// let result = pg_query::scan_raw("SELECT * FROM users").unwrap();
/// assert!(!result.tokens.is_empty());
/// ```
pub fn scan_raw(sql: &str) -> Result<protobuf::ScanResult> {
    let c_sql = CString::new(sql)?;
    let raw = unsafe { bindings_raw::pg_query_scan_raw(c_sql.as_ptr()) };

    // Copy everything we need into owned Rust values before the C result is freed.
    let outcome = if raw.error.is_null() {
        let tokens = unsafe { convert_tokens(raw.tokens, raw.n_tokens) };
        Ok(protobuf::ScanResult { version: bindings::PG_VERSION_NUM as i32, tokens })
    } else {
        let message = unsafe { CStr::from_ptr((*raw.error).message) }.to_string_lossy().into_owned();
        Err(Error::Scan(message))
    };

    // Released on both the success and the error path.
    unsafe { bindings_raw::pg_query_free_raw_scan_result(raw) };
    outcome
}

/// Copies an array of C scan tokens into a vector of `protobuf::ScanToken`.
///
/// Returns an empty vector when `tokens` is null or `n_tokens` is zero. Otherwise
/// each element's `start`, `end`, `token` and `keyword_kind` fields are copied
/// across unchanged, in order.
///
/// # Safety
///
/// When `tokens` is non-null it must point to at least `n_tokens` consecutive,
/// initialized `PgQueryRawScanToken` values that stay valid for the duration of
/// this call.
unsafe fn convert_tokens(tokens: *mut bindings_raw::PgQueryRawScanToken, n_tokens: usize) -> Vec<protobuf::ScanToken> {
    if tokens.is_null() || n_tokens == 0 {
        return Vec::new();
    }

    // SAFETY: `tokens` is non-null (checked above) and the caller guarantees it
    // refers to `n_tokens` readable elements.
    let raw_tokens = unsafe { std::slice::from_raw_parts(tokens, n_tokens) };

    raw_tokens
        .iter()
        .map(|raw| protobuf::ScanToken { start: raw.start, end: raw.end, token: raw.token, keyword_kind: raw.keyword_kind })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The leading token of a simple SELECT spans bytes 0..6.
    #[test]
    fn test_scan_raw_basic() {
        let scanned = scan_raw("SELECT * FROM users").unwrap();
        assert!(!scanned.tokens.is_empty());
        // First token should be SELECT
        let first = &scanned.tokens[0];
        assert_eq!(first.start, 0);
        assert_eq!(first.end, 6);
    }

    /// The raw scanner and the protobuf-based `scan` agree token by token.
    #[test]
    fn test_scan_raw_matches_scan() {
        let query = "SELECT id, name FROM users WHERE active = true";
        let via_raw = scan_raw(query).unwrap();
        let via_prost = crate::scan(query).unwrap();

        assert_eq!(via_raw.version, via_prost.version);
        assert_eq!(via_raw.tokens.len(), via_prost.tokens.len());

        for (raw_tok, prost_tok) in via_raw.tokens.iter().zip(&via_prost.tokens) {
            assert_eq!(raw_tok.start, prost_tok.start);
            assert_eq!(raw_tok.end, prost_tok.end);
            assert_eq!(raw_tok.token, prost_tok.token);
            assert_eq!(raw_tok.keyword_kind, prost_tok.keyword_kind);
        }
    }

    /// Empty input scans successfully and yields no tokens.
    #[test]
    fn test_scan_raw_empty() {
        let scanned = scan_raw("").unwrap();
        assert!(scanned.tokens.is_empty());
    }

    /// Quoted identifiers, a block comment and keyword-like names still scan.
    #[test]
    fn test_scan_raw_complex() {
        let query = r#"SELECT "column" AS left /* comment */ FROM between"#;
        let scanned = scan_raw(query).unwrap();
        assert!(!scanned.tokens.is_empty());
    }
}