Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 141 additions & 3 deletions cassandra-protocol/src/frame/message_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -648,9 +648,14 @@ impl FromCursor for ColTypeOption {
fn from_cursor(cursor: &mut Cursor<&[u8]>, version: Version) -> error::Result<ColTypeOption> {
let id = ColType::from_cursor(cursor, version)?;
let value = match id {
ColType::Custom => Some(ColTypeOptionValue::CString(
from_cursor_str(cursor)?.to_string(),
)),
ColType::Custom => {
let class_name = from_cursor_str(cursor)?.to_string();
if let Some(vec_info) = parse_vector_class_name(&class_name) {
Some(ColTypeOptionValue::CVector(vec_info.0, vec_info.1))
} else {
Some(ColTypeOptionValue::CString(class_name))
}
}
ColType::Set => {
let col_type = ColTypeOption::from_cursor(cursor, version)?;
Some(ColTypeOptionValue::CSet(Box::new(col_type)))
Expand Down Expand Up @@ -691,6 +696,9 @@ pub enum ColTypeOptionValue {
UdtType(CUdt),
TupleType(CTuple),
CMap(Box<ColTypeOption>, Box<ColTypeOption>),
/// Vector type parsed from Custom class name: element type name + dimensions.
/// e.g. VectorType(FloatType, 768) → CVector("FloatType", 768)
CVector(String, u16),
}

impl Serialize for ColTypeOptionValue {
Expand All @@ -707,10 +715,47 @@ impl Serialize for ColTypeOptionValue {
v1.serialize(cursor, version);
v2.serialize(cursor, version);
}
Self::CVector(elem_type, dimensions) => {
// Serialize as Custom type class name string
let class_name = format!(
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.{} , {})",
elem_type, dimensions
);
serialize_str(cursor, &class_name, version);
}
}
}
}

const VECTOR_TYPE_PREFIX: &str = "org.apache.cassandra.db.marshal.VectorType(";

/// Parse a Custom type class name to detect VectorType.
///
/// Cassandra 5.0 sends vector columns as Custom (0x0000) with class name:
/// `org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)`
///
/// Returns Some((element_type_short_name, dimensions)) if it's a VectorType.
fn parse_vector_class_name(class_name: &str) -> Option<(String, u16)> {
let inner = class_name
.strip_prefix(VECTOR_TYPE_PREFIX)?
.strip_suffix(')')?;

// Split on last comma — format is "element_type , dimensions"
let (elem_str, dim_str) = if let Some(pos) = inner.rfind(',') {
(&inner[..pos], inner[pos + 1..].trim())
} else {
return None;
};

let elem_str = elem_str.trim();

// Extract short type name from fully qualified class name
let elem_short = elem_str.rsplit('.').next().unwrap_or(elem_str).to_string();

let dimensions: u16 = dim_str.parse().ok()?;
Some((elem_short, dimensions))
}

/// User defined type.
#[derive(Debug, Clone, PartialEq, Ord, PartialOrd, Eq, Hash)]
pub struct CUdt {
Expand Down Expand Up @@ -1617,3 +1662,96 @@ mod schema_change {
test_encode_decode(bytes, expected);
}
}

#[cfg(test)]
mod vector_type {
use super::*;

#[test]
fn parse_vector_float_4() {
let class_name =
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)";
let (elem, dims) = parse_vector_class_name(class_name).unwrap();
assert_eq!(elem, "FloatType");
assert_eq!(dims, 4);
}

#[test]
fn parse_vector_float_768() {
let class_name =
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 768)";
let (elem, dims) = parse_vector_class_name(class_name).unwrap();
assert_eq!(elem, "FloatType");
assert_eq!(dims, 768);
}

#[test]
fn parse_vector_double() {
let class_name =
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.DoubleType , 3)";
let (elem, dims) = parse_vector_class_name(class_name).unwrap();
assert_eq!(elem, "DoubleType");
assert_eq!(dims, 3);
}

#[test]
fn parse_vector_compact_format() {
let class_name =
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType,4)";
let (elem, dims) = parse_vector_class_name(class_name).unwrap();
assert_eq!(elem, "FloatType");
assert_eq!(dims, 4);
}

#[test]
fn parse_non_vector_returns_none() {
assert!(parse_vector_class_name("org.apache.cassandra.db.marshal.UTF8Type").is_none());
assert!(parse_vector_class_name("").is_none());
assert!(parse_vector_class_name("not a type").is_none());
}

#[test]
fn custom_type_with_vector_class_name_becomes_cvector() {
// Simulate what from_cursor would produce for Custom(VectorType(...))
let class_name = "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)";

// Build wire bytes: [0x0000 (Custom)][string class_name]
let mut buf = Vec::new();
buf.extend_from_slice(&0x0000u16.to_be_bytes()); // Custom type ID
buf.extend_from_slice(&(class_name.len() as u16).to_be_bytes());
buf.extend_from_slice(class_name.as_bytes());

let mut cursor = Cursor::new(buf.as_slice());
let option = ColTypeOption::from_cursor(&mut cursor, Version::V4).unwrap();

assert_eq!(option.id, ColType::Custom);
match option.value {
Some(ColTypeOptionValue::CVector(elem, dims)) => {
assert_eq!(elem, "FloatType");
assert_eq!(dims, 4);
}
other => panic!("expected CVector, got {:?}", other),
}
}

#[test]
fn custom_type_non_vector_stays_cstring() {
let class_name = "org.apache.cassandra.db.marshal.UTF8Type";

let mut buf = Vec::new();
buf.extend_from_slice(&0x0000u16.to_be_bytes());
buf.extend_from_slice(&(class_name.len() as u16).to_be_bytes());
buf.extend_from_slice(class_name.as_bytes());

let mut cursor = Cursor::new(buf.as_slice());
let option = ColTypeOption::from_cursor(&mut cursor, Version::V4).unwrap();

assert_eq!(option.id, ColType::Custom);
match option.value {
Some(ColTypeOptionValue::CString(s)) => {
assert_eq!(s, class_name);
}
other => panic!("expected CString, got {:?}", other),
}
}
}
4 changes: 4 additions & 0 deletions cassandra-protocol/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,10 @@ macro_rules! as_rust_type {
return as_res_opt!($data_value, decode_blob);
}
}
// Vector columns (Custom with CVector) contain raw bytes
if let Some(crate::frame::message_result::ColTypeOptionValue::CVector(..)) = &$data_type_option.value {
return as_res_opt!($data_value, decode_blob);
}

Err(crate::error::Error::General(format!(
"Invalid conversion. \
Expand Down
14 changes: 13 additions & 1 deletion cassandra-protocol/src/types/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,21 @@ pub struct VectorInfo {
}

pub fn get_vector_type_info(option_value: &ColTypeOptionValue) -> Result<VectorInfo> {
// Handle structured CVector (parsed from Custom class name)
if let ColTypeOptionValue::CVector(ref elem_type, dimensions) = option_value {
return Ok(VectorInfo {
internal_type: elem_type.clone(),
count: *dimensions as usize,
});
}

let input = match option_value {
ColTypeOptionValue::CString(ref s) => s,
_ => return Err(Error::General("Option value must be a string!".into())),
_ => {
return Err(Error::General(
"Option value must be a string or CVector!".into(),
))
}
};

let _custom_type = input.split('(').next().unwrap().rsplit('.').next().unwrap();
Expand Down
Loading