Skip to content

Commit 647c5e5

Browse files
committed
feat: parse vector<T, N> from Custom type class name (Cassandra 5.0)
Cassandra 5.0 sends vector columns as Custom (0x0000) with the class name:

    org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 768)

This patch detects VectorType in Custom class name strings and parses the element type and dimension count into a structured CVector variant.

Changes in cassandra-protocol/src/frame/message_result.rs:
- ColTypeOptionValue::CVector(element_type_name, dimensions)
- parse_vector_class_name() extracts type + dims from class name
- Custom type handler checks for VectorType before falling back to CString
- Serialize CVector back to the Cassandra class name format
- 7 new tests: parsing, compact format, non-vector passthrough, wire roundtrip for both vector and non-vector Custom types

No new ColType variant needed — vectors stay as ColType::Custom on the wire, matching the Cassandra native protocol spec (which has no dedicated vector type ID).
1 parent df4b776 commit 647c5e5

File tree

2 files changed

+154
-4
lines changed

2 files changed

+154
-4
lines changed

cassandra-protocol/src/frame/message_result.rs

Lines changed: 141 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -648,9 +648,14 @@ impl FromCursor for ColTypeOption {
648648
fn from_cursor(cursor: &mut Cursor<&[u8]>, version: Version) -> error::Result<ColTypeOption> {
649649
let id = ColType::from_cursor(cursor, version)?;
650650
let value = match id {
651-
ColType::Custom => Some(ColTypeOptionValue::CString(
652-
from_cursor_str(cursor)?.to_string(),
653-
)),
651+
ColType::Custom => {
652+
let class_name = from_cursor_str(cursor)?.to_string();
653+
if let Some(vec_info) = parse_vector_class_name(&class_name) {
654+
Some(ColTypeOptionValue::CVector(vec_info.0, vec_info.1))
655+
} else {
656+
Some(ColTypeOptionValue::CString(class_name))
657+
}
658+
}
654659
ColType::Set => {
655660
let col_type = ColTypeOption::from_cursor(cursor, version)?;
656661
Some(ColTypeOptionValue::CSet(Box::new(col_type)))
@@ -691,6 +696,9 @@ pub enum ColTypeOptionValue {
691696
UdtType(CUdt),
692697
TupleType(CTuple),
693698
CMap(Box<ColTypeOption>, Box<ColTypeOption>),
699+
/// Vector type parsed from Custom class name: element type name + dimensions.
700+
/// e.g. VectorType(FloatType, 768) → CVector("FloatType", 768)
701+
CVector(String, u16),
694702
}
695703

696704
impl Serialize for ColTypeOptionValue {
@@ -707,10 +715,47 @@ impl Serialize for ColTypeOptionValue {
707715
v1.serialize(cursor, version);
708716
v2.serialize(cursor, version);
709717
}
718+
Self::CVector(elem_type, dimensions) => {
719+
// Serialize as Custom type class name string
720+
let class_name = format!(
721+
"org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.{} , {})",
722+
elem_type, dimensions
723+
);
724+
serialize_str(cursor, &class_name, version);
725+
}
710726
}
711727
}
712728
}
713729

730+
/// Fully qualified class-name prefix (including the opening parenthesis) that
/// identifies a vector type inside a `Custom` column type.
const VECTOR_TYPE_PREFIX: &str = "org.apache.cassandra.db.marshal.VectorType(";

/// Parse a Custom type class name to detect VectorType.
///
/// Cassandra 5.0 sends vector columns as Custom (0x0000) with class name:
/// `org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)`
///
/// Whitespace around the element type and the dimension count is ignored, so
/// the compact form `VectorType(<type>,4)` is accepted as well.
///
/// Returns `Some((element_type_short_name, dimensions))` if it's a VectorType.
/// The short name is the element class name with its package path removed.
/// Only the *outer* class name is shortened: a parameterized element such as
/// `org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)`
/// yields `ListType(org.apache.cassandra.db.marshal.Int32Type)`, keeping its
/// own type arguments intact.
///
/// Returns `None` when the string is not a `VectorType(...)` class name, when
/// the element type is empty, or when the dimension count is not a valid `u16`.
fn parse_vector_class_name(class_name: &str) -> Option<(String, u16)> {
    let inner = class_name
        .strip_prefix(VECTOR_TYPE_PREFIX)?
        .strip_suffix(')')?;

    // Split on the LAST comma: the dimension count is always the final
    // argument, while the element type may itself contain commas
    // (e.g. `MapType(K,V)`).
    let comma = inner.rfind(',')?;
    let elem_str = inner[..comma].trim();
    let dim_str = inner[comma + 1..].trim();

    // An empty element type cannot be serialized back to a meaningful class
    // name, so treat it as "not a vector" and let the caller keep the raw string.
    if elem_str.is_empty() {
        return None;
    }

    // Strip the package path from the outer class name only. Searching for the
    // last '.' across the whole string would cut into the type arguments of a
    // parameterized element type and return garbage such as `Int32Type)`.
    let outer_end = elem_str.find('(').unwrap_or(elem_str.len());
    let short_start = elem_str[..outer_end].rfind('.').map_or(0, |dot| dot + 1);
    let elem_short = &elem_str[short_start..];

    let dimensions: u16 = dim_str.parse().ok()?;
    Some((elem_short.to_string(), dimensions))
}
758+
714759
/// User defined type.
715760
#[derive(Debug, Clone, PartialEq, Ord, PartialOrd, Eq, Hash)]
716761
pub struct CUdt {
@@ -1617,3 +1662,96 @@ mod schema_change {
16171662
test_encode_decode(bytes, expected);
16181663
}
16191664
}
1665+
1666+
#[cfg(test)]
mod vector_type {
    //! Tests for detecting `VectorType` inside `Custom` column type class names.

    use super::*;

    /// Asserts that `class_name` parses as a vector with the given element
    /// short name and dimension count.
    fn assert_parses_as(class_name: &str, expected_elem: &str, expected_dims: u16) {
        let (elem, dims) = parse_vector_class_name(class_name).unwrap();
        assert_eq!(elem, expected_elem);
        assert_eq!(dims, expected_dims);
    }

    /// Builds the bytes `[0x0000 (Custom)][u16 length][class_name]` and decodes
    /// them through `ColTypeOption::from_cursor`.
    fn decode_custom(class_name: &str) -> ColTypeOption {
        let mut wire = Vec::new();
        wire.extend_from_slice(&0x0000u16.to_be_bytes()); // Custom type ID
        wire.extend_from_slice(&(class_name.len() as u16).to_be_bytes());
        wire.extend_from_slice(class_name.as_bytes());

        let mut cursor = Cursor::new(wire.as_slice());
        ColTypeOption::from_cursor(&mut cursor, Version::V4).unwrap()
    }

    #[test]
    fn parse_vector_float_4() {
        assert_parses_as(
            "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)",
            "FloatType",
            4,
        );
    }

    #[test]
    fn parse_vector_float_768() {
        assert_parses_as(
            "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 768)",
            "FloatType",
            768,
        );
    }

    #[test]
    fn parse_vector_double() {
        assert_parses_as(
            "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.DoubleType , 3)",
            "DoubleType",
            3,
        );
    }

    #[test]
    fn parse_vector_compact_format() {
        // No whitespace around the comma.
        assert_parses_as(
            "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType,4)",
            "FloatType",
            4,
        );
    }

    #[test]
    fn parse_non_vector_returns_none() {
        let not_vectors = ["org.apache.cassandra.db.marshal.UTF8Type", "", "not a type"];
        for candidate in not_vectors {
            assert!(parse_vector_class_name(candidate).is_none());
        }
    }

    #[test]
    fn custom_type_with_vector_class_name_becomes_cvector() {
        // A Custom option whose class name is VectorType(...) must decode as CVector.
        let decoded = decode_custom(
            "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType , 4)",
        );

        assert_eq!(decoded.id, ColType::Custom);
        match decoded.value {
            Some(ColTypeOptionValue::CVector(elem, dims)) => {
                assert_eq!(elem, "FloatType");
                assert_eq!(dims, 4);
            }
            other => panic!("expected CVector, got {:?}", other),
        }
    }

    #[test]
    fn custom_type_non_vector_stays_cstring() {
        // Any other Custom class name keeps the raw-string representation.
        let class_name = "org.apache.cassandra.db.marshal.UTF8Type";
        let decoded = decode_custom(class_name);

        assert_eq!(decoded.id, ColType::Custom);
        match decoded.value {
            Some(ColTypeOptionValue::CString(s)) => assert_eq!(s, class_name),
            other => panic!("expected CString, got {:?}", other),
        }
    }
}

cassandra-protocol/src/types/vector.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,21 @@ pub struct VectorInfo {
3131
}
3232

3333
pub fn get_vector_type_info(option_value: &ColTypeOptionValue) -> Result<VectorInfo> {
34+
// Handle structured CVector (parsed from Custom class name)
35+
if let ColTypeOptionValue::CVector(ref elem_type, dimensions) = option_value {
36+
return Ok(VectorInfo {
37+
internal_type: elem_type.clone(),
38+
count: *dimensions as usize,
39+
});
40+
}
41+
3442
let input = match option_value {
3543
ColTypeOptionValue::CString(ref s) => s,
36-
_ => return Err(Error::General("Option value must be a string!".into())),
44+
_ => {
45+
return Err(Error::General(
46+
"Option value must be a string or CVector!".into(),
47+
))
48+
}
3749
};
3850

3951
let _custom_type = input.split('(').next().unwrap().rsplit('.').next().unwrap();

0 commit comments

Comments
 (0)