From 1be459666daea6d507ed3cbaf21c6872d23794e4 Mon Sep 17 00:00:00 2001 From: Shaurya Dwivedi Date: Wed, 1 Oct 2025 14:42:34 -0700 Subject: [PATCH 1/4] Move typed null handling from ion_type() to read_null() methods --- src/lazy/binary/raw/v1_1/value.rs | 8 +++----- src/lazy/binary/raw/value.rs | 7 +++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 18d1d07c..9e00990e 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -429,13 +429,11 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { } fn read_null(&self) -> IonResult<IonType> { - let ion_type = if self.encoded_value.header.type_code() == OpcodeType::TypedNull { - let body = self.value_body(); - ION_1_1_TYPED_NULL_TYPES[body[0] as usize] + Ok(if self.encoded_value.header.type_code() == OpcodeType::TypedNull { + ION_1_1_TYPED_NULL_TYPES[self.value_body()[0] as usize] } else { self.encoded_value.ion_type() - }; - Ok(ion_type) + }) } pub fn is_delimited(&self) -> bool { diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 8c1d2f18..e5af2f8f 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -501,10 +501,13 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// calling this method will not read additional data; the `RawValueRef` will provide a /// [`LazyRawBinaryList_1_0`], [`LazyRawBinarySExp_1_0`], or [`LazyRawBinaryStruct_1_0`] /// that can be traversed to access the container's contents. 
+ fn read_null(&self) -> IonResult<IonType> { + Ok(self.encoded_value.ion_type()) + } + pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { if self.is_null() { - let raw_value_ref = RawValueRef::Null(self.ion_type()); - return Ok(raw_value_ref); + return Ok(RawValueRef::Null(self.read_null()?)); } match self.ion_type() { From de99793c4fb3b6d1ad9943dffbe816a6095377ae Mon Sep 17 00:00:00 2001 From: Shaurya Dwivedi Date: Wed, 1 Oct 2025 16:22:02 -0700 Subject: [PATCH 2/4] Removed read_null --- src/lazy/binary/raw/v1_1/value.rs | 8 +++++--- src/lazy/binary/raw/value.rs | 7 ++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 9e00990e..18d1d07c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -429,11 +429,13 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { } fn read_null(&self) -> IonResult<IonType> { - Ok(if self.encoded_value.header.type_code() == OpcodeType::TypedNull { - ION_1_1_TYPED_NULL_TYPES[self.value_body()[0] as usize] + let ion_type = if self.encoded_value.header.type_code() == OpcodeType::TypedNull { + let body = self.value_body(); + ION_1_1_TYPED_NULL_TYPES[body[0] as usize] } else { self.encoded_value.ion_type() - }) + }; + Ok(ion_type) } pub fn is_delimited(&self) -> bool { diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index e5af2f8f..8c1d2f18 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -501,13 +501,10 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// calling this method will not read additional data; the `RawValueRef` will provide a /// [`LazyRawBinaryList_1_0`], [`LazyRawBinarySExp_1_0`], or [`LazyRawBinaryStruct_1_0`] /// that can be traversed to access the container's contents. 
- fn read_null(&self) -> IonResult<IonType> { - Ok(self.encoded_value.ion_type()) - } - pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { if self.is_null() { - return Ok(RawValueRef::Null(self.read_null()?)); + let raw_value_ref = RawValueRef::Null(self.ion_type()); + return Ok(raw_value_ref); } match self.ion_type() { From 26e7eccf9661b3c9eb5f019ac9da8b0f763b7876 Mon Sep 17 00:00:00 2001 From: Shaurya Dwivedi Date: Wed, 1 Oct 2025 16:23:05 -0700 Subject: [PATCH 3/4] Fix ion_type() to correctly return typed null types in Ion 1.1 binary --- src/lazy/binary/raw/v1_1/value.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 18d1d07c..02d8dd77 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -141,7 +141,12 @@ impl<'top> HasRange for &'top LazyRawBinaryValue_1_1<'top> { impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1_1<'top> { fn ion_type(&self) -> IonType { - self.encoded_value.ion_type() + if self.encoded_value.header.type_code() == OpcodeType::TypedNull { + let body = self.value_body(); + ION_1_1_TYPED_NULL_TYPES[body[0] as usize] + } else { + self.encoded_value.ion_type() + } } fn is_null(&self) -> bool { From 49f160e04f29159a8af9453fa64fd20b319c1cee Mon Sep 17 00:00:00 2001 From: Shaurya Dwivedi Date: Tue, 7 Oct 2025 11:17:53 -0700 Subject: [PATCH 4/4] Fix encoding detection for incomplete binary IVM in streaming readers --- src/lazy/any_encoding.rs | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 50502bb2..d7fa5bca 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -459,6 +459,11 @@ impl LazyRawAnyReader<'_> { match *data { [0xE0, 0x01, 0x00, 0xEA, ..] => IonEncoding::Binary_1_0, [0xE0, 0x01, 0x01, 0xEA, ..] 
=> IonEncoding::Binary_1_1, + + // We should try binary first since it handles incomplete data better + [0xE0, 0x01, 0x00] | [0xE0, 0x01, 0x01] => IonEncoding::Binary_1_0, + [0xE0, 0x01] => IonEncoding::Binary_1_0, + [0xE0] => IonEncoding::Binary_1_0, _ => IonEncoding::Text_1_0, } } @@ -1992,4 +1997,57 @@ mod tests { Ok(()) } + + #[test] + fn test_detect_encoding_from_stream() { + use std::io::{self, Cursor, Read}; + use crate::{Reader, AnyEncoding}; + + let data = [ + 0xE0u8, 0x01, 0x00, 0xEA, // IVM + 0x83, 65, 66, 67, // String: "ABC" + ]; + + let mut input: Box<dyn Read> = Box::new(io::empty()); + for input_byte in data { + input = Box::new(input.chain(Cursor::new([input_byte]))); + } + let _values: Vec<_> = Reader::new(AnyEncoding, input) + .expect("a reader") + .collect::<Result<_, _>>() + .expect("values should be read successfully"); + } + + #[test] + fn test_detect_encoding_incomplete_patterns() { + // Test that incomplete binary IVM patterns are handled correctly + let test_cases = vec![ + vec![0xE0], + vec![0xE0, 0x01], + vec![0xE0, 0x01, 0x00], + vec![0xE0, 0x01, 0x01], + ]; + + for incomplete_data in test_cases { + let encoding = LazyRawAnyReader::detect_encoding(&incomplete_data); + assert_eq!(encoding, IonEncoding::Binary_1_0, + "Failed for data: {:?}", incomplete_data); + } + } + + #[test] + fn test_detect_encoding_complete_patterns() { + let test_cases = vec![ + (vec![0xE0, 0x01, 0x00, 0xEA], IonEncoding::Binary_1_0), + (vec![0xE0, 0x01, 0x01, 0xEA], IonEncoding::Binary_1_1), + (vec![0xE0, 0x01, 0x00, 0xEA, 0x21, 0x01], IonEncoding::Binary_1_0), // with extra data + (vec![0xE0, 0x01, 0x01, 0xEA, 0x21, 0x01], IonEncoding::Binary_1_1), // with extra data + ]; + + for (data, expected_encoding) in test_cases { + let encoding = LazyRawAnyReader::detect_encoding(&data); + assert_eq!(encoding, expected_encoding, + "Failed for data: {:?}", data); + } + } }