From 0eb89a3f787227dce8fba146402d7b523085d770 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Mon, 8 Sep 2025 14:48:30 -0700 Subject: [PATCH 1/3] Implement tagless ints --- src/lazy/binary/raw/v1_1/binary_buffer.rs | 21 +++++++++++++++- src/lazy/binary/raw/v1_1/e_expression.rs | 19 +++++++++++++++ src/lazy/binary/raw/v1_1/value.rs | 6 ++++- src/lazy/encoder/binary/v1_1/fixed_int.rs | 25 +++++++++++++++++++- src/lazy/encoder/binary/v1_1/value_writer.rs | 8 +++++++ src/lazy/encoder/writer.rs | 9 +++++++ src/lazy/expanded/compiler.rs | 4 ++++ src/lazy/expanded/macro_table.rs | 12 ++++++++++ src/lazy/expanded/template.rs | 12 ++++++++++ 9 files changed, 113 insertions(+), 3 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/binary_buffer.rs b/src/lazy/binary/raw/v1_1/binary_buffer.rs index 9d6d2c57..ad9c54e3 100644 --- a/src/lazy/binary/raw/v1_1/binary_buffer.rs +++ b/src/lazy/binary/raw/v1_1/binary_buffer.rs @@ -273,7 +273,26 @@ impl<'a> BinaryBuffer<'a> { let matched_input = self.slice(0, size_in_bytes); let remaining_input = self.slice_to_end(size_in_bytes); - let value = LazyRawBinaryValue_1_1::for_fixed_uint(matched_input, encoding); + let value = LazyRawBinaryValue_1_1::for_fixed_int_type(matched_input, encoding); + Ok((value, remaining_input)) + } + + pub fn read_fixed_int_as_lazy_value(self, encoding: BinaryValueEncoding) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { + let size_in_bytes = match encoding { + BinaryValueEncoding::Int8 => 1, + BinaryValueEncoding::Int16 => 2, + BinaryValueEncoding::Int32 => 4, + BinaryValueEncoding::Int64 => 8, + _ => return IonResult::illegal_operation(format!("invalid binary encoding for fixed int: {encoding:?}")), + }; + + if self.len() < size_in_bytes { + return IonResult::incomplete("an int", self.offset()); + } + + let matched_input = self.slice(0, size_in_bytes); + let remaining_input = self.slice_to_end(size_in_bytes); + let value = LazyRawBinaryValue_1_1::for_fixed_int_type(matched_input, encoding); Ok((value, remaining_input)) } diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 6ce6d788..6b6d45a0 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -347,6 +347,25 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { remaining, ) } + enc @ ParameterEncoding::Int8 | + enc @ ParameterEncoding::Int16 | + enc @ ParameterEncoding::Int32 | + enc @ ParameterEncoding::Int64 + => { + let binary_enc = try_or_some_err!(enc.try_into()); + let (fixed_int_lazy_value, remaining) = try_or_some_err! { + self.remaining_args_buffer.read_fixed_int_as_lazy_value(binary_enc) + }; + let value_ref = &*self + .remaining_args_buffer + .context() + .allocator() + .alloc_with(|| fixed_int_lazy_value); + ( + EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)), + remaining, + ) + } ParameterEncoding::MacroShaped(_macro_ref) => { todo!("macro-shaped parameter encoding") } // TODO: The other tagless encodings diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 18d1d07c..4c7ad341 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -115,6 +115,10 @@ pub enum BinaryValueEncoding { UInt16, UInt32, UInt64, + Int8, + Int16, + Int32, + Int64, } #[derive(Debug, Copy, Clone)] @@ -364,7 +368,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { } } - pub(crate) fn for_fixed_uint(input: BinaryBuffer<'top>, encoding: BinaryValueEncoding) -> Self { + pub(crate) fn for_fixed_int_type(input: BinaryBuffer<'top>, encoding: BinaryValueEncoding) -> Self { let encoded_value = EncodedBinaryValue { encoding, header: Header { diff --git a/src/lazy/encoder/binary/v1_1/fixed_int.rs b/src/lazy/encoder/binary/v1_1/fixed_int.rs index af98c9b5..f1610451 100644 --- a/src/lazy/encoder/binary/v1_1/fixed_int.rs +++ b/src/lazy/encoder/binary/v1_1/fixed_int.rs @@ -1,9 +1,11 @@ use std::io::Write; use crate::decimal::coefficient::Coefficient; -use crate::result::IonFailure; +use crate::result::{IonError, IonFailure}; use crate::{Int, IonResult}; +use num_traits::{PrimInt, Signed}; + /// An Ion 1.1 encoding primitive that represents a fixed-length signed integer. #[derive(Debug)] pub struct FixedInt { @@ -100,6 +102,27 @@ impl FixedInt { Self::write_i128(output, value.data) } + /// Writes the given Into as a taggless int value with the specified primitive size + /// represented by I. + #[inline] + pub fn write_as_int(output: &mut impl Write, value: impl Into) -> IonResult<()> { + let size_in_bytes = std::mem::size_of::(); + let value: i128 = value.into().data; + let encoded_bytes = value.to_le_bytes(); + let max_value: i128 = num_traits::cast(I::max_value()) + .ok_or(IonError::encoding_error("Unable to represent bounds for value as 128bit value"))?; + let min_value: i128 = num_traits::cast(I::min_value()) + .ok_or(IonError::encoding_error("Unable to represent bounds for value as 128bit value"))?; + + if !(min_value..=max_value).contains(&value) { + return IonResult::encoding_error(format!("provided signed integer value does not fit within {size_in_bytes} bytes(s)")); + } + + output.write_all(&encoded_bytes[..size_in_bytes])?; + + Ok(()) + } + #[inline] pub fn encoded_size(value: impl Into) -> usize { let value = value.into(); diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 8de8c7a4..ca3f267f 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -1086,6 +1086,10 @@ impl<'value, 'top> ValueWriter for BinaryEExpParameterValueWriter_1_1<'value, 't PE::UInt64 => value .try_into() .and_then(|uint: UInt| FixedUInt::write_as_uint::(self.buffer, uint)), + PE::Int8 => FixedInt::write_as_int::(self.buffer, value), + PE::Int16 => FixedInt::write_as_int::(self.buffer, value), + PE::Int32 => FixedInt::write_as_int::(self.buffer, value), + PE::Int64 => FixedInt::write_as_int::(self.buffer, value), PE::FlexUInt => value .try_into() .and_then(|uint: UInt| FlexUInt::write(self.buffer, uint)) @@ -1135,6 +1139,10 @@ impl<'value, 'top> ValueWriter for BinaryEExpParameterValueWriter_1_1<'value, 't PE::UInt64 => value .try_into() .and_then(|uint: UInt| FixedUInt::write_as_uint::(self.buffer, uint)), + PE::Int8 => FixedInt::write_as_int::(self.buffer, value), + PE::Int16 => FixedInt::write_as_int::(self.buffer, value), + PE::Int32 => FixedInt::write_as_int::(self.buffer, value), + PE::Int64 => FixedInt::write_as_int::(self.buffer, value), PE::FlexUInt => value .try_into() .and_then(|uint: UInt| FlexUInt::write(self.buffer, uint)) diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index bda7945f..6d89916e 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -1508,6 +1508,14 @@ mod tests { #[case::uint16("(macro foo (uint16::x) (%x))", 5, "5")] #[case::uint32("(macro foo (uint32::x) (%x))", 5, "5")] #[case::uint64("(macro foo (uint64::x) (%x))", 5, "5")] + #[case::int8_neg("(macro foo (int8::x) (%x))", -5, "-5")] + #[case::int8_pos("(macro foo (int8::x) (%x))", 5, "5")] + #[case::int16_neg("(macro foo (int16::x) (%x))", -5, "-5")] + #[case::int16_pos("(macro foo (int16::x) (%x))", 5, "5")] + #[case::int32_neg("(macro foo (int32::x) (%x))", -5, "-5")] + #[case::int32_pos("(macro foo (int32::x) (%x))", 5, "5")] + #[case::int64_neg("(macro foo (int64::x) (%x))", -5, "-5")] + #[case::int64_pos("(macro foo (int64::x) (%x))", 5, "5")] fn tagless_uint_encoding(#[case] macro_source: &str, #[case] input: i64, #[case] expected: &str) -> IonResult<()> { use crate::{Int, Element}; @@ -1593,5 +1601,6 @@ mod tests { Ok(()) } + } } diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index cc56c151..eb741431 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -382,6 +382,10 @@ impl TemplateCompiler { "uint16" => Ok(ParameterEncoding::UInt16), "uint32" => Ok(ParameterEncoding::UInt32), "uint64" => Ok(ParameterEncoding::UInt64), + "int8" => Ok(ParameterEncoding::Int8), + "int16" => Ok(ParameterEncoding::Int16), + "int32" => Ok(ParameterEncoding::Int32), + "int64" => Ok(ParameterEncoding::Int64), _ => IonResult::decoding_error(format!( "unrecognized encoding '{encoding_name}' specified for parameter" )), diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index c29483f2..5cf20f3b 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -175,6 +175,18 @@ fn write_macro_signature_as_ion( ParameterEncoding::UInt64 => value_writer .with_annotations("uint64")? .write_symbol(param.name())?, + ParameterEncoding::Int8 => value_writer + .with_annotations("int8")? + .write_symbol(param.name())?, + ParameterEncoding::Int16 => value_writer + .with_annotations("int16")? + .write_symbol(param.name())?, + ParameterEncoding::Int32 => value_writer + .with_annotations("int32")? + .write_symbol(param.name())?, + ParameterEncoding::Int64 => value_writer + .with_annotations("int64")? + .write_symbol(param.name())?, ParameterEncoding::MacroShaped(_) => todo!(), }; let cardinality_modifier = match param.cardinality() { diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 875f83b5..74e56f48 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -211,6 +211,10 @@ pub enum ParameterEncoding { UInt16, UInt32, UInt64, + Int8, + Int16, + Int32, + Int64, // TODO: tagless types, including fixed-width types and macros MacroShaped(Arc), } @@ -225,6 +229,10 @@ impl Display for ParameterEncoding { UInt16 => write!(f, "uint16"), UInt32 => write!(f, "uint32"), UInt64 => write!(f, "uint64"), + Int8 => write!(f, "int8"), + Int16 => write!(f, "int16"), + Int32 => write!(f, "int32"), + Int64 => write!(f, "int64"), MacroShaped(m) => write!(f, "{}", m.name().unwrap_or("")), } } @@ -243,6 +251,10 @@ impl TryFrom<&ParameterEncoding> for BinaryValueEncoding { ParameterEncoding::UInt16 => Ok(BinaryValueEncoding::UInt16), ParameterEncoding::UInt32 => Ok(BinaryValueEncoding::UInt32), ParameterEncoding::UInt64 => Ok(BinaryValueEncoding::UInt64), + ParameterEncoding::Int8 => Ok(BinaryValueEncoding::Int8), + ParameterEncoding::Int16 => Ok(BinaryValueEncoding::Int16), + ParameterEncoding::Int32 => Ok(BinaryValueEncoding::Int32), + ParameterEncoding::Int64 => Ok(BinaryValueEncoding::Int64), } } } From 4ea2b8c1853ebc5e99bf0e1f9e7973afdcc56c8c Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Mon, 8 Sep 2025 15:01:14 -0700 Subject: [PATCH 2/3] Collapse redundant code between uint and int handling --- src/lazy/binary/raw/v1_1/binary_buffer.rs | 34 ++++++----------------- src/lazy/binary/raw/v1_1/e_expression.rs | 27 ++++-------------- 2 files changed, 14 insertions(+), 47 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/binary_buffer.rs b/src/lazy/binary/raw/v1_1/binary_buffer.rs index ad9c54e3..3e733bd4 100644 --- a/src/lazy/binary/raw/v1_1/binary_buffer.rs +++ b/src/lazy/binary/raw/v1_1/binary_buffer.rs @@ -258,36 +258,18 @@ impl<'a> BinaryBuffer<'a> { Ok((value, remaining_input)) } - pub fn read_fixed_uint_as_lazy_value(self, encoding: BinaryValueEncoding) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { + pub fn read_fixed_int_type_as_lazy_value(self, encoding: BinaryValueEncoding) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { + use BinaryValueEncoding::*; let size_in_bytes = match encoding { - BinaryValueEncoding::UInt8 => 1, - BinaryValueEncoding::UInt16 => 2, - BinaryValueEncoding::UInt32 => 4, - BinaryValueEncoding::UInt64 => 8, - _ => return IonResult::illegal_operation(format!("invalid binary encoding for fixed uint: {encoding:?}")), + UInt8 | Int8 => 1, + UInt16 | Int16 => 2, + UInt32 | Int32 => 4, + UInt64 | Int64 => 8, + _ => return IonResult::illegal_operation(format!("invalid binary encoding for fixed int or uint: {encoding:?}")), }; if self.len() < size_in_bytes { - return IonResult::incomplete("a uint", self.offset()); - } - - let matched_input = self.slice(0, size_in_bytes); - let remaining_input = self.slice_to_end(size_in_bytes); - let value = LazyRawBinaryValue_1_1::for_fixed_int_type(matched_input, encoding); - Ok((value, remaining_input)) - } - - pub fn read_fixed_int_as_lazy_value(self, encoding: BinaryValueEncoding) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { - let size_in_bytes = match encoding { - BinaryValueEncoding::Int8 => 1, - BinaryValueEncoding::Int16 => 2, - BinaryValueEncoding::Int32 => 4, - BinaryValueEncoding::Int64 => 8, - _ => return IonResult::illegal_operation(format!("invalid binary encoding for fixed int: {encoding:?}")), - }; - - if self.len() < size_in_bytes { - return IonResult::incomplete("an int", self.offset()); + return IonResult::incomplete("a(n) uint/int", self.offset()); } let matched_input = self.slice(0, size_in_bytes); diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 6b6d45a0..4b718019 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -331,11 +331,15 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { enc@ ParameterEncoding::UInt8 | enc@ ParameterEncoding::UInt16 | enc@ ParameterEncoding::UInt32 | - enc@ ParameterEncoding::UInt64 + enc@ ParameterEncoding::UInt64 | + enc@ ParameterEncoding::Int8 | + enc@ ParameterEncoding::Int16 | + enc@ ParameterEncoding::Int32 | + enc@ ParameterEncoding::Int64 => { let binary_enc = try_or_some_err!(enc.try_into()); let (fixed_uint_lazy_value, remaining) = try_or_some_err! { - self.remaining_args_buffer.read_fixed_uint_as_lazy_value(binary_enc) + self.remaining_args_buffer.read_fixed_int_type_as_lazy_value(binary_enc) }; let value_ref = &*self .remaining_args_buffer @@ -347,25 +351,6 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { remaining, ) } - enc @ ParameterEncoding::Int8 | - enc @ ParameterEncoding::Int16 | - enc @ ParameterEncoding::Int32 | - enc @ ParameterEncoding::Int64 - => { - let binary_enc = try_or_some_err!(enc.try_into()); - let (fixed_int_lazy_value, remaining) = try_or_some_err! { - self.remaining_args_buffer.read_fixed_int_as_lazy_value(binary_enc) - }; - let value_ref = &*self - .remaining_args_buffer - .context() - .allocator() - .alloc_with(|| fixed_int_lazy_value); - ( - EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)), - remaining, - ) - } ParameterEncoding::MacroShaped(_macro_ref) => { todo!("macro-shaped parameter encoding") } // TODO: The other tagless encodings From 106938051ca40673d8751e96fbba46b29d5845f5 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Mon, 8 Sep 2025 16:00:47 -0700 Subject: [PATCH 3/3] Add unit tests for boundary failures --- src/lazy/encoder/writer.rs | 73 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 6d89916e..906315cb 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -1375,7 +1375,7 @@ mod tests { mod eexp_parameter_validation { use super::*; - use num_traits::{PrimInt, Unsigned}; + use num_traits::{PrimInt, Signed, Unsigned}; use rstest::*; #[test] @@ -1516,7 +1516,7 @@ mod tests { #[case::int32_pos("(macro foo (int32::x) (%x))", 5, "5")] #[case::int64_neg("(macro foo (int64::x) (%x))", -5, "-5")] #[case::int64_pos("(macro foo (int64::x) (%x))", 5, "5")] - fn tagless_uint_encoding(#[case] macro_source: &str, #[case] input: i64, #[case] expected: &str) -> IonResult<()> { + fn tagless_int_type_encoding(#[case] macro_source: &str, #[case] input: i64, #[case] expected: &str) -> IonResult<()> { use crate::{Int, Element}; // write_int @@ -1549,6 +1549,10 @@ mod tests { Ok(()) } + /// Ensure that writing fixed int values outside the range of the tagless type's size is + /// caught as an error. This test uses the '_input' field to get the type of the primitive + /// we want to write in order to retrieve the maximum values for the primitive via + /// num_traits. #[rstest] #[case::uint8("(macro foo (uint8::x) (%x))", 5u8)] #[case::uint16("(macro foo (uint16::x) (%x))", 5u16)] @@ -1575,6 +1579,11 @@ mod tests { Ok(()) } + /// Ensure that writing fixed int values outside the range of the tagless type's size is + /// caught as an error. This test uses the '_input' field to get the type of the primitive + /// we want to write in order to retrieve the maximum values for the primitive via + /// num_traits. This test differs from the above test in that it uses `write_i64` and + /// cannot handle 64bit values. #[rstest] #[case::uint8("(macro foo (uint8::x) (%x))", 5u8)] #[case::uint16("(macro foo (uint16::x) (%x))", 5u16)] @@ -1602,5 +1611,65 @@ mod tests { Ok(()) } + /// Ensure that writing fixed int values outside the range of the tagless type's size is + /// caught as an error. This test uses the '_input' field to get the type of the primitive + /// we want to write in order to retrieve the minimum and maximum values for the primitive + /// via num_traits. + #[rstest] + #[case::int8("(macro foo (int8::x) (%x))", 5i8)] + #[case::uint16("(macro foo (int16::x) (%x))", 5i16)] + #[case::uint32("(macro foo (int32::x) (%x))", 5i32)] + #[case::uint64("(macro foo (int64::x) (%x))", 5i64)] + fn tagless_int_encoding_write_int_fails(#[case] macro_source: &str, #[case] _input: T) -> IonResult<()> { + let max_int = T::max_value(); + let max_int_plus_one = num_traits::cast::cast::<_, i128>(max_int).unwrap() + 1i128; + let min_int = T::min_value(); + let min_int_minus_one = num_traits::cast::cast::<_, i128>(min_int).unwrap() - 1i128; + + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + let foo = writer.compile_macro(macro_source)?; + let mut eexp_writer = writer.eexp_writer(&foo)?; + let result = eexp_writer.write_int(&max_int_plus_one.into()); + assert!(result.is_err(), "unexpected success"); + + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + let foo = writer.compile_macro(macro_source)?; + let mut eexp_writer = writer.eexp_writer(&foo)?; + let result = eexp_writer.write_int(&min_int_minus_one.into()); + assert!(result.is_err(), "unexpected success"); + + Ok(()) + } + + /// Ensure that writing fixed int values outside the range of the tagless type's size is + /// caught as an error. This test uses the '_input' field to get the type of the primitive + /// we want to write in order to retrieve the minimum and maximum values for the primitive + /// via num_traits. This test differs from the above test in that it uses `write_i64` and + /// cannot handle 64bit values. + #[rstest] + #[case::int8("(macro foo (int8::x) (%x))", 5i8)] + #[case::uint16("(macro foo (int16::x) (%x))", 5i16)] + #[case::uint32("(macro foo (int32::x) (%x))", 5i32)] + fn tagless_int_encoding_write_i64_fails(#[case] macro_source: &str, #[case] _input: T) -> IonResult<()> { + let max_int = T::max_value(); + let max_int_plus_one = num_traits::cast::cast::<_, i128>(max_int).unwrap() + 1i128; + let min_int = T::min_value(); + let min_int_minus_one = num_traits::cast::cast::<_, i128>(min_int).unwrap() - 1i128; + + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + let foo = writer.compile_macro(macro_source)?; + let mut eexp_writer = writer.eexp_writer(&foo)?; + let result = eexp_writer.write_i64(max_int_plus_one.try_into().unwrap()); + assert!(result.is_err(), "unexpected success"); + + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + let foo = writer.compile_macro(macro_source)?; + let mut eexp_writer = writer.eexp_writer(&foo)?; + let result = eexp_writer.write_i64(min_int_minus_one.try_into().unwrap()); + assert!(result.is_err(), "unexpected success"); + + Ok(()) + } + } }