Skip to content

Commit 121371c

Browse files
authored
feat: [Variant] Add Validation for Variant Deciaml (#7738)
# Which issue does this PR close? - Closes #7697 # Rationale for this change # What changes are included in this PR? - Introduced new types: VariantDecimal4, VariantDecimal8, and VariantDecimal16 - These types encapsulate decimal values and ensure proper validation and wrapping # Are there any user-facing changes?
1 parent a49ce3e commit 121371c

File tree

3 files changed

+209
-63
lines changed

3 files changed

+209
-63
lines changed

parquet-variant/src/builder.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
18-
use crate::{ShortString, Variant};
18+
use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8};
1919
use std::collections::BTreeMap;
2020

2121
const BASIC_TYPE_BITS: u8 = 2;
@@ -384,9 +384,15 @@ impl VariantBuilder {
384384
Variant::Date(v) => self.append_date(v),
385385
Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
386386
Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v),
387-
Variant::Decimal4 { integer, scale } => self.append_decimal4(integer, scale),
388-
Variant::Decimal8 { integer, scale } => self.append_decimal8(integer, scale),
389-
Variant::Decimal16 { integer, scale } => self.append_decimal16(integer, scale),
387+
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
388+
self.append_decimal4(integer, scale)
389+
}
390+
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
391+
self.append_decimal8(integer, scale)
392+
}
393+
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
394+
self.append_decimal16(integer, scale)
395+
}
390396
Variant::Float(v) => self.append_float(v),
391397
Variant::Double(v) => self.append_double(v),
392398
Variant::Binary(v) => self.append_binary(v),

parquet-variant/src/variant.rs

Lines changed: 187 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,100 @@ const MAX_SHORT_STRING_BYTES: usize = 0x3F;
4040
#[derive(Debug, Clone, Copy, PartialEq)]
4141
pub struct ShortString<'a>(pub(crate) &'a str);
4242

43+
/// Represents a 4-byte decimal value in the Variant format.
44+
///
45+
/// This struct stores a decimal number using a 32-bit signed integer for the coefficient
46+
/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is limited to 9 digits.
47+
///
48+
/// For valid precision and scale values, see the Variant specification:
49+
/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
50+
///
51+
#[derive(Debug, Clone, Copy, PartialEq)]
52+
pub struct VariantDecimal4 {
53+
pub(crate) integer: i32,
54+
pub(crate) scale: u8,
55+
}
56+
57+
impl VariantDecimal4 {
58+
pub fn try_new(integer: i32, scale: u8) -> Result<Self, ArrowError> {
59+
const PRECISION_MAX: u32 = 9;
60+
61+
// Validate that scale doesn't exceed precision
62+
if scale as u32 > PRECISION_MAX {
63+
return Err(ArrowError::InvalidArgumentError(format!(
64+
"Scale {} cannot be greater than precision 9 for 4-byte decimal",
65+
scale
66+
)));
67+
}
68+
69+
Ok(VariantDecimal4 { integer, scale })
70+
}
71+
}
72+
73+
/// Represents an 8-byte decimal value in the Variant format.
74+
///
75+
/// This struct stores a decimal number using a 64-bit signed integer for the coefficient
76+
/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is between 10 and 18 digits.
77+
///
78+
/// For valid precision and scale values, see the Variant specification:
79+
///
80+
/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
81+
///
82+
#[derive(Debug, Clone, Copy, PartialEq)]
83+
pub struct VariantDecimal8 {
84+
pub(crate) integer: i64,
85+
pub(crate) scale: u8,
86+
}
87+
88+
impl VariantDecimal8 {
89+
pub fn try_new(integer: i64, scale: u8) -> Result<Self, ArrowError> {
90+
const PRECISION_MAX: u32 = 18;
91+
92+
// Validate that scale doesn't exceed precision
93+
if scale as u32 > PRECISION_MAX {
94+
return Err(ArrowError::InvalidArgumentError(format!(
95+
"Scale {} cannot be greater than precision 18 for 8-byte decimal",
96+
scale
97+
)));
98+
}
99+
100+
Ok(VariantDecimal8 { integer, scale })
101+
}
102+
}
103+
104+
/// Represents an 16-byte decimal value in the Variant format.
105+
///
106+
/// This struct stores a decimal number using a 128-bit signed integer for the coefficient
107+
/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is between 19 and 38 digits.
108+
///
109+
/// For valid precision and scale values, see the Variant specification:
110+
///
111+
/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
112+
///
113+
#[derive(Debug, Clone, Copy, PartialEq)]
114+
pub struct VariantDecimal16 {
115+
pub(crate) integer: i128,
116+
pub(crate) scale: u8,
117+
}
118+
119+
impl VariantDecimal16 {
120+
pub fn try_new(integer: i128, scale: u8) -> Result<Self, ArrowError> {
121+
const PRECISION_MAX: u32 = 38;
122+
123+
// Validate that scale doesn't exceed precision
124+
if scale as u32 > PRECISION_MAX {
125+
return Err(ArrowError::InvalidArgumentError(format!(
126+
"Scale {} cannot be greater than precision 38 for 16-byte decimal",
127+
scale
128+
)));
129+
}
130+
131+
Ok(VariantDecimal16 { integer, scale })
132+
}
133+
}
134+
43135
impl<'a> ShortString<'a> {
44-
/// Attempts to interpret `value` as a variant short string value.
136+
/// Attempts to interpret `value` as a variant short string value.
45137
///
46138
/// # Validation
47139
///
@@ -194,11 +286,11 @@ pub enum Variant<'m, 'v> {
194286
/// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
195287
TimestampNtzMicros(NaiveDateTime),
196288
/// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
197-
Decimal4 { integer: i32, scale: u8 },
289+
Decimal4(VariantDecimal4),
198290
/// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
199-
Decimal8 { integer: i64, scale: u8 },
291+
Decimal8(VariantDecimal8),
200292
/// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
201-
Decimal16 { integer: i128, scale: u8 },
293+
Decimal16(VariantDecimal16),
202294
/// Primitive (type_id=1): FLOAT
203295
Float(f32),
204296
/// Primitive (type_id=1): DOUBLE
@@ -269,15 +361,15 @@ impl<'m, 'v> Variant<'m, 'v> {
269361
VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
270362
VariantPrimitiveType::Decimal4 => {
271363
let (integer, scale) = decoder::decode_decimal4(value_data)?;
272-
Variant::Decimal4 { integer, scale }
364+
Variant::Decimal4(VariantDecimal4 { integer, scale })
273365
}
274366
VariantPrimitiveType::Decimal8 => {
275367
let (integer, scale) = decoder::decode_decimal8(value_data)?;
276-
Variant::Decimal8 { integer, scale }
368+
Variant::Decimal8(VariantDecimal8 { integer, scale })
277369
}
278370
VariantPrimitiveType::Decimal16 => {
279371
let (integer, scale) = decoder::decode_decimal16(value_data)?;
280-
Variant::Decimal16 { integer, scale }
372+
Variant::Decimal16(VariantDecimal16 { integer, scale })
281373
}
282374
VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
283375
VariantPrimitiveType::Double => {
@@ -640,18 +732,18 @@ impl<'m, 'v> Variant<'m, 'v> {
640732
/// # Examples
641733
///
642734
/// ```
643-
/// use parquet_variant::Variant;
735+
/// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
644736
///
645737
/// // you can extract decimal parts from smaller or equally-sized decimal variants
646-
/// let v1 = Variant::from((1234_i32, 2));
738+
/// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
647739
/// assert_eq!(v1.as_decimal_int32(), Some((1234_i32, 2)));
648740
///
649741
/// // and from larger decimal variants if they fit
650-
/// let v2 = Variant::from((1234_i64, 2));
742+
/// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
651743
/// assert_eq!(v2.as_decimal_int32(), Some((1234_i32, 2)));
652744
///
653745
/// // but not if the value would overflow i32
654-
/// let v3 = Variant::from((12345678901i64, 2));
746+
/// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
655747
/// assert_eq!(v3.as_decimal_int32(), None);
656748
///
657749
/// // or if the variant is not a decimal
@@ -660,17 +752,17 @@ impl<'m, 'v> Variant<'m, 'v> {
660752
/// ```
661753
pub fn as_decimal_int32(&self) -> Option<(i32, u8)> {
662754
match *self {
663-
Variant::Decimal4 { integer, scale } => Some((integer, scale)),
664-
Variant::Decimal8 { integer, scale } => {
665-
if let Ok(converted_integer) = integer.try_into() {
666-
Some((converted_integer, scale))
755+
Variant::Decimal4(decimal4) => Some((decimal4.integer, decimal4.scale)),
756+
Variant::Decimal8(decimal8) => {
757+
if let Ok(converted_integer) = decimal8.integer.try_into() {
758+
Some((converted_integer, decimal8.scale))
667759
} else {
668760
None
669761
}
670762
}
671-
Variant::Decimal16 { integer, scale } => {
672-
if let Ok(converted_integer) = integer.try_into() {
673-
Some((converted_integer, scale))
763+
Variant::Decimal16(decimal16) => {
764+
if let Ok(converted_integer) = decimal16.integer.try_into() {
765+
Some((converted_integer, decimal16.scale))
674766
} else {
675767
None
676768
}
@@ -688,18 +780,18 @@ impl<'m, 'v> Variant<'m, 'v> {
688780
/// # Examples
689781
///
690782
/// ```
691-
/// use parquet_variant::Variant;
783+
/// use parquet_variant::{Variant, VariantDecimal8, VariantDecimal16};
692784
///
693785
/// // you can extract decimal parts from smaller or equally-sized decimal variants
694-
/// let v1 = Variant::from((1234_i64, 2));
786+
/// let v1 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
695787
/// assert_eq!(v1.as_decimal_int64(), Some((1234_i64, 2)));
696788
///
697789
/// // and from larger decimal variants if they fit
698-
/// let v2 = Variant::from((1234_i128, 2));
790+
/// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
699791
/// assert_eq!(v2.as_decimal_int64(), Some((1234_i64, 2)));
700792
///
701793
/// // but not if the value would overflow i64
702-
/// let v3 = Variant::from((2e19 as i128, 2));
794+
/// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
703795
/// assert_eq!(v3.as_decimal_int64(), None);
704796
///
705797
/// // or if the variant is not a decimal
@@ -708,11 +800,11 @@ impl<'m, 'v> Variant<'m, 'v> {
708800
/// ```
709801
pub fn as_decimal_int64(&self) -> Option<(i64, u8)> {
710802
match *self {
711-
Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
712-
Variant::Decimal8 { integer, scale } => Some((integer, scale)),
713-
Variant::Decimal16 { integer, scale } => {
714-
if let Ok(converted_integer) = integer.try_into() {
715-
Some((converted_integer, scale))
803+
Variant::Decimal4(decimal) => Some((decimal.integer.into(), decimal.scale)),
804+
Variant::Decimal8(decimal) => Some((decimal.integer, decimal.scale)),
805+
Variant::Decimal16(decimal) => {
806+
if let Ok(converted_integer) = decimal.integer.try_into() {
807+
Some((converted_integer, decimal.scale))
716808
} else {
717809
None
718810
}
@@ -730,10 +822,10 @@ impl<'m, 'v> Variant<'m, 'v> {
730822
/// # Examples
731823
///
732824
/// ```
733-
/// use parquet_variant::Variant;
825+
/// use parquet_variant::{Variant, VariantDecimal16};
734826
///
735827
/// // you can extract decimal parts from smaller or equally-sized decimal variants
736-
/// let v1 = Variant::from((1234_i128, 2));
828+
/// let v1 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
737829
/// assert_eq!(v1.as_decimal_int128(), Some((1234_i128, 2)));
738830
///
739831
/// // but not if the variant is not a decimal
@@ -742,9 +834,9 @@ impl<'m, 'v> Variant<'m, 'v> {
742834
/// ```
743835
pub fn as_decimal_int128(&self) -> Option<(i128, u8)> {
744836
match *self {
745-
Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
746-
Variant::Decimal8 { integer, scale } => Some((integer.into(), scale)),
747-
Variant::Decimal16 { integer, scale } => Some((integer, scale)),
837+
Variant::Decimal4(decimal) => Some((decimal.integer.into(), decimal.scale)),
838+
Variant::Decimal8(decimal) => Some((decimal.integer.into(), decimal.scale)),
839+
Variant::Decimal16(decimal) => Some((decimal.integer, decimal.scale)),
748840
_ => None,
749841
}
750842
}
@@ -912,30 +1004,21 @@ impl From<i64> for Variant<'_, '_> {
9121004
}
9131005
}
9141006

915-
impl From<(i32, u8)> for Variant<'_, '_> {
916-
fn from(value: (i32, u8)) -> Self {
917-
Variant::Decimal4 {
918-
integer: value.0,
919-
scale: value.1,
920-
}
1007+
impl From<VariantDecimal4> for Variant<'_, '_> {
1008+
fn from(value: VariantDecimal4) -> Self {
1009+
Variant::Decimal4(value)
9211010
}
9221011
}
9231012

924-
impl From<(i64, u8)> for Variant<'_, '_> {
925-
fn from(value: (i64, u8)) -> Self {
926-
Variant::Decimal8 {
927-
integer: value.0,
928-
scale: value.1,
929-
}
1013+
impl From<VariantDecimal8> for Variant<'_, '_> {
1014+
fn from(value: VariantDecimal8) -> Self {
1015+
Variant::Decimal8(value)
9301016
}
9311017
}
9321018

933-
impl From<(i128, u8)> for Variant<'_, '_> {
934-
fn from(value: (i128, u8)) -> Self {
935-
Variant::Decimal16 {
936-
integer: value.0,
937-
scale: value.1,
938-
}
1019+
impl From<VariantDecimal16> for Variant<'_, '_> {
1020+
fn from(value: VariantDecimal16) -> Self {
1021+
Variant::Decimal16(value)
9391022
}
9401023
}
9411024

@@ -994,6 +1077,36 @@ impl<'v> From<&'v str> for Variant<'_, 'v> {
9941077
}
9951078
}
9961079

1080+
impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1081+
type Error = ArrowError;
1082+
1083+
fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1084+
Ok(Variant::Decimal4(VariantDecimal4::try_new(
1085+
value.0, value.1,
1086+
)?))
1087+
}
1088+
}
1089+
1090+
impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1091+
type Error = ArrowError;
1092+
1093+
fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1094+
Ok(Variant::Decimal8(VariantDecimal8::try_new(
1095+
value.0, value.1,
1096+
)?))
1097+
}
1098+
}
1099+
1100+
impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1101+
type Error = ArrowError;
1102+
1103+
fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1104+
Ok(Variant::Decimal16(VariantDecimal16::try_new(
1105+
value.0, value.1,
1106+
)?))
1107+
}
1108+
}
1109+
9971110
#[cfg(test)]
9981111
mod tests {
9991112
use super::*;
@@ -1007,4 +1120,28 @@ mod tests {
10071120
let res = ShortString::try_new(&long_string);
10081121
assert!(res.is_err());
10091122
}
1123+
1124+
#[test]
1125+
fn test_variant_decimal_conversion() {
1126+
let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
1127+
let variant = Variant::from(decimal4);
1128+
assert_eq!(variant.as_decimal_int32(), Some((1234_i32, 2)));
1129+
1130+
let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
1131+
let variant = Variant::from(decimal8);
1132+
assert_eq!(variant.as_decimal_int64(), Some((12345678901_i64, 2)));
1133+
1134+
let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
1135+
let variant = Variant::from(decimal16);
1136+
assert_eq!(
1137+
variant.as_decimal_int128(),
1138+
Some((123456789012345678901234567890_i128, 2))
1139+
);
1140+
}
1141+
1142+
#[test]
1143+
fn test_invalid_variant_decimal_conversion() {
1144+
let decimal4 = VariantDecimal4::try_new(123456789_i32, 20);
1145+
assert!(decimal4.is_err(), "i32 overflow should fail");
1146+
}
10101147
}

0 commit comments

Comments
 (0)