Skip to content

Commit f8bcc58

Browse files
scovichalamb
andauthored
[VARIANT] impl Display for VariantDecimalXX (#7785)
# Which issue does this PR close? * Part of #6736 # Rationale for this change Follow-up to #7670, which accidentally introduced a lossy to-string conversion for variant decimals. # What changes are included in this PR? Use integer + string operations to convert decimal values to string, instead of floating point that could lose precision. Also, the `VariantDecimalXX` structs now `impl Display`, which greatly simplifies the to-json path. A new (private) macro encapsulates the to-string logic, since it's syntactically identical for all three decimal sizes. # Are these changes tested? Yes, new unit tests added. # Are there any user-facing changes? The `VariantDecimalXX` structs now `impl Display` --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 72e91fc commit f8bcc58

File tree

8 files changed

+288
-118
lines changed

8 files changed

+288
-118
lines changed

arrow-schema/src/datatype.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ impl fmt::Display for DataType {
467467
.map(|f| format!("{} {}", f.name(), f.data_type()))
468468
.collect::<Vec<_>>()
469469
.join(", ");
470-
write!(f, "{}", fields_str)?;
470+
write!(f, "{fields_str}")?;
471471
}
472472
write!(f, ")")?;
473473
Ok(())

arrow-schema/src/datatype_parse.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,9 @@ impl<'a> Parser<'a> {
7979
Token::LargeList => self.parse_large_list(),
8080
Token::FixedSizeList => self.parse_fixed_size_list(),
8181
Token::Struct => self.parse_struct(),
82-
Token::FieldName(word) => Err(make_error(
83-
self.val,
84-
&format!("unrecognized word: {}", word),
85-
)),
82+
Token::FieldName(word) => {
83+
Err(make_error(self.val, &format!("unrecognized word: {word}")))
84+
}
8685
tok => Err(make_error(
8786
self.val,
8887
&format!("finding next type, got unexpected '{tok}'"),
@@ -155,10 +154,9 @@ impl<'a> Parser<'a> {
155154
fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
156155
match self.next_token()? {
157156
Token::DoubleQuotedString(s) => Ok(s),
158-
Token::FieldName(word) => Err(make_error(
159-
self.val,
160-
&format!("unrecognized word: {}", word),
161-
)),
157+
Token::FieldName(word) => {
158+
Err(make_error(self.val, &format!("unrecognized word: {word}")))
159+
}
162160
tok => Err(make_error(
163161
self.val,
164162
&format!("finding double quoted string for {context}, got '{tok}'"),

arrow-schema/src/fields.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ impl UnionFields {
365365
.inspect(|&idx| {
366366
let mask = 1_u128 << idx;
367367
if (set & mask) != 0 {
368-
panic!("duplicate type id: {}", idx);
368+
panic!("duplicate type id: {idx}");
369369
} else {
370370
set |= mask;
371371
}

parquet-variant/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ mod tests {
855855
let val2 = list.get(2).unwrap();
856856
assert_eq!(val2, Variant::ShortString(ShortString("test")));
857857
}
858-
_ => panic!("Expected an array variant, got: {:?}", variant),
858+
_ => panic!("Expected an array variant, got: {variant:?}"),
859859
}
860860
}
861861

parquet-variant/src/decoder.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ impl TryFrom<u8> for VariantPrimitiveType {
9595
15 => Ok(VariantPrimitiveType::Binary),
9696
16 => Ok(VariantPrimitiveType::String),
9797
_ => Err(ArrowError::InvalidArgumentError(format!(
98-
"unknown primitive type: {}",
99-
value
98+
"unknown primitive type: {value}",
10099
))),
101100
}
102101
}

parquet-variant/src/to_json.rs

Lines changed: 28 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -41,51 +41,7 @@ fn format_binary_base64(bytes: &[u8]) -> String {
4141
general_purpose::STANDARD.encode(bytes)
4242
}
4343

44-
/// Write decimal using scovich's hybrid approach for i32
45-
fn write_decimal_i32(
46-
json_buffer: &mut impl Write,
47-
integer: i32,
48-
scale: u8,
49-
) -> Result<(), ArrowError> {
50-
let integer = if scale == 0 {
51-
integer
52-
} else {
53-
let divisor = 10_i32.pow(scale as u32);
54-
if integer % divisor != 0 {
55-
// fall back to floating point
56-
let result = integer as f64 / divisor as f64;
57-
write!(json_buffer, "{}", result)?;
58-
return Ok(());
59-
}
60-
integer / divisor
61-
};
62-
write!(json_buffer, "{}", integer)?;
63-
Ok(())
64-
}
65-
66-
/// Write decimal using scovich's hybrid approach for i64
67-
fn write_decimal_i64(
68-
json_buffer: &mut impl Write,
69-
integer: i64,
70-
scale: u8,
71-
) -> Result<(), ArrowError> {
72-
let integer = if scale == 0 {
73-
integer
74-
} else {
75-
let divisor = 10_i64.pow(scale as u32);
76-
if integer % divisor != 0 {
77-
// fall back to floating point
78-
let result = integer as f64 / divisor as f64;
79-
write!(json_buffer, "{}", result)?;
80-
return Ok(());
81-
}
82-
integer / divisor
83-
};
84-
write!(json_buffer, "{}", integer)?;
85-
Ok(())
86-
}
87-
88-
/// Converts a Variant to JSON and writes it to the provided [`Write`]
44+
/// Converts a Variant to JSON and writes it to the provided `Write`
8945
///
9046
/// This function writes JSON directly to any type that implements [`Write`],
9147
/// making it efficient for streaming or when you want to control the output destination.
@@ -140,40 +96,15 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
14096
Variant::Null => write!(json_buffer, "null")?,
14197
Variant::BooleanTrue => write!(json_buffer, "true")?,
14298
Variant::BooleanFalse => write!(json_buffer, "false")?,
143-
Variant::Int8(i) => write!(json_buffer, "{}", i)?,
144-
Variant::Int16(i) => write!(json_buffer, "{}", i)?,
145-
Variant::Int32(i) => write!(json_buffer, "{}", i)?,
146-
Variant::Int64(i) => write!(json_buffer, "{}", i)?,
147-
Variant::Float(f) => write!(json_buffer, "{}", f)?,
148-
Variant::Double(f) => write!(json_buffer, "{}", f)?,
149-
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
150-
write_decimal_i32(json_buffer, *integer, *scale)?;
151-
}
152-
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
153-
write_decimal_i64(json_buffer, *integer, *scale)?;
154-
}
155-
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
156-
let integer = if *scale == 0 {
157-
*integer
158-
} else {
159-
let divisor = 10_i128.pow(*scale as u32);
160-
if integer % divisor != 0 {
161-
// fall back to floating point
162-
let result = *integer as f64 / divisor as f64;
163-
write!(json_buffer, "{}", result)?;
164-
return Ok(());
165-
}
166-
integer / divisor
167-
};
168-
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
169-
if let Ok(i64_val) = i64::try_from(integer) {
170-
write!(json_buffer, "{}", i64_val)?;
171-
} else if let Ok(u64_val) = u64::try_from(integer) {
172-
write!(json_buffer, "{}", u64_val)?;
173-
} else {
174-
write!(json_buffer, "{}", integer as f64)?;
175-
}
176-
}
99+
Variant::Int8(i) => write!(json_buffer, "{i}")?,
100+
Variant::Int16(i) => write!(json_buffer, "{i}")?,
101+
Variant::Int32(i) => write!(json_buffer, "{i}")?,
102+
Variant::Int64(i) => write!(json_buffer, "{i}")?,
103+
Variant::Float(f) => write!(json_buffer, "{f}")?,
104+
Variant::Double(f) => write!(json_buffer, "{f}")?,
105+
Variant::Decimal4(decimal) => write!(json_buffer, "{decimal}")?,
106+
Variant::Decimal8(decimal) => write!(json_buffer, "{decimal}")?,
107+
Variant::Decimal16(decimal) => write!(json_buffer, "{decimal}")?,
177108
Variant::Date(date) => write!(json_buffer, "\"{}\"", format_date_string(date))?,
178109
Variant::TimestampMicros(ts) => write!(json_buffer, "\"{}\"", ts.to_rfc3339())?,
179110
Variant::TimestampNtzMicros(ts) => {
@@ -183,23 +114,23 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
183114
// Encode binary as base64 string
184115
let base64_str = format_binary_base64(bytes);
185116
let json_str = serde_json::to_string(&base64_str).map_err(|e| {
186-
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
117+
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
187118
})?;
188-
write!(json_buffer, "{}", json_str)?
119+
write!(json_buffer, "{json_str}")?
189120
}
190121
Variant::String(s) => {
191122
// Use serde_json to properly escape the string
192123
let json_str = serde_json::to_string(s).map_err(|e| {
193-
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
124+
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
194125
})?;
195-
write!(json_buffer, "{}", json_str)?
126+
write!(json_buffer, "{json_str}")?
196127
}
197128
Variant::ShortString(s) => {
198129
// Use serde_json to properly escape the string
199130
let json_str = serde_json::to_string(s.as_str()).map_err(|e| {
200-
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
131+
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
201132
})?;
202-
write!(json_buffer, "{}", json_str)?
133+
write!(json_buffer, "{json_str}")?
203134
}
204135
Variant::Object(obj) => {
205136
convert_object_to_json(json_buffer, obj)?;
@@ -226,9 +157,9 @@ fn convert_object_to_json(buffer: &mut impl Write, obj: &VariantObject) -> Resul
226157

227158
// Write the key (properly escaped)
228159
let json_key = serde_json::to_string(key).map_err(|e| {
229-
ArrowError::InvalidArgumentError(format!("JSON key encoding error: {}", e))
160+
ArrowError::InvalidArgumentError(format!("JSON key encoding error: {e}"))
230161
})?;
231-
write!(buffer, "{}:", json_key)?;
162+
write!(buffer, "{json_key}:")?;
232163

233164
// Recursively convert the value
234165
variant_to_json(buffer, &value)?;
@@ -313,7 +244,7 @@ pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> {
313244
let mut buffer = Vec::new();
314245
variant_to_json(&mut buffer, variant)?;
315246
String::from_utf8(buffer)
316-
.map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {}", e)))
247+
.map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {e}")))
317248
}
318249

319250
/// Convert [`Variant`] to [`serde_json::Value`]
@@ -394,7 +325,8 @@ pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
394325
}
395326
integer / divisor
396327
};
397-
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
328+
// i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
329+
// i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
398330
let value = i64::try_from(integer)
399331
.map(Value::from)
400332
.or_else(|_| u64::try_from(integer).map(Value::from))
@@ -928,8 +860,7 @@ mod tests {
928860
let json = variant_to_json_string(&variant)?;
929861

930862
// Parse the JSON to verify structure - handle JSON parsing errors manually
931-
let parsed: Value = serde_json::from_str(&json)
932-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
863+
let parsed: Value = serde_json::from_str(&json).unwrap();
933864
let obj = parsed.as_object().expect("expected JSON object");
934865
assert_eq!(obj.get("name"), Some(&Value::String("Alice".to_string())));
935866
assert_eq!(obj.get("age"), Some(&Value::Number(30.into())));
@@ -990,8 +921,7 @@ mod tests {
990921
assert!(json.contains("😀 Smiley"));
991922

992923
// Verify that the JSON can be parsed back
993-
let parsed: Value = serde_json::from_str(&json)
994-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
924+
let parsed: Value = serde_json::from_str(&json).unwrap();
995925
assert!(matches!(parsed, Value::Object(_)));
996926

997927
Ok(())
@@ -1069,8 +999,7 @@ mod tests {
1069999
let variant = Variant::try_new(&metadata, &value)?;
10701000
let json = variant_to_json_string(&variant)?;
10711001

1072-
let parsed: Value = serde_json::from_str(&json)
1073-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
1002+
let parsed: Value = serde_json::from_str(&json).unwrap();
10741003
let arr = parsed.as_array().expect("expected JSON array");
10751004
assert_eq!(arr.len(), 5);
10761005
assert_eq!(arr[0], Value::String("hello".to_string()));
@@ -1102,8 +1031,7 @@ mod tests {
11021031
let json = variant_to_json_string(&variant)?;
11031032

11041033
// Parse and verify all fields are present
1105-
let parsed: Value = serde_json::from_str(&json)
1106-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
1034+
let parsed: Value = serde_json::from_str(&json).unwrap();
11071035
let obj = parsed.as_object().expect("expected JSON object");
11081036
assert_eq!(obj.len(), 3);
11091037
assert_eq!(obj.get("alpha"), Some(&Value::String("first".to_string())));
@@ -1135,8 +1063,7 @@ mod tests {
11351063
let variant = Variant::try_new(&metadata, &value)?;
11361064
let json = variant_to_json_string(&variant)?;
11371065

1138-
let parsed: Value = serde_json::from_str(&json)
1139-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
1066+
let parsed: Value = serde_json::from_str(&json).unwrap();
11401067
let arr = parsed.as_array().expect("expected JSON array");
11411068
assert_eq!(arr.len(), 7);
11421069
assert_eq!(arr[0], Value::String("string_value".to_string()));
@@ -1171,8 +1098,7 @@ mod tests {
11711098
let variant = Variant::try_new(&metadata, &value)?;
11721099
let json = variant_to_json_string(&variant)?;
11731100

1174-
let parsed: Value = serde_json::from_str(&json)
1175-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
1101+
let parsed: Value = serde_json::from_str(&json).unwrap();
11761102
let obj = parsed.as_object().expect("expected JSON object");
11771103
assert_eq!(obj.len(), 6);
11781104
assert_eq!(
@@ -1202,8 +1128,7 @@ mod tests {
12021128

12031129
// Due to f64 precision limits, we expect precision loss for values > 2^53
12041130
// Both functions should produce consistent results (even if not exact)
1205-
let parsed: Value = serde_json::from_str(&json_string)
1206-
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
1131+
let parsed: Value = serde_json::from_str(&json_string).unwrap();
12071132
assert_eq!(parsed, json_value);
12081133

12091134
// Test a case that can be exactly represented (integer result)

0 commit comments

Comments
 (0)