Skip to content

Commit de7c979

Browse files
committed
[FIX] fix further precision issues from the decimal
1 parent 4010b73 commit de7c979

File tree

1 file changed

+107
-118
lines changed

1 file changed

+107
-118
lines changed

parquet-variant/src/to_json.rs

Lines changed: 107 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -42,44 +42,47 @@ fn format_binary_base64(bytes: &[u8]) -> String {
4242
general_purpose::STANDARD.encode(bytes)
4343
}
4444

45-
/// Generic function to write decimal values to JSON buffer
46-
fn write_decimal(
45+
/// Write decimal using scovich's hybrid approach for i32
46+
fn write_decimal_i32(
4747
json_buffer: &mut impl Write,
48-
integer: impl std::fmt::Display + std::fmt::Debug,
48+
integer: i32,
4949
scale: u8,
5050
) -> Result<(), ArrowError> {
51-
if scale == 0 {
52-
write!(json_buffer, "{}", integer)?;
51+
let integer = if scale == 0 {
52+
integer
5353
} else {
54-
// Convert to string and manually handle the decimal point placement
55-
let integer_str = integer.to_string();
56-
let is_negative = integer_str.starts_with('-');
57-
let abs_str = if is_negative { &integer_str[1..] } else { &integer_str };
58-
59-
let scale_usize = scale as usize;
60-
if abs_str.len() <= scale_usize {
61-
// Need to pad with leading zeros
62-
let zeros_needed = scale_usize - abs_str.len();
63-
let padded = format!("{}{}", "0".repeat(zeros_needed), abs_str);
64-
let decimal_part = padded.trim_end_matches('0');
65-
if decimal_part.is_empty() {
66-
write!(json_buffer, "0")?;
67-
} else {
68-
write!(json_buffer, "{}0.{}", if is_negative { "-" } else { "" }, decimal_part)?;
69-
}
70-
} else {
71-
// Split the string at the decimal point
72-
let split_point = abs_str.len() - scale_usize;
73-
let integer_part = &abs_str[..split_point];
74-
let decimal_part = abs_str[split_point..].trim_end_matches('0');
75-
76-
if decimal_part.is_empty() {
77-
write!(json_buffer, "{}{}", if is_negative { "-" } else { "" }, integer_part)?;
78-
} else {
79-
write!(json_buffer, "{}{}.{}", if is_negative { "-" } else { "" }, integer_part, decimal_part)?;
80-
}
54+
let divisor = 10_i32.pow(scale as u32);
55+
if integer % divisor != 0 {
56+
// fall back to floating point
57+
let result = integer as f64 / divisor as f64;
58+
write!(json_buffer, "{}", result)?;
59+
return Ok(());
8160
}
82-
}
61+
integer / divisor
62+
};
63+
write!(json_buffer, "{}", integer)?;
64+
Ok(())
65+
}
66+
67+
/// Write decimal using scovich's hybrid approach for i64
68+
fn write_decimal_i64(
69+
json_buffer: &mut impl Write,
70+
integer: i64,
71+
scale: u8,
72+
) -> Result<(), ArrowError> {
73+
let integer = if scale == 0 {
74+
integer
75+
} else {
76+
let divisor = 10_i64.pow(scale as u32);
77+
if integer % divisor != 0 {
78+
// fall back to floating point
79+
let result = integer as f64 / divisor as f64;
80+
write!(json_buffer, "{}", result)?;
81+
return Ok(());
82+
}
83+
integer / divisor
84+
};
85+
write!(json_buffer, "{}", integer)?;
8386
Ok(())
8487
}
8588

@@ -131,13 +134,32 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
131134
Variant::Float(f) => write!(json_buffer, "{}", f)?,
132135
Variant::Double(f) => write!(json_buffer, "{}", f)?,
133136
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
134-
write_decimal(json_buffer, *integer, *scale)?;
137+
write_decimal_i32(json_buffer, *integer, *scale)?;
135138
}
136139
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
137-
write_decimal(json_buffer, *integer, *scale)?;
140+
write_decimal_i64(json_buffer, *integer, *scale)?;
138141
}
139142
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
140-
write_decimal(json_buffer, *integer, *scale)?;
143+
let integer = if *scale == 0 {
144+
*integer
145+
} else {
146+
let divisor = 10_i128.pow(*scale as u32);
147+
if integer % divisor != 0 {
148+
// fall back to floating point
149+
let result = *integer as f64 / divisor as f64;
150+
write!(json_buffer, "{}", result)?;
151+
return Ok(());
152+
}
153+
integer / divisor
154+
};
155+
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
156+
if let Ok(i64_val) = i64::try_from(integer) {
157+
write!(json_buffer, "{}", i64_val)?;
158+
} else if let Ok(u64_val) = u64::try_from(integer) {
159+
write!(json_buffer, "{}", u64_val)?;
160+
} else {
161+
write!(json_buffer, "{}", integer as f64)?;
162+
}
141163
}
142164
Variant::Date(date) => write!(json_buffer, "\"{}\"", format_date_string(date))?,
143165
Variant::TimestampMicros(ts) => write!(json_buffer, "\"{}\"", ts.to_rfc3339())?,
@@ -343,88 +365,48 @@ pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
343365
.map(Value::Number)
344366
.ok_or_else(|| ArrowError::InvalidArgumentError("Invalid double value".to_string())),
345367
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
346-
// Use integer arithmetic to avoid f64 precision loss
347-
if *scale == 0 {
348-
Ok(Value::Number((*integer).into()))
368+
let integer = if *scale == 0 {
369+
*integer
349370
} else {
350371
let divisor = 10_i32.pow(*scale as u32);
351-
let quotient = integer / divisor;
352-
let remainder = (integer % divisor).abs();
353-
354-
let decimal_str = if remainder == 0 {
355-
quotient.to_string()
356-
} else {
357-
// The {:0width$} format ensures it correctly renders with leading zeros (e.g., .0000100)
358-
let formatted_remainder = format!("{:0width$}", remainder, width = *scale as usize);
359-
// Then strip away any trailing zeros (e.g., .00001)
360-
let trimmed_remainder = formatted_remainder.trim_end_matches('0');
361-
format!("{}.{}", quotient, trimmed_remainder)
362-
};
363-
364-
// Parse as serde_json::Number to preserve precision
365-
decimal_str
366-
.parse::<serde_json::Number>()
367-
.map(Value::Number)
368-
.map_err(|e| {
369-
ArrowError::InvalidArgumentError(format!("Invalid decimal string: {}", e))
370-
})
371-
}
372+
if integer % divisor != 0 {
373+
// fall back to floating point
374+
return Ok(Value::from(*integer as f64 / divisor as f64));
375+
}
376+
integer / divisor
377+
};
378+
Ok(Value::from(integer))
372379
}
373380
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
374-
// Use integer arithmetic to avoid f64 precision loss
375-
if *scale == 0 {
376-
Ok(Value::Number((*integer).into()))
381+
let integer = if *scale == 0 {
382+
*integer
377383
} else {
378384
let divisor = 10_i64.pow(*scale as u32);
379-
let quotient = integer / divisor;
380-
let remainder = (integer % divisor).abs();
381-
382-
let decimal_str = if remainder == 0 {
383-
quotient.to_string()
384-
} else {
385-
// The {:0width$} format ensures it correctly renders with leading zeros (e.g., .0000100)
386-
let formatted_remainder = format!("{:0width$}", remainder, width = *scale as usize);
387-
// Then strip away any trailing zeros (e.g., .00001)
388-
let trimmed_remainder = formatted_remainder.trim_end_matches('0');
389-
format!("{}.{}", quotient, trimmed_remainder)
390-
};
391-
392-
// Parse as serde_json::Number to preserve precision
393-
decimal_str
394-
.parse::<serde_json::Number>()
395-
.map(Value::Number)
396-
.map_err(|e| {
397-
ArrowError::InvalidArgumentError(format!("Invalid decimal string: {}", e))
398-
})
399-
}
385+
if integer % divisor != 0 {
386+
// fall back to floating point
387+
return Ok(Value::from(*integer as f64 / divisor as f64));
388+
}
389+
integer / divisor
390+
};
391+
Ok(Value::from(integer))
400392
}
401393
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
402-
// Use integer arithmetic to avoid f64 precision loss
403-
if *scale == 0 {
404-
Ok(Value::Number((*integer as i64).into())) // Convert to i64 for JSON compatibility
394+
let integer = if *scale == 0 {
395+
*integer
405396
} else {
406397
let divisor = 10_i128.pow(*scale as u32);
407-
let quotient = integer / divisor;
408-
let remainder = (integer % divisor).abs();
409-
410-
let decimal_str = if remainder == 0 {
411-
quotient.to_string()
412-
} else {
413-
// The {:0width$} format ensures it correctly renders with leading zeros (e.g., .0000100)
414-
let formatted_remainder = format!("{:0width$}", remainder, width = *scale as usize);
415-
// Then strip away any trailing zeros (e.g., .00001)
416-
let trimmed_remainder = formatted_remainder.trim_end_matches('0');
417-
format!("{}.{}", quotient, trimmed_remainder)
418-
};
419-
420-
// Parse as serde_json::Number to preserve precision
421-
decimal_str
422-
.parse::<serde_json::Number>()
423-
.map(Value::Number)
424-
.map_err(|e| {
425-
ArrowError::InvalidArgumentError(format!("Invalid decimal string: {}", e))
426-
})
427-
}
398+
if integer % divisor != 0 {
399+
// fall back to floating point
400+
return Ok(Value::from(*integer as f64 / divisor as f64));
401+
}
402+
integer / divisor
403+
};
404+
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
405+
let value = i64::try_from(integer)
406+
.map(Value::from)
407+
.or_else(|_| u64::try_from(integer).map(Value::from))
408+
.unwrap_or_else(|_| Value::from(integer as f64));
409+
Ok(value)
428410
}
429411
Variant::Date(date) => Ok(Value::String(format_date_string(date))),
430412
Variant::TimestampMicros(ts) => Ok(Value::String(ts.to_rfc3339())),
@@ -1222,8 +1204,8 @@ mod tests {
12221204
}
12231205

12241206
#[test]
1225-
fn test_high_precision_decimal_no_loss() -> Result<(), ArrowError> {
1226-
// Test case that would lose precision with f64 conversion
1207+
fn test_decimal_precision_behavior() -> Result<(), ArrowError> {
1208+
// Test case that demonstrates f64 precision limits
12271209
// This is a 63-bit precision decimal8 value that f64 cannot represent exactly
12281210
let high_precision_decimal8 = Variant::from(VariantDecimal8::try_new(
12291211
9007199254740993, // 2^53 + 1, exceeds f64 precision
@@ -1233,22 +1215,29 @@ mod tests {
12331215
let json_string = variant_to_json_string(&high_precision_decimal8)?;
12341216
let json_value = variant_to_json_value(&high_precision_decimal8)?;
12351217

1236-
// Expected result: 9007199254.740993 (exact representation)
1237-
assert_eq!(json_string, "9007199254.740993");
1238-
1239-
// Verify that both functions produce consistent results
1218+
// Due to f64 precision limits, we expect precision loss for values > 2^53
1219+
// Both functions should produce consistent results (even if not exact)
12401220
let parsed: Value = serde_json::from_str(&json_string)
12411221
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
12421222
assert_eq!(parsed, json_value);
12431223

1244-
// Test another case with trailing zeros that should be trimmed
1245-
let decimal_with_zeros = Variant::from(VariantDecimal8::try_new(
1224+
// Test a case that can be exactly represented (integer result)
1225+
let exact_decimal = Variant::from(VariantDecimal8::try_new(
12461226
1234567890000, // Should result in 1234567.89 (trailing zeros trimmed)
12471227
6,
12481228
)?);
12491229

1250-
let json_string_zeros = variant_to_json_string(&decimal_with_zeros)?;
1251-
assert_eq!(json_string_zeros, "1234567.89");
1230+
let json_string_exact = variant_to_json_string(&exact_decimal)?;
1231+
assert_eq!(json_string_exact, "1234567.89");
1232+
1233+
// Test integer case (should be exact)
1234+
let integer_decimal = Variant::from(VariantDecimal8::try_new(
1235+
42000000, // Should result in 42 (integer)
1236+
6,
1237+
)?);
1238+
1239+
let json_string_integer = variant_to_json_string(&integer_decimal)?;
1240+
assert_eq!(json_string_integer, "42");
12521241

12531242
Ok(())
12541243
}

0 commit comments

Comments
 (0)