Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 5 additions & 73 deletions parquet-variant/src/to_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,50 +42,6 @@ fn format_binary_base64(bytes: &[u8]) -> String {
general_purpose::STANDARD.encode(bytes)
}

/// Write decimal using scovich's hybrid approach for i32
fn write_decimal_i32(
json_buffer: &mut impl Write,
integer: i32,
scale: u8,
) -> Result<(), ArrowError> {
let integer = if scale == 0 {
integer
} else {
let divisor = 10_i32.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
}
integer / divisor
};
write!(json_buffer, "{}", integer)?;
Ok(())
}

/// Write decimal using scovich's hybrid approach for i64
fn write_decimal_i64(
json_buffer: &mut impl Write,
integer: i64,
scale: u8,
) -> Result<(), ArrowError> {
let integer = if scale == 0 {
integer
} else {
let divisor = 10_i64.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
Comment on lines -77 to -80
Copy link
Contributor Author

@scovich scovich Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My original suggestion to use floating point was for the serde_json::Value::Number code path, which can only support i64, u64, and f64 values. Normal to-string can use arbitrary precision -- let the reader be the one to lose information (if it must).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it! this makes more sense, thank you

}
integer / divisor
};
write!(json_buffer, "{}", integer)?;
Ok(())
}

/// Converts a Variant to JSON and writes it to the provided `Write`
///
/// This function writes JSON directly to any type that implements [`Write`],
Expand Down Expand Up @@ -133,34 +89,9 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
Variant::Int64(i) => write!(json_buffer, "{}", i)?,
Variant::Float(f) => write!(json_buffer, "{}", f)?,
Variant::Double(f) => write!(json_buffer, "{}", f)?,
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
write_decimal_i32(json_buffer, *integer, *scale)?;
}
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
write_decimal_i64(json_buffer, *integer, *scale)?;
}
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
let integer = if *scale == 0 {
*integer
} else {
let divisor = 10_i128.pow(*scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = *integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
}
integer / divisor
};
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
if let Ok(i64_val) = i64::try_from(integer) {
write!(json_buffer, "{}", i64_val)?;
} else if let Ok(u64_val) = u64::try_from(integer) {
write!(json_buffer, "{}", u64_val)?;
} else {
write!(json_buffer, "{}", integer as f64)?;
}
}
Variant::Decimal4(decimal) => write!(json_buffer, "{}", decimal)?,
Variant::Decimal8(decimal) => write!(json_buffer, "{}", decimal)?,
Variant::Decimal16(decimal) => write!(json_buffer, "{}", decimal)?,
Variant::Date(date) => write!(json_buffer, "\"{}\"", format_date_string(date))?,
Variant::TimestampMicros(ts) => write!(json_buffer, "\"{}\"", ts.to_rfc3339())?,
Variant::TimestampNtzMicros(ts) => {
Expand Down Expand Up @@ -401,7 +332,8 @@ pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
}
integer / divisor
};
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
// i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
// i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
let value = i64::try_from(integer)
.map(Value::from)
.or_else(|_| u64::try_from(integer).map(Value::from))
Expand Down
249 changes: 249 additions & 0 deletions parquet-variant/src/variant/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,30 @@
// specific language governing permissions and limitations
// under the License.
use arrow_schema::ArrowError;
use std::fmt;

// Macro to format decimal values, using only integer arithmetic to avoid floating point precision loss
macro_rules! format_decimal {
($f:expr, $integer:expr, $scale:expr, $int_type:ty) => {{
let integer = if $scale == 0 {
$integer
} else {
let divisor = (10 as $int_type).pow($scale as u32);
Comment on lines +21 to +26
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was sorely tempted to just define a helper function that takes i128, since it would also produce correct output for all narrower integer types. But 128-bit division is vastly more expensive than 32- or 64-bit division, and the narrower types usually produce quotient and remainder from a single machine instruction.

If we think that performance difference doesn't matter, then we should use the helper function instead for simplicity.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

until we have benchmarks I don't think we can tell. This seems like a good improvement to me simply from a reusability perspective (I would liked to have a display impl for Variant in other times too)

let remainder = $integer % divisor;
if remainder != 0 {
// Track the sign explicitly, in case the quotient is zero
let sign = if $integer < 0 { "-" } else { "" };
// Format an unsigned remainder with leading zeros and strip (unnecessary) trailing zeros.
let remainder = format!("{:0width$}", remainder.abs(), width = $scale as usize);
let remainder = remainder.trim_end_matches('0');
let quotient = $integer / divisor;
return write!($f, "{}{}.{}", sign, quotient.abs(), remainder);
}
$integer / divisor
};
write!($f, "{}", integer)
}};
}

/// Represents a 4-byte decimal value in the Variant format.
///
Expand Down Expand Up @@ -57,6 +81,12 @@ impl VariantDecimal4 {
}
}

impl fmt::Display for VariantDecimal4 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i32)
}
}

/// Represents an 8-byte decimal value in the Variant format.
///
/// This struct stores a decimal number using a 64-bit signed integer for the coefficient
Expand Down Expand Up @@ -99,6 +129,12 @@ impl VariantDecimal8 {
}
}

impl fmt::Display for VariantDecimal8 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i64)
}
}

/// Represents an 16-byte decimal value in the Variant format.
///
/// This struct stores a decimal number using a 128-bit signed integer for the coefficient
Expand Down Expand Up @@ -141,6 +177,12 @@ impl VariantDecimal16 {
}
}

impl fmt::Display for VariantDecimal16 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_decimal!(f, self.integer, self.scale, i128)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -328,4 +370,211 @@ mod tests {
"Decimal16 with scale = 38 should succeed"
);
}

#[test]
fn test_variant_decimal4_display() {
// Test zero scale (integers)
let d = VariantDecimal4::try_new(42, 0).unwrap();
assert_eq!(d.to_string(), "42");

let d = VariantDecimal4::try_new(-42, 0).unwrap();
assert_eq!(d.to_string(), "-42");

// Test basic decimal formatting
let d = VariantDecimal4::try_new(12345, 2).unwrap();
assert_eq!(d.to_string(), "123.45");

let d = VariantDecimal4::try_new(-12345, 2).unwrap();
assert_eq!(d.to_string(), "-123.45");

// Test trailing zeros are trimmed
let d = VariantDecimal4::try_new(12300, 2).unwrap();
assert_eq!(d.to_string(), "123");

let d = VariantDecimal4::try_new(-12300, 2).unwrap();
assert_eq!(d.to_string(), "-123");

// Test leading zeros in decimal part
let d = VariantDecimal4::try_new(1005, 3).unwrap();
assert_eq!(d.to_string(), "1.005");

let d = VariantDecimal4::try_new(-1005, 3).unwrap();
assert_eq!(d.to_string(), "-1.005");

// Test number smaller than scale (leading zero before decimal)
let d = VariantDecimal4::try_new(123, 4).unwrap();
assert_eq!(d.to_string(), "0.0123");

let d = VariantDecimal4::try_new(-123, 4).unwrap();
assert_eq!(d.to_string(), "-0.0123");

// Test zero
let d = VariantDecimal4::try_new(0, 0).unwrap();
assert_eq!(d.to_string(), "0");

let d = VariantDecimal4::try_new(0, 3).unwrap();
assert_eq!(d.to_string(), "0");

// Test max scale
let d = VariantDecimal4::try_new(123456789, 9).unwrap();
assert_eq!(d.to_string(), "0.123456789");

let d = VariantDecimal4::try_new(-123456789, 9).unwrap();
assert_eq!(d.to_string(), "-0.123456789");

// Test max precision
let d = VariantDecimal4::try_new(999999999, 0).unwrap();
assert_eq!(d.to_string(), "999999999");

let d = VariantDecimal4::try_new(-999999999, 0).unwrap();
assert_eq!(d.to_string(), "-999999999");

// Test trailing zeros with mixed decimal places
let d = VariantDecimal4::try_new(120050, 4).unwrap();
assert_eq!(d.to_string(), "12.005");

let d = VariantDecimal4::try_new(-120050, 4).unwrap();
assert_eq!(d.to_string(), "-12.005");
}

#[test]
fn test_variant_decimal8_display() {
// Test zero scale (integers)
let d = VariantDecimal8::try_new(42, 0).unwrap();
assert_eq!(d.to_string(), "42");

let d = VariantDecimal8::try_new(-42, 0).unwrap();
assert_eq!(d.to_string(), "-42");

// Test basic decimal formatting
let d = VariantDecimal8::try_new(1234567890, 3).unwrap();
assert_eq!(d.to_string(), "1234567.89");

let d = VariantDecimal8::try_new(-1234567890, 3).unwrap();
assert_eq!(d.to_string(), "-1234567.89");

// Test trailing zeros are trimmed
let d = VariantDecimal8::try_new(123000000, 6).unwrap();
assert_eq!(d.to_string(), "123");

let d = VariantDecimal8::try_new(-123000000, 6).unwrap();
assert_eq!(d.to_string(), "-123");

// Test leading zeros in decimal part
let d = VariantDecimal8::try_new(100005, 6).unwrap();
assert_eq!(d.to_string(), "0.100005");

let d = VariantDecimal8::try_new(-100005, 6).unwrap();
assert_eq!(d.to_string(), "-0.100005");

// Test number smaller than scale
let d = VariantDecimal8::try_new(123, 10).unwrap();
assert_eq!(d.to_string(), "0.0000000123");

let d = VariantDecimal8::try_new(-123, 10).unwrap();
assert_eq!(d.to_string(), "-0.0000000123");

// Test zero
let d = VariantDecimal8::try_new(0, 0).unwrap();
assert_eq!(d.to_string(), "0");

let d = VariantDecimal8::try_new(0, 10).unwrap();
assert_eq!(d.to_string(), "0");

// Test max scale
let d = VariantDecimal8::try_new(123456789012345678, 18).unwrap();
assert_eq!(d.to_string(), "0.123456789012345678");

let d = VariantDecimal8::try_new(-123456789012345678, 18).unwrap();
assert_eq!(d.to_string(), "-0.123456789012345678");

// Test max precision
let d = VariantDecimal8::try_new(999999999999999999, 0).unwrap();
assert_eq!(d.to_string(), "999999999999999999");

let d = VariantDecimal8::try_new(-999999999999999999, 0).unwrap();
assert_eq!(d.to_string(), "-999999999999999999");

// Test complex trailing zeros
let d = VariantDecimal8::try_new(1200000050000, 10).unwrap();
assert_eq!(d.to_string(), "120.000005");

let d = VariantDecimal8::try_new(-1200000050000, 10).unwrap();
assert_eq!(d.to_string(), "-120.000005");
}

#[test]
fn test_variant_decimal16_display() {
// Test zero scale (integers)
let d = VariantDecimal16::try_new(42, 0).unwrap();
assert_eq!(d.to_string(), "42");

let d = VariantDecimal16::try_new(-42, 0).unwrap();
assert_eq!(d.to_string(), "-42");

// Test basic decimal formatting
let d = VariantDecimal16::try_new(123456789012345, 4).unwrap();
assert_eq!(d.to_string(), "12345678901.2345");

let d = VariantDecimal16::try_new(-123456789012345, 4).unwrap();
assert_eq!(d.to_string(), "-12345678901.2345");

// Test trailing zeros are trimmed
let d = VariantDecimal16::try_new(12300000000, 8).unwrap();
assert_eq!(d.to_string(), "123");

let d = VariantDecimal16::try_new(-12300000000, 8).unwrap();
assert_eq!(d.to_string(), "-123");

// Test leading zeros in decimal part
let d = VariantDecimal16::try_new(10000005, 8).unwrap();
assert_eq!(d.to_string(), "0.10000005");

let d = VariantDecimal16::try_new(-10000005, 8).unwrap();
assert_eq!(d.to_string(), "-0.10000005");

// Test number smaller than scale
let d = VariantDecimal16::try_new(123, 20).unwrap();
assert_eq!(d.to_string(), "0.00000000000000000123");

let d = VariantDecimal16::try_new(-123, 20).unwrap();
assert_eq!(d.to_string(), "-0.00000000000000000123");

// Test zero
let d = VariantDecimal16::try_new(0, 0).unwrap();
assert_eq!(d.to_string(), "0");

let d = VariantDecimal16::try_new(0, 20).unwrap();
assert_eq!(d.to_string(), "0");

// Test max scale
let d = VariantDecimal16::try_new(12345678901234567890123456789012345678_i128, 38).unwrap();
assert_eq!(d.to_string(), "0.12345678901234567890123456789012345678");

let d =
VariantDecimal16::try_new(-12345678901234567890123456789012345678_i128, 38).unwrap();
assert_eq!(d.to_string(), "-0.12345678901234567890123456789012345678");

// Test max precision integer
let d = VariantDecimal16::try_new(99999999999999999999999999999999999999_i128, 0).unwrap();
assert_eq!(d.to_string(), "99999999999999999999999999999999999999");

let d = VariantDecimal16::try_new(-99999999999999999999999999999999999999_i128, 0).unwrap();
assert_eq!(d.to_string(), "-99999999999999999999999999999999999999");

// Test complex trailing zeros
let d = VariantDecimal16::try_new(12000000000000050000000000000_i128, 25).unwrap();
assert_eq!(d.to_string(), "1200.000000000005");

let d = VariantDecimal16::try_new(-12000000000000050000000000000_i128, 25).unwrap();
assert_eq!(d.to_string(), "-1200.000000000005");

// Test large integer that would overflow i64 but fits in i128
let large_int = 12345678901234567890123456789_i128;
let d = VariantDecimal16::try_new(large_int, 0).unwrap();
assert_eq!(d.to_string(), "12345678901234567890123456789");

let d = VariantDecimal16::try_new(-large_int, 0).unwrap();
assert_eq!(d.to_string(), "-12345678901234567890123456789");
}
}
Loading