Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ impl fmt::Display for DataType {
.map(|f| format!("{} {}", f.name(), f.data_type()))
.collect::<Vec<_>>()
.join(", ");
write!(f, "{}", fields_str)?;
write!(f, "{fields_str}")?;
}
write!(f, ")")?;
Ok(())
Expand Down
14 changes: 6 additions & 8 deletions arrow-schema/src/datatype_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,9 @@ impl<'a> Parser<'a> {
Token::LargeList => self.parse_large_list(),
Token::FixedSizeList => self.parse_fixed_size_list(),
Token::Struct => self.parse_struct(),
Token::FieldName(word) => Err(make_error(
self.val,
&format!("unrecognized word: {}", word),
)),
Token::FieldName(word) => {
Err(make_error(self.val, &format!("unrecognized word: {word}")))
}
tok => Err(make_error(
self.val,
&format!("finding next type, got unexpected '{tok}'"),
Expand Down Expand Up @@ -155,10 +154,9 @@ impl<'a> Parser<'a> {
fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
match self.next_token()? {
Token::DoubleQuotedString(s) => Ok(s),
Token::FieldName(word) => Err(make_error(
self.val,
&format!("unrecognized word: {}", word),
)),
Token::FieldName(word) => {
Err(make_error(self.val, &format!("unrecognized word: {word}")))
}
tok => Err(make_error(
self.val,
&format!("finding double quoted string for {context}, got '{tok}'"),
Expand Down
2 changes: 1 addition & 1 deletion arrow-schema/src/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ impl UnionFields {
.inspect(|&idx| {
let mask = 1_u128 << idx;
if (set & mask) != 0 {
panic!("duplicate type id: {}", idx);
panic!("duplicate type id: {idx}");
} else {
set |= mask;
}
Expand Down
2 changes: 1 addition & 1 deletion parquet-variant/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ mod tests {
let val2 = list.get(2).unwrap();
assert_eq!(val2, Variant::ShortString(ShortString("test")));
}
_ => panic!("Expected an array variant, got: {:?}", variant),
_ => panic!("Expected an array variant, got: {variant:?}"),
}
}

Expand Down
3 changes: 1 addition & 2 deletions parquet-variant/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ impl TryFrom<u8> for VariantPrimitiveType {
15 => Ok(VariantPrimitiveType::Binary),
16 => Ok(VariantPrimitiveType::String),
_ => Err(ArrowError::InvalidArgumentError(format!(
"unknown primitive type: {}",
value
"unknown primitive type: {value}",
))),
}
}
Expand Down
131 changes: 28 additions & 103 deletions parquet-variant/src/to_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,51 +41,7 @@ fn format_binary_base64(bytes: &[u8]) -> String {
general_purpose::STANDARD.encode(bytes)
}

/// Write decimal using scovich's hybrid approach for i32
fn write_decimal_i32(
json_buffer: &mut impl Write,
integer: i32,
scale: u8,
) -> Result<(), ArrowError> {
let integer = if scale == 0 {
integer
} else {
let divisor = 10_i32.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
}
integer / divisor
};
write!(json_buffer, "{}", integer)?;
Ok(())
}

/// Write decimal using scovich's hybrid approach for i64
fn write_decimal_i64(
json_buffer: &mut impl Write,
integer: i64,
scale: u8,
) -> Result<(), ArrowError> {
let integer = if scale == 0 {
integer
} else {
let divisor = 10_i64.pow(scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
Comment on lines -77 to -80
Copy link
Contributor Author

@scovich scovich Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My original suggestion to use floating point was for the serde_json::Value::Number code path, which can only support i64, u64, and f64 values. Normal to-string can use arbitrary precision -- let the reader be the one to lose information (if it must).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it! this makes more sense, thank you

}
integer / divisor
};
write!(json_buffer, "{}", integer)?;
Ok(())
}

/// Converts a Variant to JSON and writes it to the provided [`Write`]
/// Converts a Variant to JSON and writes it to the provided `Write`
///
/// This function writes JSON directly to any type that implements [`Write`],
/// making it efficient for streaming or when you want to control the output destination.
Expand Down Expand Up @@ -140,40 +96,15 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
Variant::Null => write!(json_buffer, "null")?,
Variant::BooleanTrue => write!(json_buffer, "true")?,
Variant::BooleanFalse => write!(json_buffer, "false")?,
Variant::Int8(i) => write!(json_buffer, "{}", i)?,
Variant::Int16(i) => write!(json_buffer, "{}", i)?,
Variant::Int32(i) => write!(json_buffer, "{}", i)?,
Variant::Int64(i) => write!(json_buffer, "{}", i)?,
Variant::Float(f) => write!(json_buffer, "{}", f)?,
Variant::Double(f) => write!(json_buffer, "{}", f)?,
Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
write_decimal_i32(json_buffer, *integer, *scale)?;
}
Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
write_decimal_i64(json_buffer, *integer, *scale)?;
}
Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
let integer = if *scale == 0 {
*integer
} else {
let divisor = 10_i128.pow(*scale as u32);
if integer % divisor != 0 {
// fall back to floating point
let result = *integer as f64 / divisor as f64;
write!(json_buffer, "{}", result)?;
return Ok(());
}
integer / divisor
};
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
if let Ok(i64_val) = i64::try_from(integer) {
write!(json_buffer, "{}", i64_val)?;
} else if let Ok(u64_val) = u64::try_from(integer) {
write!(json_buffer, "{}", u64_val)?;
} else {
write!(json_buffer, "{}", integer as f64)?;
}
}
Variant::Int8(i) => write!(json_buffer, "{i}")?,
Variant::Int16(i) => write!(json_buffer, "{i}")?,
Variant::Int32(i) => write!(json_buffer, "{i}")?,
Variant::Int64(i) => write!(json_buffer, "{i}")?,
Variant::Float(f) => write!(json_buffer, "{f}")?,
Variant::Double(f) => write!(json_buffer, "{f}")?,
Variant::Decimal4(decimal) => write!(json_buffer, "{decimal}")?,
Variant::Decimal8(decimal) => write!(json_buffer, "{decimal}")?,
Variant::Decimal16(decimal) => write!(json_buffer, "{decimal}")?,
Variant::Date(date) => write!(json_buffer, "\"{}\"", format_date_string(date))?,
Variant::TimestampMicros(ts) => write!(json_buffer, "\"{}\"", ts.to_rfc3339())?,
Variant::TimestampNtzMicros(ts) => {
Expand All @@ -183,23 +114,23 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul
// Encode binary as base64 string
let base64_str = format_binary_base64(bytes);
let json_str = serde_json::to_string(&base64_str).map_err(|e| {
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
})?;
write!(json_buffer, "{}", json_str)?
write!(json_buffer, "{json_str}")?
}
Variant::String(s) => {
// Use serde_json to properly escape the string
let json_str = serde_json::to_string(s).map_err(|e| {
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
})?;
write!(json_buffer, "{}", json_str)?
write!(json_buffer, "{json_str}")?
}
Variant::ShortString(s) => {
// Use serde_json to properly escape the string
let json_str = serde_json::to_string(s.as_str()).map_err(|e| {
ArrowError::InvalidArgumentError(format!("JSON encoding error: {}", e))
ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
})?;
write!(json_buffer, "{}", json_str)?
write!(json_buffer, "{json_str}")?
}
Variant::Object(obj) => {
convert_object_to_json(json_buffer, obj)?;
Expand All @@ -226,9 +157,9 @@ fn convert_object_to_json(buffer: &mut impl Write, obj: &VariantObject) -> Resul

// Write the key (properly escaped)
let json_key = serde_json::to_string(key).map_err(|e| {
ArrowError::InvalidArgumentError(format!("JSON key encoding error: {}", e))
ArrowError::InvalidArgumentError(format!("JSON key encoding error: {e}"))
})?;
write!(buffer, "{}:", json_key)?;
write!(buffer, "{json_key}:")?;

// Recursively convert the value
variant_to_json(buffer, &value)?;
Expand Down Expand Up @@ -313,7 +244,7 @@ pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> {
let mut buffer = Vec::new();
variant_to_json(&mut buffer, variant)?;
String::from_utf8(buffer)
.map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {}", e)))
.map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {e}")))
}

/// Convert [`Variant`] to [`serde_json::Value`]
Expand Down Expand Up @@ -394,7 +325,8 @@ pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
}
integer / divisor
};
// Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if necessary
// i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
// i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
let value = i64::try_from(integer)
.map(Value::from)
.or_else(|_| u64::try_from(integer).map(Value::from))
Expand Down Expand Up @@ -928,8 +860,7 @@ mod tests {
let json = variant_to_json_string(&variant)?;

// Parse the JSON to verify structure - handle JSON parsing errors manually
let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
let obj = parsed.as_object().expect("expected JSON object");
assert_eq!(obj.get("name"), Some(&Value::String("Alice".to_string())));
assert_eq!(obj.get("age"), Some(&Value::Number(30.into())));
Expand Down Expand Up @@ -990,8 +921,7 @@ mod tests {
assert!(json.contains("😀 Smiley"));

// Verify that the JSON can be parsed back
let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
assert!(matches!(parsed, Value::Object(_)));

Ok(())
Expand Down Expand Up @@ -1069,8 +999,7 @@ mod tests {
let variant = Variant::try_new(&metadata, &value)?;
let json = variant_to_json_string(&variant)?;

let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
let arr = parsed.as_array().expect("expected JSON array");
assert_eq!(arr.len(), 5);
assert_eq!(arr[0], Value::String("hello".to_string()));
Expand Down Expand Up @@ -1102,8 +1031,7 @@ mod tests {
let json = variant_to_json_string(&variant)?;

// Parse and verify all fields are present
let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
let obj = parsed.as_object().expect("expected JSON object");
assert_eq!(obj.len(), 3);
assert_eq!(obj.get("alpha"), Some(&Value::String("first".to_string())));
Expand Down Expand Up @@ -1135,8 +1063,7 @@ mod tests {
let variant = Variant::try_new(&metadata, &value)?;
let json = variant_to_json_string(&variant)?;

let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
let arr = parsed.as_array().expect("expected JSON array");
assert_eq!(arr.len(), 7);
assert_eq!(arr[0], Value::String("string_value".to_string()));
Expand Down Expand Up @@ -1171,8 +1098,7 @@ mod tests {
let variant = Variant::try_new(&metadata, &value)?;
let json = variant_to_json_string(&variant)?;

let parsed: Value = serde_json::from_str(&json)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json).unwrap();
let obj = parsed.as_object().expect("expected JSON object");
assert_eq!(obj.len(), 6);
assert_eq!(
Expand Down Expand Up @@ -1202,8 +1128,7 @@ mod tests {

// Due to f64 precision limits, we expect precision loss for values > 2^53
// Both functions should produce consistent results (even if not exact)
let parsed: Value = serde_json::from_str(&json_string)
.map_err(|e| ArrowError::ParseError(format!("JSON parse error: {}", e)))?;
let parsed: Value = serde_json::from_str(&json_string).unwrap();
assert_eq!(parsed, json_value);

// Test a case that can be exactly represented (integer result)
Expand Down
Loading
Loading