Skip to content

Commit cbf33d1

Browse files
shifluxxcJefffreymartin-g
authored
Fix regression for negative-scale decimal128 in log (#19315)
## Which issue does this PR close? - Part of #19250 ## Rationale for this change Previously, the `log` function would fail when operating on decimal values with negative scales. Negative scales in decimals represent values where the scale indicates padding zeros to the right (e.g., `Decimal128(38, -2)` with value `100` represents `10000`). This PR restores support for negative-scale decimals in the `log` function by implementing the logarithmic property: `log_base(value * 10^(-scale)) = log_base(value) + (-scale) * log_base(10)`. ## What changes are included in this PR? 1. **Enhanced `log_decimal128` function**: - Added support for negative scales using the logarithmic property - For negative scales, computes `log_base(value) + (-scale) * log_base(10)` instead of trying to convert to unscaled value - Added detection for negative-scale decimals in both the number and base arguments - Skips simplification when negative scales are detected to avoid errors with `ScalarValue` (which doesn't support negative scales yet) 2. **Added comprehensive tests**: - Unit tests in `log.rs` for negative-scale decimals with various bases (2, 3, 10) - SQL logic tests in `decimal.slt` using scientific notation (e.g., `1e4`, `8e1`) to create decimals with negative scales ## Are these changes tested? Yes, this PR includes comprehensive tests: 1. Unit tests: - `test_log_decimal128_negative_scale`: Tests array inputs with negative scales - `test_log_decimal128_negative_scale_base2`: Tests with base 2 and negative scales - `test_log_decimal128_negative_scale_scalar`: Tests scalar inputs with negative scales 2. SQL logic tests: - Tests for unary log with negative scales (`log(1e4)`) - Tests for binary log with explicit base 10 (`log(10, 1e4)`) - Tests for binary log with base 2 (`log(2.0, 8e1)`, `log(2.0, 16e1)`) - Tests for different negative scale values (`log(5e3)`) - Tests for array operations with negative scales - Tests for different bases (2, 3, 10) with negative-scale decimals All tests pass successfully. ## Are there any user-facing changes? Yes, this is a user-facing change: **Before**: The `log` function would fail with an error when operating on decimal values with negative scales: ```sql -- This would fail SELECT log(1e4); -- Error: Negative scale is not supported ``` **After**: The `log` function now correctly handles decimal values with negative scales: ```sql -- This now works SELECT log(1e4); -- Returns 4.0 (log10(10000)) SELECT log(2.0, 8e1); -- Returns ~6.32 (log2(80)) ``` --------- Co-authored-by: Jeffrey Vo <[email protected]> Co-authored-by: Martin Grigorov <[email protected]>
1 parent c2747eb commit cbf33d1

File tree

2 files changed

+74
-5
lines changed

2 files changed

+74
-5
lines changed

datafusion/functions/src/math/log.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,18 @@ fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64, ArrowError>
166166
)));
167167
}
168168

169-
let unscaled_value = decimal128_to_i128(value, scale)?;
170-
if unscaled_value > 0 {
169+
if value <= 0 {
170+
// Reflect f64::log behaviour
171+
return Ok(f64::NAN);
172+
}
173+
174+
if scale < 0 {
175+
let actual_value = (value as f64) * 10.0_f64.powi(-(scale as i32));
176+
Ok(actual_value.log(base))
177+
} else {
178+
let unscaled_value = decimal128_to_i128(value, scale)?;
171179
let log_value: u32 = unscaled_value.ilog(base as i128);
172180
Ok(log_value as f64)
173-
} else {
174-
// Reflect f64::log behaviour
175-
Ok(f64::NAN)
176181
}
177182
}
178183

@@ -342,6 +347,19 @@ impl ScalarUDFImpl for LogFunc {
342347
if num_args != 1 && num_args != 2 {
343348
return plan_err!("Expected log to have 1 or 2 arguments, got {num_args}");
344349
}
350+
351+
match arg_types.last().unwrap() {
352+
DataType::Decimal32(_, scale)
353+
| DataType::Decimal64(_, scale)
354+
| DataType::Decimal128(_, scale)
355+
| DataType::Decimal256(_, scale)
356+
if *scale < 0 =>
357+
{
358+
return Ok(ExprSimplifyResult::Original(args));
359+
}
360+
_ => (),
361+
};
362+
345363
let number = args.pop().unwrap();
346364
let number_datatype = arg_types.pop().unwrap();
347365
// default to base 10

datafusion/sqllogictest/test_files/decimal.slt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,57 @@ select log(2.0, null);
918918
----
919919
NULL
920920

921+
# log with negative scale decimals
922+
# Using scientific notation to create decimals with negative scales
923+
# 1e4 = 10000 with scale -4, log10(10000) = 4.0
924+
query R
925+
select log(1e4);
926+
----
927+
4
928+
929+
# log with negative scale and explicit base 10
930+
query R
931+
select log(10, 1e4);
932+
----
933+
4
934+
935+
# log with negative scale and base 2
936+
# 8e1 = 80 with scale -1, log2(80) ≈ 6.321928
937+
query R
938+
select log(2.0, 8e1);
939+
----
940+
6.321928094887
941+
942+
# log with negative scale and base 2 (another value)
943+
# 16e1 = 160 with scale -1, log2(160) ≈ 7.321928
944+
query R
945+
select log(2.0, 16e1);
946+
----
947+
7.321928094887
948+
949+
# log with negative scale -3
950+
# 5e3 = 5000 with scale -3, log10(5000) ≈ 3.69897
951+
query R
952+
select log(5e3);
953+
----
954+
3.698970004336
955+
956+
# log with negative scale array values
957+
query R rowsort
958+
select log(value) from (values (1e3), (1e4), (1e5)) as t(value);
959+
----
960+
3
961+
4
962+
5
963+
964+
# log with negative scale and different bases
965+
query R rowsort
966+
select log(base, 1e4) from (values (10.0), (2.0), (3.0)) as t(base);
967+
----
968+
13.287712379549
969+
4
970+
8.383613097158
971+
921972
# power with decimals
922973

923974
query RT

0 commit comments

Comments
 (0)