fix: removed ilog and added tests

Yuvraj-cyborg · Yuvraj-cyborg · commit cb2966384c12 · 2026-01-04T02:33:46.000+05:30
diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs
@@ -102,48 +102,17 @@ impl LogFunc {
     }
 }
 
-/// Checks if the base is valid for the efficient integer logarithm algorithm.
-#[inline]
-fn is_valid_integer_base(base: f64) -> bool {
-    base.trunc() == base && base >= 2.0 && base <= u32::MAX as f64
-}
-
 /// Generic function to calculate logarithm of a decimal value using the given base.
 ///
-/// For integer bases >= 2 with non-negative scale, uses the efficient integer `ilog` algorithm.
-/// For all other cases (non-integer bases, negative bases, non-finite bases),
-/// falls back to f64 computation which naturally returns NaN for invalid inputs,
-/// matching the behavior of `f64::log`.
+/// Uses f64 computation, matching the behavior of `f64::log`: a negative base or
+/// value yields NaN, but base 0 or 1 does not (e.g. log(0, 64) = 0, log(1, 64) = inf).
 fn log_decimal<T>(value: T, scale: i8, base: f64) -> Result<f64, ArrowError>
 where
     T: ToPrimitive + Copy,
 {
-    // For integer bases >= 2 and non-negative scale, try the efficient integer algorithm
-    if is_valid_integer_base(base)
-        && scale >= 0
-        && let Some(unscaled) = unscale_decimal_value(&value, scale)
-    {
-        return if unscaled > 0 {
-            Ok(unscaled.ilog(base as u128) as f64)
-        } else {
-            Ok(f64::NAN)
-        };
-    }
-
-    // Fallback to f64 computation for non-integer bases, negative scale, etc.
-    // This naturally returns NaN for invalid inputs (base <= 1, non-finite, value <= 0)
     decimal_to_f64(&value, scale).map(|v| v.log(base))
 }
 
-/// Unscale a decimal value by dividing by 10^scale, returning the result as u128.
-/// Returns None if the value is negative or the conversion fails.
-#[inline]
-fn unscale_decimal_value<T: ToPrimitive>(value: &T, scale: i8) -> Option<u128> {
-    let value_u128 = value.to_u128()?;
-    let divisor = 10u128.checked_pow(scale as u32)?;
-    Some(value_u128 / divisor)
-}
-
 /// Convert a scaled decimal value to f64.
 #[inline]
 fn decimal_to_f64<T: ToPrimitive>(value: &T, scale: i8) -> Result<f64, ArrowError> {
@@ -408,13 +377,10 @@ mod tests {
     #[test]
     fn test_log_decimal_native() {
         let value = 10_i128.pow(35);
-        assert_eq!((value as f64).log2(), 116.26748332105768);
-        assert_eq!(
-            log_decimal(value, 0, 2.0).unwrap(),
-            // TODO: see we're losing our decimal points compared to above
-            //       https://github.com/apache/datafusion/issues/18524
-            116.0
-        );
+        let expected = (value as f64).log2();
+        assert_eq!(expected, 116.26748332105768);
+        // Now using f64 computation, we get the precise value
+        assert!((log_decimal(value, 0, 2.0).unwrap() - expected).abs() < 1e-10);
     }
 
     #[test]
@@ -982,7 +948,8 @@ mod tests {
                 assert!((floats.value(1) - 2.0).abs() < 1e-10);
                 assert!((floats.value(2) - 3.0).abs() < 1e-10);
                 assert!((floats.value(3) - 4.0).abs() < 1e-10);
-                assert!((floats.value(4) - 4.0).abs() < 1e-10); // Integer rounding
+                // log10(12600) ≈ 4.1003 (not truncated to 4)
+                assert!((floats.value(4) - 12600f64.log10()).abs() < 1e-10);
                 assert!(floats.value(5).is_nan());
             }
             ColumnarValue::Scalar(_) => {
@@ -1117,8 +1084,10 @@ mod tests {
                 assert!((floats.value(1) - 2.0).abs() < 1e-10);
                 assert!((floats.value(2) - 3.0).abs() < 1e-10);
                 assert!((floats.value(3) - 4.0).abs() < 1e-10);
-                assert!((floats.value(4) - 4.0).abs() < 1e-10); // Integer rounding for float log
-                assert!((floats.value(5) - 38.0).abs() < 1e-10);
+                // log10(12600) ≈ 4.1003 (not truncated to 4)
+                assert!((floats.value(4) - 12600f64.log10()).abs() < 1e-10);
+                // log10(i128::MAX - 1000) ≈ 38.23 (not truncated to 38)
+                assert!((floats.value(5) - ((i128::MAX - 1000) as f64).log10()).abs() < 1e-10);
                 assert!(floats.value(6).is_nan());
             }
             ColumnarValue::Scalar(_) => {
@@ -1127,40 +1096,6 @@ mod tests {
         }
     }
 
-    #[test]
-    fn test_log_decimal128_invalid_base() {
-        // Invalid base (-2.0) should return NaN, matching f64::log behavior
-        let arg_fields = vec![
-            Field::new("b", DataType::Float64, false).into(),
-            Field::new("x", DataType::Decimal128(38, 0), false).into(),
-        ];
-        let args = ScalarFunctionArgs {
-            args: vec![
-                ColumnarValue::Scalar(ScalarValue::Float64(Some(-2.0))), // base
-                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(64), 38, 0)), // num
-            ],
-            arg_fields,
-            number_rows: 1,
-            return_field: Field::new("f", DataType::Float64, true).into(),
-            config_options: Arc::new(ConfigOptions::default()),
-        };
-        let result = LogFunc::new()
-            .invoke_with_args(args)
-            .expect("should not error on invalid base");
-
-        match result {
-            ColumnarValue::Array(arr) => {
-                let floats = as_float64_array(&arr)
-                    .expect("failed to convert result to a Float64Array");
-                assert_eq!(floats.len(), 1);
-                assert!(floats.value(0).is_nan());
-            }
-            ColumnarValue::Scalar(_) => {
-                panic!("Expected an array value")
-            }
-        }
-    }
-
     #[test]
     fn test_log_decimal256_large() {
         // Large Decimal256 values that don't fit in i128 now use f64 fallback
diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt
@@ -804,7 +804,7 @@ select log(arrow_cast(100, 'Decimal32(9, 2)'));
 query R
 select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)'));
 ----
-13
+13.591717513272
 
 # log for small decimal64
 query R
@@ -820,7 +820,7 @@ select log(arrow_cast(100, 'Decimal64(18, 2)'));
 query R
 select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)'));
 ----
-13
+13.591718553311
 
 
 # log for small decimal128
@@ -896,15 +896,13 @@ select log(10::decimal(38, 0), 100000000000000000000000000000000000::decimal(38,
 query R
 select log(2, 100000000000000000000000000000000000::decimal(38,0));
 ----
-116
+116.267483321058
 
 # log(10^35) for decimal128 with another base (float base)
-# TODO: this should be 116.267483321058, error with native decimal log impl
-#       https://github.com/apache/datafusion/issues/18524
 query R
 select log(2.0, 100000000000000000000000000000000000::decimal(38,0));
 ----
-116
+116.267483321058
 
 # log with non-integer base now works (fallback to f64)
 query R
@@ -1036,13 +1034,31 @@ from (values (10.0), (2.0), (3.0)) as t(base);
 query R
 SELECT log(10, arrow_cast(0.5, 'Decimal32(5, 1)'))
 ----
-NaN
+-0.301029995664
 
 query R
 SELECT log(10, arrow_cast(1 , 'Decimal32(5, 1)'))
 ----
 0
 
+# Test log with invalid base (-2.0) returns NaN, matching f64::log behavior
+query R
+SELECT log(-2.0, 64::decimal(38, 0))
+----
+NaN
+
+# Test log with base 0 returns 0 (log(x)/log(0) = log(x)/-inf = -0 ≈ 0)
+query R
+SELECT log(0.0, 64::decimal(38, 0))
+----
+0
+
+# Test log with base 1 returns Infinity (log base 1 is division by zero: log(x)/log(1) = log(x)/0)
+query R
+SELECT log(1.0, 64::decimal(38, 0))
+----
+Infinity
+
 # power with decimals
 
 query RT
@@ -1183,18 +1199,16 @@ select 100000000000000000000000000000000000::decimal(38,0)
 99999999999999996863366107917975552
 
 # log(10^35) for decimal128 with explicit decimal base
-# Float parsing is rounding down
 query R
 select log(10, 100000000000000000000000000000000000::decimal(38,0));
 ----
-34
+35
 
-# log(10^35) for large decimal128 if parsed as float
-# Float parsing is rounding down
+# log(10^35) for large decimal128
 query R
 select log(100000000000000000000000000000000000::decimal(38,0))
 ----
-34
+35
 
 # Result is decimal since argument is decimal regardless decimals-as-floats parsing
 query R