Kotlin · Jolanrensen · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt
@@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference
 import kotlin.reflect.KProperty
 
 /* TODO KDocs
- * numbers -> Double or null
+ * primitive numbers -> Double or null
  * comparable -> itself or null
  *
+ * Careful! Non-primitive numbers (like BigInteger/BigDecimal) thus follow the comparable rules: no interpolation.
+ *
  * TODO cases where the lambda dictates the return type require explicit type arguments for
  *  non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683
  *  so, `df.median { intCol }` works, but needs `df.median<_, String> { stringCol }` or `df.median({ dateCol })`

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt
@@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference
 import kotlin.reflect.KProperty
 
 /* TODO KDocs
- * numbers -> Double or null
+ * primitive numbers -> Double or null
  * comparable -> itself or null
  *
+ * Careful! Non-primitive numbers (like BigInteger/BigDecimal) thus follow the comparable rules: no interpolation.
+ *
  * TODO cases where the lambda dictates the return type require explicit type arguments for
  *  non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683
  *  so, `df.percentile { intCol }` works, but needs `df.percentile<_, String> { stringCol }` or `df.percentile({ dateCol })`

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt
@@ -7,8 +7,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
 import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
 import org.jetbrains.kotlinx.dataframe.impl.nothingType
 import org.jetbrains.kotlinx.dataframe.impl.renderType
-import java.math.BigDecimal
-import java.math.BigInteger
 import kotlin.math.round
 import kotlin.reflect.KType
 import kotlin.reflect.full.withNullability
@@ -40,11 +38,6 @@ internal fun <T : Comparable<T>> Sequence<T>.medianOrNull(type: KType, skipNaN:
                 }. Only primitive numbers or self-comparables are supported.",
             )
 
-        type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
-            throw IllegalArgumentException(
-                "Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.",
-            )
-
         // TODO kdocs: note about loss of precision for Long
     }
 
@@ -107,11 +100,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfMedian(type: KType, s
                     renderType(type)
                 }. Only primitive numbers or self-comparables are supported.",
             )
-
-        nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() ->
-            throw IllegalArgumentException(
-                "Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.",
-            )
     }
 
     // propagate NaN to return if they are not to be skipped

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt
@@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
 import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
 import org.jetbrains.kotlinx.dataframe.impl.nothingType
 import org.jetbrains.kotlinx.dataframe.impl.renderType
-import java.math.BigDecimal
-import java.math.BigInteger
 import kotlin.math.round
 import kotlin.reflect.KType
 import kotlin.reflect.full.withNullability
@@ -33,11 +31,6 @@ internal fun <T : Comparable<T>> Sequence<T>.percentileOrNull(percentile: Double
                 }. Only primitive numbers or self-comparables are supported.",
             )
 
-        type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
-            throw IllegalArgumentException(
-                "Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.",
-            )
-
         // TODO kdocs: note about loss of precision for Long
     }
 
@@ -99,11 +92,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfPercentile(
                     renderType(type)
                 }. Only primitive numbers or self-comparables are supported.",
             )
-
-        nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() ->
-            throw IllegalArgumentException(
-                "Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.",
-            )
     }
 
     val indexedSequence = this.mapIndexedNotNull { i, it ->

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt
@@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
 import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
 import org.jetbrains.kotlinx.dataframe.impl.nothingType
 import org.jetbrains.kotlinx.dataframe.impl.renderType
-import java.math.BigDecimal
-import java.math.BigInteger
 import kotlin.math.ceil
 import kotlin.math.floor
 import kotlin.math.round
@@ -52,11 +50,6 @@ internal fun <T : Comparable<T>> Sequence<Any>.quantileOrNull(
                     renderType(type)
                 }. Only primitive numbers or self-comparables are supported.",
             )
-
-        type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
-            throw IllegalArgumentException(
-                "Cannot calculate the $name for big numbers in DataFrame. Only primitive numbers are supported.",
-            )
     }
 
     // propagate NaN to return if they are not to be skipped

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt
@@ -251,23 +251,25 @@ class MedianTests {
     @[Test Suppress("ktlint:standard:argument-list-wrapping")]
     fun `dataframe median`() {
         val df = dataFrameOf(
-            "a", "b", "c",
+            "a", "b", "c", "d",
         )(
-            1, 2f, 3.0,
-            4, 5f, 6.0,
-            7, 8f, 9.0,
+            1, 2f, 3.0, 1.toBigInteger(),
+            4, 5f, 6.0, 2.toBigInteger(),
+            7, 8f, 9.0, 4.toBigInteger(),
         )
 
         // Get row with median values for each column
         val medians = df.median()
-        medians["a"] shouldBe 4
-        medians["b"] shouldBe 5f
+        medians["a"] shouldBe 4.0
+        medians["b"] shouldBe 5.0
         medians["c"] shouldBe 6.0
+        medians["d"] shouldBe 2.toBigInteger() // not interpolated!
 
         // Test median for specific columns
-        val medianFor = df.medianFor("a", "c")
-        medianFor["a"] shouldBe 4
+        val medianFor = df.medianFor("a", "c", "d")
+        medianFor["a"] shouldBe 4.0
         medianFor["c"] shouldBe 6.0
+        medianFor["d"] shouldBe 2.toBigInteger() // not interpolated!
     }
 
     @[Test Suppress("ktlint:standard:argument-list-wrapping")]

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt
@@ -225,33 +225,37 @@ class PercentileTests {
     @[Test Suppress("ktlint:standard:argument-list-wrapping")]
     fun `dataframe percentile`() {
         val df = dataFrameOf(
-            "a", "b", "c",
+            "a", "b", "c", "d",
         )(
-            1, 2f, 3.0,
-            4, 5f, 6.0,
-            7, 8f, 9.0,
+            1, 2f, 3.0, 1.toBigInteger(),
+            4, 5f, 6.0, 2.toBigInteger(),
+            7, 8f, 9.0, 4.toBigInteger(),
         )
 
         // Get row with percentile values for each column
         val percentiles50 = df.percentile(50.0)
-        percentiles50["a"] shouldBe 4
-        percentiles50["b"] shouldBe 5f
+        percentiles50["a"] shouldBe 4.0
+        percentiles50["b"] shouldBe 5.0
         percentiles50["c"] shouldBe 6.0
+        percentiles50["d"] shouldBe 2.toBigInteger() // not interpolated!
 
         val percentiles25 = df.percentile(25.0)
         percentiles25["a"] shouldBe 1.5000000000000002
-        percentiles25["b"] shouldBe 2.5f
+        percentiles25["b"] shouldBe 2.5
         percentiles25["c"] shouldBe 3.5
+        percentiles25["d"] shouldBe 1.toBigInteger() // not interpolated!
 
         val percentiles75 = df.percentile(75.0)
         percentiles75["a"] shouldBe 6.5
-        percentiles75["b"] shouldBe 7.5f
+        percentiles75["b"] shouldBe 7.5
         percentiles75["c"] shouldBe 8.5
+        percentiles75["d"] shouldBe 2.toBigInteger() // not interpolated!
 
         // Test percentile for specific columns
-        val percentileFor50 = df.percentileFor(50.0, "a", "c")
-        percentileFor50["a"] shouldBe 4
+        val percentileFor50 = df.percentileFor(50.0, "a", "c", "d")
+        percentileFor50["a"] shouldBe 4.0
         percentileFor50["c"] shouldBe 6.0
+        percentileFor50["d"] shouldBe 2.toBigInteger() // not interpolated!
     }
 
     @[Test Suppress("ktlint:standard:argument-list-wrapping")]

diff --git a/docs/StardustDocs/topics/median.md b/docs/StardustDocs/topics/median.md
@@ -20,6 +20,8 @@ The operation is also available for self-comparable columns
 (so columns of type `T : Comparable<T>`, like `DateTime`, `String`, etc.)
 In this case, the return type remains `T?`.
 When the number of values is even, the median is the low of the two middle values.
+NOTE: This logic also applies to non-primitive, self-comparable `Number` types, like `BigDecimal` and `BigInteger`.
+Their values are not interpolated, and the return type remains `T?` rather than `Double?`.
 
 All operations on `Double`/`Float` have the `skipNaN` option, which is
 set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result.

diff --git a/docs/StardustDocs/topics/percentile.md b/docs/StardustDocs/topics/percentile.md
@@ -25,6 +25,8 @@ The operation is also available for self-comparable columns
 In this case, the return type remains `T?`.
 The index of the result of the operation on these types is rounded using
 [Quantile Estimation Method](#quantile-estimation-methods) R3.
+NOTE: This logic also applies to non-primitive, self-comparable `Number` types, like `BigDecimal` and `BigInteger`.
+Their values are not interpolated, and the return type remains `T?` rather than `Double?`.
 
 All operations on `Double`/`Float` have the `skipNaN` option, which is
 set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result.