diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt index 6a1b29daff..91a0a0583e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt @@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference import kotlin.reflect.KProperty /* TODO KDocs - * numbers -> Double or null + * primitive numbers -> Double or null * comparable -> itself or null * + * Careful! non-primitive numbers will thus follow comparable rules + * * TODO cases where the lambda dictates the return type require explicit type arguments for * non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683 * so, `df.median { intCol }` works, but needs `df.median<_, String> { stringCol }` or `df.median({ dateCol })` diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt index b57bf16a3d..f015826149 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt @@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference import kotlin.reflect.KProperty /* TODO KDocs - * numbers -> Double or null + * primitive numbers -> Double or null * comparable -> itself or null * + * Careful! 
non-primitive numbers will thus follow comparable rules + * * TODO cases where the lambda dictates the return type require explicit type arguments for * non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683 * so, `df.percentile { intCol }` works, but needs `df.percentile<_, String> { stringCol }` or `df.percentile({ dateCol })` diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt index b500f62988..6819e166c2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt @@ -7,8 +7,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.impl.renderType -import java.math.BigDecimal -import java.math.BigInteger import kotlin.math.round import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -40,11 +38,6 @@ internal fun <T : Comparable<T>> Sequence<T>.medianOrNull(type: KType, skipNaN: }. Only primitive numbers or self-comparables are supported.", ) - type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() -> - throw IllegalArgumentException( - "Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.", - ) - // TODO kdocs: note about loss of precision for Long } @@ -107,11 +100,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfMedian(type: KType, s renderType(type) }. Only primitive numbers or self-comparables are supported.", ) - - nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() -> - throw IllegalArgumentException( - "Cannot calculate the median for big numbers in DataFrame. 
Only primitive numbers are supported.", - ) } // propagate NaN to return if they are not to be skipped diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt index edf913f181..f2de660b66 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt @@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.impl.renderType -import java.math.BigDecimal -import java.math.BigInteger import kotlin.math.round import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -33,11 +31,6 @@ internal fun <T : Comparable<T>> Sequence<T>.percentileOrNull(percentile: Double }. Only primitive numbers or self-comparables are supported.", ) - type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() -> - throw IllegalArgumentException( - "Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.", - ) - // TODO kdocs: note about loss of precision for Long } @@ -99,11 +92,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfPercentile( renderType(type) }. Only primitive numbers or self-comparables are supported.", ) - - nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() -> - throw IllegalArgumentException( - "Cannot calculate the percentile for big numbers in DataFrame. 
Only primitive numbers are supported.", - ) } val indexedSequence = this.mapIndexedNotNull { i, it -> diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt index 7927ed2b8c..4e52cf97b3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt @@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.impl.renderType -import java.math.BigDecimal -import java.math.BigInteger import kotlin.math.ceil import kotlin.math.floor import kotlin.math.round @@ -52,11 +50,6 @@ internal fun <T : Comparable<T>> Sequence<T>.quantileOrNull( renderType(type) }. Only primitive numbers or self-comparables are supported.", ) - - type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() -> - throw IllegalArgumentException( - "Cannot calculate the $name for big numbers in DataFrame. 
Only primitive numbers are supported.", - ) } // propagate NaN to return if they are not to be skipped diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt index ce002b5285..5fc09c4678 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt @@ -251,23 +251,25 @@ class MedianTests { @[Test Suppress("ktlint:standard:argument-list-wrapping")] fun `dataframe median`() { val df = dataFrameOf( - "a", "b", "c", + "a", "b", "c", "d", )( - 1, 2f, 3.0, - 4, 5f, 6.0, - 7, 8f, 9.0, + 1, 2f, 3.0, 1.toBigInteger(), + 4, 5f, 6.0, 2.toBigInteger(), + 7, 8f, 9.0, 4.toBigInteger(), ) // Get row with median values for each column val medians = df.median() - medians["a"] shouldBe 4 - medians["b"] shouldBe 5f + medians["a"] shouldBe 4.0 + medians["b"] shouldBe 5.0 medians["c"] shouldBe 6.0 + medians["d"] shouldBe 2.toBigInteger() // not interpolated! // Test median for specific columns - val medianFor = df.medianFor("a", "c") - medianFor["a"] shouldBe 4 + val medianFor = df.medianFor("a", "c", "d") + medianFor["a"] shouldBe 4.0 medianFor["c"] shouldBe 6.0 + medianFor["d"] shouldBe 2.toBigInteger() // not interpolated! 
} @[Test Suppress("ktlint:standard:argument-list-wrapping")] diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt index ceee06e325..51ab401492 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/percentile.kt @@ -225,33 +225,37 @@ class PercentileTests { @[Test Suppress("ktlint:standard:argument-list-wrapping")] fun `dataframe percentile`() { val df = dataFrameOf( - "a", "b", "c", + "a", "b", "c", "d", )( - 1, 2f, 3.0, - 4, 5f, 6.0, - 7, 8f, 9.0, + 1, 2f, 3.0, 1.toBigInteger(), + 4, 5f, 6.0, 2.toBigInteger(), + 7, 8f, 9.0, 4.toBigInteger(), ) // Get row with percentile values for each column val percentiles50 = df.percentile(50.0) - percentiles50["a"] shouldBe 4 - percentiles50["b"] shouldBe 5f + percentiles50["a"] shouldBe 4.0 + percentiles50["b"] shouldBe 5.0 percentiles50["c"] shouldBe 6.0 + percentiles50["d"] shouldBe 2.toBigInteger() // not interpolated! val percentiles25 = df.percentile(25.0) percentiles25["a"] shouldBe 1.5000000000000002 - percentiles25["b"] shouldBe 2.5f + percentiles25["b"] shouldBe 2.5 percentiles25["c"] shouldBe 3.5 + percentiles25["d"] shouldBe 1.toBigInteger() // not interpolated! val percentiles75 = df.percentile(75.0) percentiles75["a"] shouldBe 6.5 - percentiles75["b"] shouldBe 7.5f + percentiles75["b"] shouldBe 7.5 percentiles75["c"] shouldBe 8.5 + percentiles75["d"] shouldBe 2.toBigInteger() // not interpolated! // Test percentile for specific columns - val percentileFor50 = df.percentileFor(50.0, "a", "c") - percentileFor50["a"] shouldBe 4 + val percentileFor50 = df.percentileFor(50.0, "a", "c", "d") + percentileFor50["a"] shouldBe 4.0 percentileFor50["c"] shouldBe 6.0 + percentileFor50["d"] shouldBe 2.toBigInteger() // not interpolated! 
} @[Test Suppress("ktlint:standard:argument-list-wrapping")] diff --git a/docs/StardustDocs/topics/median.md b/docs/StardustDocs/topics/median.md index 3a20c256fa..8247a44912 100644 --- a/docs/StardustDocs/topics/median.md +++ b/docs/StardustDocs/topics/median.md @@ -20,6 +20,8 @@ The operation is also available for self-comparable columns (so columns of type `T : Comparable<T>`, like `DateTime`, `String`, etc.) In this case, the return type remains `T?`. When the number of values is even, the median is the low of the two middle values. +NOTE: This logic also applies to other self-comparable `Number` types, like `BigDecimal`. +They will not be interpolated. All operations on `Double`/`Float` have the `skipNaN` option, which is set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result. diff --git a/docs/StardustDocs/topics/percentile.md b/docs/StardustDocs/topics/percentile.md index 51c2cc8598..5d55be0285 100644 --- a/docs/StardustDocs/topics/percentile.md +++ b/docs/StardustDocs/topics/percentile.md @@ -25,6 +25,8 @@ The operation is also available for self-comparable columns In this case, the return type remains `T?`. The index of the result of the operation on these types is rounded using [Quantile Estimation Method](#quantile-estimation-methods) R3. +NOTE: This logic also applies to other self-comparable `Number` types, like `BigDecimal`. +They will not be interpolated. All operations on `Double`/`Float` have the `skipNaN` option, which is set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result.