Skip to content

Commit df16ffd

Browse files
corr docs fixes
1 parent 5b9db55 commit df16ffd

File tree

1 file changed

+32
-11
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api

1 file changed

+32
-11
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,30 @@ import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
1616
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
1717
import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
1818
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
19+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
1920
import kotlin.reflect.KProperty
2021
import kotlin.reflect.typeOf
2122

2223
/**
23-
* Calculates the correlation between values in the specified [columns\].
24+
* Calculates the Pearson pairwise correlation between values in the specified [columns\].
2425
*
2526
* This function does not compute the correlation immediately.
2627
* Instead, it defines the primary set of columns
2728
* and returns a [Corr] instance that allows configuring how the correlation should be computed.
2829
*
30+
* The function is available for numeric and [Boolean] columns.
31+
* [Boolean] values are converted into 1 for true and 0 for false.
32+
* All other columns are ignored.
33+
* If a [ColumnGroup] instance is passed as the target column for correlation,
34+
* it will be unpacked into suitable nested columns.
35+
*
2936
* The [Corr] object provides two methods to perform correlation calculations:
3037
* - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
3138
* - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
3239
*
33-
* Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
40+
* Each method returns a square or rectangular correlation matrix represented by a [DataFrame],
3441
* where rows and columns correspond to the selected column sets,
35-
* and each cell contains the correlation coefficient between the corresponding pair of columns.
42+
* and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
3643
*
3744
* To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
3845
*
@@ -92,7 +99,7 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
9299
* It must be followed by one of the computation methods to produce a correlation [DataFrame].
93100
*
94101
* The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95-
* columns to the other set, and each cell contains the correlation coefficient
102+
* columns to the other set, and each cell contains the Pearson correlation coefficient
96103
* between the respective pair of columns.
97104
*
98105
* Use the following methods to perform the computation:
@@ -108,11 +115,13 @@ public data class Corr<T, C>(internal val df: DataFrame<T>, internal val columns
108115
* Computes the correlation matrix between all suitable columns in this [DataFrame],
109116
* including nested columns at any depth.
110117
*
111-
* The result is a square correlation matrix represented as a [DataFrame],
118+
* The result is a square correlation matrix represented by a [DataFrame],
112119
* where both rows and columns correspond to the original columns,
113-
* and each cell contains the correlation coefficient between the respective pair of columns.
120+
* and each cell contains the Pearson correlation coefficient between the respective pair of columns.
114121
*
115-
* Only columns suitable for correlation (e.g., numeric types) are included in the result.
122+
* The function is available for numeric and [Boolean] columns.
123+
* [Boolean] values are converted into 1 for true and 0 for false.
124+
* All other columns are ignored.
116125
*
117126
* For more information, see: {@include [DocumentationUrls.Corr]}
118127
*
@@ -127,6 +136,12 @@ public fun <T> DataFrame<T>.corr(): DataFrame<T> =
127136
* {@include [CommonCorrDocs]}
128137
* @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129138
*
139+
* The function is available for numeric and [Boolean] columns.
140+
* [Boolean] values are converted into 1 for true and 0 for false.
141+
* All other columns are ignored.
142+
* If a [ColumnGroup] instance is passed as the target column for correlation,
143+
* it will be unpacked into suitable nested columns.
144+
*
130145
* ### Examples
131146
* ```kotlin
132147
* // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -145,6 +160,12 @@ public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C>
145160
* {@include [CommonCorrDocs]}
146161
* @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147162
*
163+
* The function is available for numeric and [Boolean] columns.
164+
* [Boolean] values are converted into 1 for true and 0 for false.
165+
* All other columns are ignored.
166+
* If a [ColumnGroup] instance is passed as the target column for correlation,
167+
* it will be unpacked into suitable nested columns.
168+
*
148169
* ### Examples
149170
* ```kotlin
150171
* // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -171,9 +192,9 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: ColumnReference<C>): Corr<T,
171192
* Calculates the correlation of specified [columns][otherColumns]
172193
* with values in the columns previously selected with [corr].
173194
*
174-
* Returns a correlation matrix represented as a [DataFrame],
195+
* Returns a correlation matrix represented by a [DataFrame],
175196
* where rows and columns correspond to the selected column sets,
176-
* and each cell contains the correlation coefficient between the corresponding pair of columns.
197+
* and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
177198
*
178199
* Check out [Grammar].
179200
*
@@ -244,9 +265,9 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: ColumnReference<R>): D
244265
* Calculates pairwise correlations between the columns
245266
* previously selected with [corr].
246267
*
247-
* Returns a square correlation matrix represented as a [DataFrame],
268+
* Returns a square correlation matrix represented by a [DataFrame],
248269
* where both rows and columns correspond to the selected columns,
249-
* and each cell contains the correlation coefficient between the respective pair of columns.
270+
* and each cell contains the Pearson correlation coefficient between the respective pair of columns.
250271
*
251272
* Check out [Grammar].
252273
*

0 commit comments

Comments
 (0)