@@ -16,23 +16,30 @@ import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
16
16
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
17
17
import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
18
18
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
19
+ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
19
20
import kotlin.reflect.KProperty
20
21
import kotlin.reflect.typeOf
21
22
22
23
/**
23
- * Calculates the correlation between values in the specified [columns\].
24
+ * Calculates the pairwise Pearson correlation between values in the specified [columns\].
24
25
*
25
26
* This function does not compute the correlation immediately.
26
27
* Instead, it defines the primary set of columns
27
28
* and returns a [Corr] instance that allows configuring how the correlation should be computed.
28
29
*
30
+ * The function is available for numeric and [Boolean] columns.
31
+ * [Boolean] values are converted into 1 for true and 0 for false.
32
+ * All other columns are ignored.
33
+ * If a [ColumnGroup] instance is passed as the target column for correlation,
34
+ * it will be unpacked into suitable nested columns.
35
+ *
29
36
* The [Corr] object provides two methods to perform correlation calculations:
30
37
* - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
31
38
* - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
32
39
*
33
- * Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
40
+ * Each method returns a square or rectangular correlation matrix represented by a [DataFrame],
34
41
* where rows and columns correspond to the selected column sets,
35
- * and each cell contains the correlation coefficient between the corresponding pair of columns.
42
+ * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
36
43
*
37
44
* To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
38
45
*
@@ -92,7 +99,7 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
92
99
* It must be followed by one of the computation methods to produce a correlation [DataFrame].
93
100
*
94
101
* The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95
- * columns to the other set, and each cell contains the correlation coefficient
102
+ * columns to the other set, and each cell contains the Pearson correlation coefficient
96
103
* between the respective pair of columns.
97
104
*
98
105
* Use the following methods to perform the computation:
@@ -108,11 +115,13 @@ public data class Corr<T, C>(internal val df: DataFrame<T>, internal val columns
108
115
* Computes the correlation matrix between all suitable columns in this [DataFrame],
109
116
* including nested columns at any depth.
110
117
*
111
- * The result is a square correlation matrix represented as a [DataFrame],
118
+ * The result is a square correlation matrix represented by a [DataFrame],
112
119
* where both rows and columns correspond to the original columns,
113
- * and each cell contains the correlation coefficient between the respective pair of columns.
120
+ * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
114
121
*
115
- * Only columns suitable for correlation (e.g., numeric types) are included in the result.
122
+ * The function is available for numeric and [Boolean] columns.
123
+ * [Boolean] values are converted into 1 for true and 0 for false.
124
+ * All other columns are ignored.
116
125
*
117
126
* For more information, see: {@include [DocumentationUrls.Corr]}
118
127
*
@@ -127,6 +136,12 @@ public fun <T> DataFrame<T>.corr(): DataFrame<T> =
127
136
* {@include [CommonCorrDocs]}
128
137
* @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129
138
*
139
+ * The function is available for numeric and [Boolean] columns.
140
+ * [Boolean] values are converted into 1 for true and 0 for false.
141
+ * All other columns are ignored.
142
+ * If a [ColumnGroup] instance is passed as the target column for correlation,
143
+ * it will be unpacked into suitable nested columns.
144
+ *
130
145
* ### Examples
131
146
* ```kotlin
132
147
* // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -145,6 +160,12 @@ public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C>
145
160
* {@include [CommonCorrDocs]}
146
161
* @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147
162
*
163
+ * The function is available for numeric and [Boolean] columns.
164
+ * [Boolean] values are converted into 1 for true and 0 for false.
165
+ * All other columns are ignored.
166
+ * If a [ColumnGroup] instance is passed as the target column for correlation,
167
+ * it will be unpacked into suitable nested columns.
168
+ *
148
169
* ### Examples
149
170
* ```kotlin
150
171
* // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -171,9 +192,9 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: ColumnReference<C>): Corr<T,
171
192
* Calculates the correlation of specified [columns][otherColumns]
172
193
* with values in the columns previously selected with [corr].
173
194
*
174
- * Returns a correlation matrix represented as a [DataFrame],
195
+ * Returns a correlation matrix represented by a [DataFrame],
175
196
* where rows and columns correspond to the selected column sets,
176
- * and each cell contains the correlation coefficient between the corresponding pair of columns.
197
+ * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
177
198
*
178
199
* Check out [Grammar].
179
200
*
@@ -244,9 +265,9 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: ColumnReference<R>): D
244
265
* Calculates pairwise correlations between the columns
245
266
* previously selected with [corr].
246
267
*
247
- * Returns a square correlation matrix represented as a [DataFrame],
268
+ * Returns a square correlation matrix represented by a [DataFrame],
248
269
* where both rows and columns correspond to the selected columns,
249
- * and each cell contains the correlation coefficient between the respective pair of columns.
270
+ * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
250
271
*
251
272
* Check out [Grammar].
252
273
*
0 commit comments