1
1
package org.jetbrains.kotlinx.dataframe.api
2
2
3
- import kotlinx.datetime.Instant
4
- import kotlinx.datetime.LocalDate
5
- import kotlinx.datetime.LocalDateTime
6
- import kotlinx.datetime.LocalTime
7
3
import org.jetbrains.kotlinx.dataframe.AnyCol
8
4
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
9
5
import org.jetbrains.kotlinx.dataframe.DataFrame
10
- import org.jetbrains.kotlinx.dataframe.RowColumnExpression
11
- import org.jetbrains.kotlinx.dataframe.RowValueExpression
12
6
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7
+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.Grammar
8
+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.SelectingOptions
13
9
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
14
10
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
15
- import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
16
- import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
17
11
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
18
- import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls.Convert
19
12
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink
20
13
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
21
14
import org.jetbrains.kotlinx.dataframe.documentation.Indent
22
15
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
23
16
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
24
- import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnGroupsAndNestedColumnsMention
25
17
import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
26
18
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27
- import java.math.BigDecimal
28
- import java.math.BigInteger
29
- import java.net.URL
30
19
import kotlin.reflect.KProperty
31
- import kotlin.reflect.KType
32
20
import kotlin.reflect.typeOf
33
21
34
22
/**
35
23
* Calculates the correlation between values in the specified [columns\].
36
24
*
37
- * This function does not perform the calculation immediately. Instead, it selects a primary set of columns
38
- * and returns a [Corr] object, which serves as an intermediate step in the correlation analysis.
25
+ * This function does not compute the correlation immediately.
26
+ * Instead, it defines the primary set of columns
27
+ * and returns a [Corr] instance that allows configuring how the correlation should be computed.
39
28
*
40
- * The [Corr] object provides two methods to perform correlation computations:
41
- * - [with][Corr.with] — allows you to specify a second set of columns and computes correlations between
42
- * the initially selected columns and this second set.
43
- * - [withItself][Corr.withItself] — computes correlations within the initially selected columns.
29
+ * The [Corr] object provides two methods to perform correlation calculations:
30
+ * - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
31
+ * - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
44
32
*
45
- * Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
33
+ * Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
34
+ * where rows and columns correspond to the selected column sets,
46
35
* and each cell contains the correlation coefficient between the corresponding pair of columns.
47
36
*
48
- * If you need to compute correlations between all columns in a DataFrame, use [DataFrame.corr()][DataFrame.corr].
37
+ * To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
49
38
*
50
39
* Check out [Grammar].
51
40
*
52
41
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
53
42
*
54
- * See [Selecting Columns][ConvertSelectingOptions ].
43
+ * See also: [Selecting Columns][SelectingOptions].
55
44
*
56
- * For more information: {@include [DocumentationUrls.Corr]}
45
+ * For more information, see: {@include [DocumentationUrls.Corr]}
57
46
*/
58
47
internal interface CorrDocs {
59
48
60
49
/**
61
50
* {@comment Version of [SelectingColumns] with correctly filled in examples}
62
51
* @include [SelectingColumns] {@include [SetCorrOperationArg]}
63
52
*/
64
- interface ConvertSelectingOptions
53
+ interface SelectingOptions
65
54
66
55
/**
67
56
* ## Corr Operation Grammar
@@ -98,30 +87,76 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
98
87
/**
99
88
* An intermediate class used in the [corr] operation.
100
89
*
101
- * This class itself does not perform any computations — it is a transitional step
102
- * before specifying how to compute correlation.
103
- * It must be followed by one of the methods specifying correlation
104
- * computation to produce a new correlation [DataFrame].
90
+ * This class does not perform any computation by itself — it serves as a transitional step
91
+ * before specifying how the correlation should be calculated.
92
+ * It must be followed by one of the computation methods to produce a correlation [DataFrame].
105
93
*
106
- * Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
107
- * and each cell contains the correlation coefficient between the corresponding pair of columns.
94
+ * The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95
+ * columns to the other set, and each cell contains the correlation coefficient
96
+ * between the respective pair of columns.
108
97
*
109
98
* Use the following methods to perform the computation:
110
- * - [with { columnsSelector }][with] – selects a second set of columns and computes correlations between
111
- * the initially selected columns and this second set.
112
- * - [withItself()][withItself] - computes correlations within the initially selected columns.
99
+ * - [with] — selects a second set of columns and computes correlations between
100
+ * the initially selected columns and this second set.
101
+ * - [withItself] — computes pairwise correlations within the initially selected columns.
113
102
*
114
103
* See [Grammar][CorrDocs.Grammar] for more details.
115
104
*/
116
105
public data class Corr<T, C>(
    // The frame handed over by `DataFrame.corr` (it passes `this`).
    internal val df: DataFrame<T>,
    // Selector for the first set of columns; `withItself` reuses it as the second set.
    internal val columns: ColumnsSelector<T, C>,
)
117
106
107
+ /**
108
+ * Computes the correlation matrix between all suitable columns in this [DataFrame],
109
+ * including nested columns at any depth.
110
+ *
111
+ * The result is a square correlation matrix represented as a [DataFrame],
112
+ * where both rows and columns correspond to the original columns,
113
+ * and each cell contains the correlation coefficient between the respective pair of columns.
114
+ *
115
+ * Only columns suitable for correlation (e.g., numeric types) are included in the result.
116
+ *
117
+ * For more information, see: {@include [DocumentationUrls.Corr]}
118
+ *
119
+ * @return A square correlation matrix as a [DataFrame], where both rows and columns correspond to the original columns.
120
+ */
118
121
public fun <T> DataFrame<T>.corr(): DataFrame<T> {
    // First column set: every column at any depth that passes isSuitableForCorr().
    val suitableColumns = corr { colsAtAnyDepth().filter { it.isSuitableForCorr() } }
    // Correlate that set with itself.
    return suitableColumns.withItself()
}
122
125
126
+ /**
127
+ * {@include [CommonCorrDocs]}
128
+ * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129
+ *
130
+ * ### Examples
131
+ * ```kotlin
132
+ * // Compute correlations between the "age" column and the "weight" and "height" columns
133
+ * df.corr { age }.with { weight and height }
134
+ *
135
+ * // Compute pairwise correlations between all columns of type `Number`
136
+ * df.corr { colsOf<Number>() }.withItself()
137
+ * ```
138
+ * @param [columns\] The [Columns Selector][ColumnsSelector] used to select the columns
139
+ * of this [DataFrame] to compute a correlation.
140
+ * @return A [Corr] intermediate object with the selected columns.
141
+ */
123
142
public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C> {
    // Only captures the receiver and the selector in a Corr instance.
    return Corr(this, columns)
}
124
143
144
+ /**
145
+ * {@include [CommonCorrDocs]}
146
+ * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147
+ *
148
+ * ### Examples
149
+ * ```kotlin
150
+ * // Compute correlations between the "age" column and the "weight" and "height" columns
151
+ * df.corr("age").with("weight", "height")
151
+ *
152
+ * // Compute pairwise correlations between the "age", "weight" and "height" columns
153
+ * df.corr("age", "weight", "height").withItself()
155
+ * ```
156
+ * @param [columns\] The [Column Names][String] used to select the columns
157
+ * of this [DataFrame] to compute a correlation.
158
+ * @return A [Corr] intermediate object with the selected columns.
159
+ */
125
160
public fun <T> DataFrame<T>.corr(vararg columns: String): Corr<T, Any?> {
    // Turn the names into a column set and forward to the selector-based overload.
    return corr { columns.toColumnSet() }
}
126
161
127
162
@Deprecated(DEPRECATED_ACCESS_API )
@@ -132,8 +167,67 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: KProperty<C>): Corr<T, C> =
132
167
@AccessApiOverload
133
168
public fun <T, C> DataFrame<T>.corr(vararg columns: ColumnReference<C>): Corr<T, C> {
    // Turn the references into a column set and forward to the selector-based overload.
    return corr { columns.toColumnSet() }
}
134
169
170
+ /**
171
+ * Calculates the correlation of specified [columns][otherColumns]
172
+ * with values in the columns previously selected with [corr].
173
+ *
174
+ * Returns a correlation matrix represented as a [DataFrame],
175
+ * where rows and columns correspond to the selected column sets,
176
+ * and each cell contains the correlation coefficient between the corresponding pair of columns.
177
+ *
178
+ * Check out [Grammar].
179
+ *
180
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
181
+ *
182
+ * See also: [Selecting Columns][SelectingOptions].
183
+ *
184
+ * For more information, see: {@include [DocumentationUrls.Corr]}
185
+ */
186
+ internal interface CorrWithDocs
187
+
188
+ /**
189
+ * {@include [CorrWithDocs]}
190
+ * ### This Corr With Overload
191
+ */
192
+ @ExcludeFromSources
193
+ private interface CommonCorrWithDocs
194
+
195
+ /**
196
+ * {@include [CommonCorrWithDocs]}
197
+ * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
198
+ *
199
+ * ### Examples
200
+ * ```kotlin
201
+ * // Compute correlations between the "age" column and the "weight" and "height" columns
202
+ * df.corr { age }.with { weight and height }
203
+ *
204
+ * // Compute correlations between the "speed" column and all columns of type `Double` (excluding itself)
205
+ * df.corr { speed }.with { colsOf<Double>() except speed }
206
+ * ```
207
+ *
208
+ * @param otherColumns The [ColumnsSelector] used to select the second set of columns
209
+ * from this [DataFrame] to compute correlations against the initially selected columns.
210
+ * @return A [DataFrame] containing the resulting correlation matrix.
211
+ */
135
212
public fun <T, C, R> Corr<T, C>.with(otherColumns: ColumnsSelector<T, R>): DataFrame<T> {
    // The computation itself is delegated to corrImpl with the second selector.
    return corrImpl(otherColumns)
}
136
213
214
+ /**
215
+ * {@include [CommonCorrWithDocs]}
216
+ * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
217
+ *
218
+ * ### Examples
219
+ * ```kotlin
220
+ * // Compute correlations between the "age" column and the "weight" and "height" columns
221
+ * df.corr("age").with("weight", "height")
222
+ *
223
+ * // Compute correlations between the "speed" column and all columns of type `Number`
224
+ * df.corr { colsOf<Number>() }.with("speed")
225
+ * ```
226
+ *
227
+ * @param otherColumns The [Column Names][String] used to select the second set of columns
228
+ * from this [DataFrame] to compute correlations against the initially selected columns.
229
+ * @return A [DataFrame] containing the resulting correlation matrix.
230
+ */
137
231
public fun <T, C> Corr<T, C>.with(vararg otherColumns: String): DataFrame<T> {
    // Turn the names into a column set and forward to the selector-based overload.
    return with { otherColumns.toColumnSet() }
}
138
232
139
233
@Deprecated(DEPRECATED_ACCESS_API )
@@ -146,6 +240,20 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: KProperty<R>): DataFra
146
240
public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: ColumnReference<R>): DataFrame<T> {
    // Turn the references into a column set and forward to the selector-based overload.
    return with { otherColumns.toColumnSet() }
}
148
242
243
+ /**
244
+ * Calculates pairwise correlations between the columns
245
+ * previously selected with [corr].
246
+ *
247
+ * Returns a square correlation matrix represented as a [DataFrame],
248
+ * where both rows and columns correspond to the selected columns,
249
+ * and each cell contains the correlation coefficient between the respective pair of columns.
250
+ *
251
+ * Check out [Grammar].
252
+ *
253
+ * For more information, see: {@include [DocumentationUrls.Corr]}
254
+ *
255
+ * @return A [DataFrame] containing the pairwise correlation matrix.
256
+ */
149
257
public fun <T, C> Corr<T, C>.withItself(): DataFrame<T> {
    // The selector stored in this Corr is reused as the second column set.
    return this.with(columns)
}
150
258
151
259
// endregion
0 commit comments