Skip to content

Commit 5b9db55

Browse files
corr kdocs complete
1 parent c0a09a0 commit 5b9db55

File tree

1 file changed

+142
-34
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api

1 file changed

+142
-34
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt

Lines changed: 142 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,56 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

3-
import kotlinx.datetime.Instant
4-
import kotlinx.datetime.LocalDate
5-
import kotlinx.datetime.LocalDateTime
6-
import kotlinx.datetime.LocalTime
73
import org.jetbrains.kotlinx.dataframe.AnyCol
84
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
95
import org.jetbrains.kotlinx.dataframe.DataFrame
10-
import org.jetbrains.kotlinx.dataframe.RowColumnExpression
11-
import org.jetbrains.kotlinx.dataframe.RowValueExpression
126
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7+
import org.jetbrains.kotlinx.dataframe.api.CorrDocs.Grammar
8+
import org.jetbrains.kotlinx.dataframe.api.CorrDocs.SelectingOptions
139
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1410
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
15-
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
16-
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
1711
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
18-
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls.Convert
1912
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink
2013
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
2114
import org.jetbrains.kotlinx.dataframe.documentation.Indent
2215
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
2316
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
24-
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnGroupsAndNestedColumnsMention
2517
import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
2618
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27-
import java.math.BigDecimal
28-
import java.math.BigInteger
29-
import java.net.URL
3019
import kotlin.reflect.KProperty
31-
import kotlin.reflect.KType
3220
import kotlin.reflect.typeOf
3321

3422
/**
3523
* Calculates the correlation between values in the specified [columns\].
3624
*
37-
* This function does not perform the calculation immediately. Instead, it selects a primary set of columns
38-
* and returns a [Corr] object, which serves as an intermediate step in the correlation analysis.
25+
* This function does not compute the correlation immediately.
26+
* Instead, it defines the primary set of columns
27+
* and returns a [Corr] instance that allows configuring how the correlation should be computed.
3928
*
40-
* The [Corr] object provides two methods to perform correlation computations:
41-
* - [with][Corr.with] — allows you to specify a second set of columns and computes correlations between
42-
* the initially selected columns and this second set.
43-
* - [withItself][Corr.withItself] — computes correlations within the initially selected columns.
29+
* The [Corr] object provides two methods to perform correlation calculations:
30+
* - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
31+
* - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
4432
*
45-
* Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
33+
* Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
34+
* where rows and columns correspond to the selected column sets,
4635
* and each cell contains the correlation coefficient between the corresponding pair of columns.
4736
*
48-
* If you need to compute correlations between all columns in a DataFrame, use [DataFrame.corr()][DataFrame.corr].
37+
* To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
4938
*
5039
* Check out [Grammar].
5140
*
5241
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
5342
*
54-
* See [Selecting Columns][ConvertSelectingOptions].
43+
* See also: [Selecting Columns][SelectingOptions].
5544
*
56-
* For more information: {@include [DocumentationUrls.Corr]}
45+
* For more information, see: {@include [DocumentationUrls.Corr]}
5746
*/
5847
internal interface CorrDocs {
5948

6049
/**
6150
* {@comment Version of [SelectingColumns] with correctly filled in examples}
6251
* @include [SelectingColumns] {@include [SetCorrOperationArg]}
6352
*/
64-
interface ConvertSelectingOptions
53+
interface SelectingOptions // KDoc-only holder; the corr docs link to it as "Selecting Columns".
6554

6655
/**
6756
* ## Corr Operation Grammar
@@ -98,30 +87,76 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
9887
/**
9988
* An intermediate class used in the [corr] operation.
10089
*
101-
* This class itself does not perform any computations — it is a transitional step
102-
* before specifying how to compute correlation.
103-
* It must be followed by one of the methods specifying correlation
104-
* computation to produce a new correlation [DataFrame].
90+
* This class does not perform any computation by itself — it serves as a transitional step
91+
* before specifying how the correlation should be calculated.
92+
* It must be followed by one of the computation methods to produce a correlation [DataFrame].
10593
*
106-
* Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
107-
* and each cell contains the correlation coefficient between the corresponding pair of columns.
94+
* The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95+
* columns to the other set, and each cell contains the correlation coefficient
96+
* between the respective pair of columns.
10897
*
10998
* Use the following methods to perform the computation:
110-
* - [with { columnsSelector }][with] – selects a second set of columns and computes correlations between
111-
* the initially selected columns and this second set.
112-
* - [withItself()][withItself] - computes correlations within the initially selected columns.
99+
* - [with] — selects a second set of columns and computes correlations between
100+
* the initially selected columns and this second set.
101+
* - [withItself] — computes pairwise correlations within the initially selected columns.
113102
*
114103
* See [Grammar][CorrDocs.Grammar] for more details.
115104
*/
116105
public data class Corr<T, C>(
    // The frame that `corr` was called on.
    internal val df: DataFrame<T>,
    // Selector for the primary set of columns passed to `corr`.
    internal val columns: ColumnsSelector<T, C>,
)
117106

107+
/**
108+
* Computes the correlation matrix between all suitable columns in this [DataFrame],
109+
* including nested columns at any depth.
110+
*
111+
* The result is a square correlation matrix represented as a [DataFrame],
112+
* where both rows and columns correspond to the original columns,
113+
* and each cell contains the correlation coefficient between the respective pair of columns.
114+
*
115+
* Only columns suitable for correlation (e.g., numeric types) are included in the result.
116+
*
117+
* For more information, see: {@include [DocumentationUrls.Corr]}
118+
*
119+
* @return A square correlation matrix as a [DataFrame], where both rows and columns correspond to the original columns.
120+
*/
118121
public fun <T> DataFrame<T>.corr(): DataFrame<T> {
    // Pick every column, at any nesting depth, that passes the suitability check...
    val suitableSelection = corr { colsAtAnyDepth().filter { col -> col.isSuitableForCorr() } }
    // ...and correlate that selection against itself.
    return suitableSelection.withItself()
}
122125

126+
/**
127+
* {@include [CommonCorrDocs]}
128+
* @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129+
*
130+
* ### Examples
131+
* ```kotlin
132+
* // Compute correlations between the "age" column and the "weight" and "height" columns
133+
* df.corr { age }.with { weight and height }
134+
*
135+
* // Compute pairwise correlations between all columns of type `Number`
136+
* df.corr { colsOf<Number>() }.withItself()
137+
* ```
138+
* @param [columns\] The [Columns Selector][ColumnsSelector] used to select the columns
139+
* of this [DataFrame] to compute a correlation.
140+
* @return A [Corr] intermediate object with the selected columns.
141+
*/
123142
public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C> {
    // Nothing is computed here: the receiver and the selector are only captured.
    return Corr(df = this, columns = columns)
}
124143

144+
/**
145+
* {@include [CommonCorrDocs]}
146+
* @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147+
*
148+
* ### Examples
149+
* ```kotlin
150+
* // Compute correlations between the "age" column and the "weight" and "height" columns
151+
* df.corr("age").with("weight", "height")
152+
*
153+
* // Compute pairwise correlations between the "age", "weight", and "height" columns
154+
* df.corr("age", "weight", "height").withItself()
155+
* ```
156+
* @param [columns\] The [Column Names][String] used to select the columns
157+
* of this [DataFrame] to compute a correlation.
158+
* @return A [Corr] intermediate object with the selected columns.
159+
*/
125160
public fun <T> DataFrame<T>.corr(vararg columns: String): Corr<T, Any?> {
    // Turn the names into a column set and hand off to the selector-based overload.
    return corr { columns.toColumnSet() }
}
126161

127162
@Deprecated(DEPRECATED_ACCESS_API)
@@ -132,8 +167,67 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: KProperty<C>): Corr<T, C> =
132167
@AccessApiOverload
133168
public fun <T, C> DataFrame<T>.corr(vararg columns: ColumnReference<C>): Corr<T, C> = corr { columns.toColumnSet() }
134169

170+
/**
171+
* Calculates the correlation of specified [columns][otherColumns]
172+
* with values in the columns previously selected with [corr].
173+
*
174+
* Returns a correlation matrix represented as a [DataFrame],
175+
* where rows and columns correspond to the selected column sets,
176+
* and each cell contains the correlation coefficient between the corresponding pair of columns.
177+
*
178+
* Check out [Grammar].
179+
*
180+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
181+
*
182+
* See also: [Selecting Columns][SelectingOptions].
183+
*
184+
* For more information, see: {@include [DocumentationUrls.Corr]}
185+
*/
186+
internal interface CorrWithDocs // KDoc-only holder; its documentation is included by CommonCorrWithDocs.
187+
188+
/**
189+
* {@include [CorrWithDocs]}
190+
* ### This Corr With Overload
191+
*/
192+
@ExcludeFromSources
193+
private interface CommonCorrWithDocs // KDoc-only base that the `with` overloads include in their docs.
194+
195+
/**
196+
* {@include [CommonCorrWithDocs]}
197+
* @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
198+
*
199+
* ### Examples
200+
* ```kotlin
201+
* // Compute correlations between the "age" column and the "weight" and "height" columns
202+
* df.corr { age }.with { weight and height }
203+
*
204+
* // Compute correlations between the "speed" column and all columns of type `Double` (excluding itself)
205+
* df.corr { speed }.with { colsOf<Double>() except speed }
206+
* ```
207+
*
208+
* @param otherColumns The [ColumnsSelector] used to select the second set of columns
209+
* from this [DataFrame] to compute correlations against the initially selected columns.
210+
* @return A [DataFrame] containing the resulting correlation matrix.
211+
*/
135212
public fun <T, C, R> Corr<T, C>.with(otherColumns: ColumnsSelector<T, R>): DataFrame<T> {
    // The actual computation lives in the internal implementation function.
    return corrImpl(otherColumns)
}
136213

214+
/**
215+
* {@include [CommonCorrWithDocs]}
216+
* @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
217+
*
218+
* ### Examples
219+
* ```kotlin
220+
* // Compute correlations between the "age" column and the "weight" and "height" columns
221+
* df.corr("age").with("weight", "height")
222+
*
223+
* // Compute correlations between all columns of type `Number` and the "speed" column
224+
* df.corr { colsOf<Number>() }.with("speed")
225+
* ```
226+
*
227+
* @param otherColumns The [Column Names][String] used to select the second set of columns
228+
* from this [DataFrame] to compute correlations against the initially selected columns.
229+
* @return A [DataFrame] containing the resulting correlation matrix.
230+
*/
137231
public fun <T, C> Corr<T, C>.with(vararg otherColumns: String): DataFrame<T> {
    // Turn the names into a column set and hand off to the selector-based overload.
    return with { otherColumns.toColumnSet() }
}
138232

139233
@Deprecated(DEPRECATED_ACCESS_API)
@@ -146,6 +240,20 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: KProperty<R>): DataFra
146240
public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: ColumnReference<R>): DataFrame<T> =
147241
with { otherColumns.toColumnSet() }
148242

243+
/**
244+
* Calculates pairwise correlations between the columns
245+
* previously selected with [corr].
246+
*
247+
* Returns a square correlation matrix represented as a [DataFrame],
248+
* where both rows and columns correspond to the selected columns,
249+
* and each cell contains the correlation coefficient between the respective pair of columns.
250+
*
251+
* Check out [Grammar].
252+
*
253+
* For more information, see: {@include [DocumentationUrls.Corr]}
254+
*
255+
* @return A [DataFrame] containing the pairwise correlation matrix.
256+
*/
149257
public fun <T, C> Corr<T, C>.withItself(): DataFrame<T> {
    // Reuse the primary selector as the second column set.
    return with(columns)
}
150258

151259
// endregion

0 commit comments

Comments
 (0)