Skip to content

Commit a84c30c

Browse files
Merge pull request #1082 from Kotlin/dynamic_df_builder
DynamicDataFrameBuilder improvements
2 parents e760eea + a65a5c9 commit a84c30c

File tree

5 files changed

+182
-26
lines changed

5 files changed

+182
-26
lines changed

core/api/core.api

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,7 +2200,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/DuplicateKt {
22002200

22012201
public final class org/jetbrains/kotlinx/dataframe/api/DynamicDataFrameBuilder {
22022202
public fun <init> ()V
2203+
public fun <init> (Z)V
2204+
public synthetic fun <init> (ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
22032205
public final fun add (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/String;
2206+
public final fun get (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
22042207
public final fun toDataFrame ()Lorg/jetbrains/kotlinx/dataframe/DataFrame;
22052208
}
22062209

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -413,29 +413,78 @@ public class DataFrameBuilder(private val header: List<String>) {
413413
}
414414

415415
/**
416-
* Helper class for implementing operations when column names can be potentially duplicated.
417-
* For example, operations involving multiple dataframes, computed columns or parsing some third-party data
416+
* A builder class for dynamically constructing a DataFrame with provided columns.
417+
* Allows adding columns manually while automatically handling duplicate column names by assigning unique names.
418+
*
419+
* @property checkDuplicateValues Whether to check for duplicate columns (with identical names and values)
420+
* when adding new columns. `true` by default.
418421
*/
419-
public class DynamicDataFrameBuilder {
420-
private var cols: MutableList<AnyCol> = mutableListOf()
422+
public class DynamicDataFrameBuilder(private val checkDuplicateValues: Boolean = true) {
423+
private var cols: MutableMap<String, AnyCol> = mutableMapOf()
421424
private val generator = ColumnNameGenerator()
422425

426+
/**
427+
* Adds a column to the builder, ensuring its name is unique.
428+
*
429+
* - If a column with the same name already exists, the new column is renamed to a unique name.
430+
* - If [checkDuplicateValues] is `true`, the method checks whether the new column has identical values
431+
* to an existing column with the same name. If the values match, the column is not added.
432+
*
433+
* @param col The column to add to the DataFrame builder.
434+
* @return The final unique name assigned to the column.
435+
*/
423436
public fun add(col: AnyCol): String {
424-
val uniqueName = if (col.name().isEmpty()) {
437+
val originalName = col.name()
438+
if (checkDuplicateValues && generator.contains(originalName)) {
439+
if (cols[originalName] == col) return originalName
440+
}
441+
val uniqueName = if (originalName.isEmpty()) {
425442
generator.addUnique(UNNAMED_COLUMN_PREFIX)
426443
} else {
427-
generator.addUnique(col.name())
444+
generator.addUnique(originalName)
428445
}
429-
val renamed = if (uniqueName != col.name()) {
446+
val renamed = if (uniqueName != originalName) {
430447
col.rename(uniqueName)
431448
} else {
432449
col
433450
}
434-
cols.add(renamed)
451+
cols.put(uniqueName, renamed)
435452
return uniqueName
436453
}
437454

438-
public fun toDataFrame(): DataFrame<*> = dataFrameOf(cols)
455+
/**
456+
* Adds a column to the builder from the given iterable of values, ensuring the column's name is unique.
457+
*
458+
* The method automatically converts the given iterable into a column using the specified or default name
459+
* and infers the type of the column's elements.
460+
*
461+
* - If a column with the same name already exists, the new column is renamed to a unique name.
462+
* - If the [checkDuplicateValues] property of the builder is `true`, the method checks whether the new column
463+
* has identical values to an existing column with the same name. If the values match, the column is not added.
464+
*
465+
* @param T The inferred type of the elements in the column.
466+
* @param values The iterable collection of values to be added as a new column.
467+
* @param name The name of the new column. If empty, a unique name will be generated automatically.
468+
* @return The final unique name assigned to the column.
469+
*/
470+
public inline fun <reified T> add(values: Iterable<T>, name: String = ""): String =
471+
add(values.toColumn(name, Infer.Type))
472+
473+
/**
474+
* Retrieves a column from the builder by its name.
475+
*
476+
* @param column The name of the column to retrieve.
477+
* @return The column corresponding to the specified name, or `null` if no such column exists.
478+
*/
479+
public fun get(column: String): AnyCol? = cols[column]
480+
481+
/**
482+
* Converts the current `DynamicDataFrameBuilder` instance into a `DataFrame`.
483+
* The resulting `DataFrame` is constructed from the columns stored in the builder.
484+
*
485+
* @return A `DataFrame` containing the columns defined in the `DynamicDataFrameBuilder`.
486+
*/
487+
public fun toDataFrame(): DataFrame<*> = cols.values.toDataFrame()
439488
}
440489

441490
/**

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,39 @@ class ConstructorsTests {
3131
@Test
3232
fun `duplicated name`() {
3333
val builder = DynamicDataFrameBuilder()
34-
val column by columnOf(1, 2, 3)
35-
builder.add(column)
36-
builder.add(column)
34+
val columnName = "columnName"
35+
val columnA = columnOf(1, 2, 3) named columnName
36+
val columnB = columnOf(4, 5, 6) named columnName
37+
builder.add(columnA)
38+
builder.add(columnB)
3739
val df = builder.toDataFrame()
3840
df.columnsCount() shouldBe 2
39-
df.columnNames() shouldBe listOf(column.name(), "${column.name()}1")
41+
df.columnNames() shouldBe listOf(columnName, "${columnName}1")
42+
}
43+
44+
@Test
45+
fun `get by new name`() {
46+
val builder = DynamicDataFrameBuilder()
47+
val columnName = "columnName"
48+
val columnA = columnOf(1, 2, 3) named columnName
49+
val columnB = columnOf(4, 5, 6) named columnName
50+
builder.add(columnA)
51+
val newName = builder.add(columnB)
52+
builder.get(newName)!!.values shouldBe columnB.values
53+
}
54+
55+
@Test
56+
fun `duplicated column`() {
57+
val builder = DynamicDataFrameBuilder()
58+
val columnName = "columnName"
59+
val columnA = columnOf(1, 2, 3) named columnName
60+
val columnB = columnOf(4, 5, 6) named columnName
61+
builder.add(columnA)
62+
builder.add(columnB)
63+
builder.add(columnA)
64+
val df = builder.toDataFrame()
65+
df.columnsCount() shouldBe 2
66+
df.columnNames() shouldBe listOf(columnName, "${columnName}1")
4067
}
4168

4269
@Test

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -413,29 +413,79 @@ public class DataFrameBuilder(private val header: List<String>) {
413413
}
414414

415415
/**
416-
* Helper class for implementing operations when column names can be potentially duplicated.
417-
* For example, operations involving multiple dataframes, computed columns or parsing some third-party data
416+
* A builder class for dynamically constructing a DataFrame with provided columns.
417+
* Allows adding columns manually while automatically handling duplicate column names by assigning unique names.
418+
*
419+
* @property checkDuplicateValues Whether to check for duplicate columns (with identical names and values)
420+
* when adding new columns. If `true`, a new column is not added if an identical one is already in the builder.
421+
* `true` by default.
418422
*/
419-
public class DynamicDataFrameBuilder {
420-
private var cols: MutableList<AnyCol> = mutableListOf()
423+
public class DynamicDataFrameBuilder(private val checkDuplicateValues: Boolean = true) {
424+
private var cols: MutableMap<String, AnyCol> = mutableMapOf()
421425
private val generator = ColumnNameGenerator()
422426

427+
/**
428+
* Adds a column to the builder, ensuring its name is unique.
429+
*
430+
* - If a column with the same name already exists, the new column is renamed to a unique name.
431+
* - If [checkDuplicateValues] is `true`, the method checks whether the new column has identical values
432+
* to an existing column with the same name. If the values match, the column is not added.
433+
*
434+
* @param col The column to add to the DataFrame builder.
435+
* @return The final unique name assigned to the column.
436+
*/
423437
public fun add(col: AnyCol): String {
424-
val uniqueName = if (col.name().isEmpty()) {
438+
val originalName = col.name()
439+
if (checkDuplicateValues && generator.contains(originalName)) {
440+
if (cols[originalName] == col) return originalName
441+
}
442+
val uniqueName = if (originalName.isEmpty()) {
425443
generator.addUnique(UNNAMED_COLUMN_PREFIX)
426444
} else {
427-
generator.addUnique(col.name())
445+
generator.addUnique(originalName)
428446
}
429-
val renamed = if (uniqueName != col.name()) {
447+
val renamed = if (uniqueName != originalName) {
430448
col.rename(uniqueName)
431449
} else {
432450
col
433451
}
434-
cols.add(renamed)
452+
cols.put(uniqueName, renamed)
435453
return uniqueName
436454
}
437455

438-
public fun toDataFrame(): DataFrame<*> = dataFrameOf(cols)
456+
/**
457+
* Adds a column to the builder from the given iterable of values, ensuring the column's name is unique.
458+
*
459+
* The method automatically converts the given iterable into a column using the specified or default name
460+
* and infers the type of the column's elements.
461+
*
462+
* - If a column with the same name already exists, the new column is renamed to a unique name.
463+
* - If the [checkDuplicateValues] property of the builder is `true`, the method checks whether the new column
464+
* has identical values to an existing column with the same name. If the values match, the column is not added.
465+
*
466+
* @param T The inferred type of the elements in the column.
467+
* @param values The iterable collection of values to be added as a new column.
468+
* @param name The name of the new column. If empty, a unique name will be generated automatically.
469+
* @return The final unique name assigned to the column.
470+
*/
471+
public inline fun <reified T> add(values: Iterable<T>, name: String = ""): String =
472+
add(values.toColumn(name, Infer.Type))
473+
474+
/**
475+
* Retrieves a column from the builder by its name.
476+
*
477+
* @param column The name of the column to retrieve.
478+
* @return The column corresponding to the specified name, or `null` if no such column exists.
479+
*/
480+
public fun get(column: String): AnyCol? = cols[column]
481+
482+
/**
483+
* Converts the current [DynamicDataFrameBuilder] instance into a [DataFrame].
484+
* The resulting [DataFrame] is constructed from the columns stored in the builder.
485+
*
486+
* @return A [DataFrame] containing the columns defined in the [DynamicDataFrameBuilder].
487+
*/
488+
public fun toDataFrame(): DataFrame<*> = cols.values.toDataFrame()
439489
}
440490

441491
/**

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,39 @@ class ConstructorsTests {
3131
@Test
3232
fun `duplicated name`() {
3333
val builder = DynamicDataFrameBuilder()
34-
val column by columnOf(1, 2, 3)
35-
builder.add(column)
36-
builder.add(column)
34+
val columnName = "columnName"
35+
val columnA = columnOf(1, 2, 3) named columnName
36+
val columnB = columnOf(4, 5, 6) named columnName
37+
builder.add(columnA)
38+
builder.add(columnB)
3739
val df = builder.toDataFrame()
3840
df.columnsCount() shouldBe 2
39-
df.columnNames() shouldBe listOf(column.name(), "${column.name()}1")
41+
df.columnNames() shouldBe listOf(columnName, "${columnName}1")
42+
}
43+
44+
@Test
45+
fun `get by new name`() {
46+
val builder = DynamicDataFrameBuilder()
47+
val columnName = "columnName"
48+
val columnA = columnOf(1, 2, 3) named columnName
49+
val columnB = columnOf(4, 5, 6) named columnName
50+
builder.add(columnA)
51+
val newName = builder.add(columnB)
52+
builder.get(newName)!!.values shouldBe columnB.values
53+
}
54+
55+
@Test
56+
fun `duplicated column`() {
57+
val builder = DynamicDataFrameBuilder()
58+
val columnName = "columnName"
59+
val columnA = columnOf(1, 2, 3) named columnName
60+
val columnB = columnOf(4, 5, 6) named columnName
61+
builder.add(columnA)
62+
builder.add(columnB)
63+
builder.add(columnA)
64+
val df = builder.toDataFrame()
65+
df.columnsCount() shouldBe 2
66+
df.columnNames() shouldBe listOf(columnName, "${columnName}1")
4067
}
4168

4269
@Test

0 commit comments

Comments
 (0)