Skip to content

Commit 675cd69

Browse files
committed
Improve dataframe sorting in KTNB UI by handling non-comparable columns
Enhanced the sorting function to handle columns with non-comparable types by converting them to their string representation.
1 parent d60b5de commit 675cd69

File tree

2 files changed

+158
-34
lines changed

2 files changed

+158
-34
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
33
import org.jetbrains.kotlinx.dataframe.AnyCol
44
import org.jetbrains.kotlinx.dataframe.AnyFrame
55
import org.jetbrains.kotlinx.dataframe.AnyRow
6+
import org.jetbrains.kotlinx.dataframe.DataRow
67
import org.jetbrains.kotlinx.dataframe.api.Convert
78
import org.jetbrains.kotlinx.dataframe.api.FormatClause
89
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
@@ -25,13 +26,20 @@ import org.jetbrains.kotlinx.dataframe.api.Update
2526
import org.jetbrains.kotlinx.dataframe.api.at
2627
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
2728
import org.jetbrains.kotlinx.dataframe.api.frames
29+
import org.jetbrains.kotlinx.dataframe.api.getColumn
30+
import org.jetbrains.kotlinx.dataframe.api.getValueOrNull
2831
import org.jetbrains.kotlinx.dataframe.api.into
29-
import org.jetbrains.kotlinx.dataframe.api.sortBy
32+
import org.jetbrains.kotlinx.dataframe.api.sortWith
3033
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
3134
import org.jetbrains.kotlinx.dataframe.api.values
3235
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
33-
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
3436
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
37+
import org.jetbrains.kotlinx.dataframe.type
38+
import kotlin.collections.sortWith
39+
import kotlin.reflect.KType
40+
import kotlin.reflect.full.isSubtypeOf
41+
import kotlin.reflect.typeOf
42+
import kotlin.toString
3543

3644
/**
3745
* A class with utility methods for Kotlin Notebook Plugin integration.
@@ -62,6 +70,7 @@ public object KotlinNotebookPluginUtils {
6270

6371
/**
6472
* Sorts a dataframe-like object by multiple columns.
73+
* If a column type is not comparable, sorting by string representation is applied instead.
6574
*
6675
* @param dataFrameLike The dataframe-like object to sort.
6776
* @param columnPaths The list of columns to sort by. Each element in the list represents a column path
@@ -79,27 +88,80 @@ public object KotlinNotebookPluginUtils {
7988
}
8089

8190
/**
82-
* Sorts the given data frame by the specified columns.
91+
* Sorts a dataframe by multiple columns with specified sorting order for each column.
92+
* If a column type is not comparable, sorting by string representation is applied instead.
8393
*
84-
* @param df The data frame to be sorted.
85-
* @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings.
86-
* @param isDesc A list of booleans indicating whether each column should be sorted in descending order.
87-
* The size of this list must be equal to the size of the columnPaths list.
88-
* @return The sorted data frame.
94+
* @param df The dataframe to be sorted.
95+
* @param columnPaths A list of column paths where each path is a list of strings representing the hierarchical path of the column.
96+
* @param isDesc A list of boolean values indicating whether each column should be sorted in descending order;
97+
* true for descending, false for ascending. The size of this list must equal the size of `columnPaths`; otherwise an [IllegalArgumentException] is thrown.
98+
* @return The sorted dataframe.
8999
*/
90-
public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame =
91-
df.sortBy {
92-
require(columnPaths.all { it.isNotEmpty() })
93-
require(columnPaths.size == isDesc.size)
100+
public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame {
101+
require(columnPaths.all { it.isNotEmpty() })
102+
require(columnPaths.size == isDesc.size)
103+
104+
val sortKeys = columnPaths.map { path ->
105+
ColumnPath(path)
106+
}
107+
108+
val comparator = createComparator(sortKeys, isDesc)
94109

95-
val sortKeys = columnPaths.map { path ->
96-
ColumnPath(path)
110+
return df.sortWith(comparator)
111+
}
112+
113+
private fun createComparator(sortKeys: List<ColumnPath>, isDesc: List<Boolean>): Comparator<DataRow<*>> {
114+
return Comparator { row1, row2 ->
115+
for ((key, desc) in sortKeys.zip(isDesc)) {
116+
val comparisonResult = if (row1.df().getColumn(key).type.isComparable()) {
117+
compareComparableValues(row1, row2, key, desc)
118+
} else {
119+
compareStringValues(row1, row2, key, desc)
120+
}
121+
// If a comparison result is non-zero, we have resolved the ordering
122+
if (comparisonResult != 0) return@Comparator comparisonResult
97123
}
124+
// All comparisons are equal
125+
0
126+
}
127+
}
98128

99-
(sortKeys zip isDesc).map { (key, desc) ->
100-
if (desc) key.desc() else key
101-
}.toColumnSet()
129+
@Suppress("UNCHECKED_CAST")
130+
private fun compareComparableValues(
131+
row1: DataRow<*>,
132+
row2: DataRow<*>,
133+
key: ColumnPath,
134+
desc: Boolean,
135+
): Int {
136+
val firstValue = row1.getValueOrNull(key) as Comparable<Any?>?
137+
val secondValue = row2.getValueOrNull(key) as Comparable<Any?>?
138+
139+
return when {
140+
firstValue == null && secondValue == null -> 0
141+
firstValue == null -> if (desc) 1 else -1
142+
secondValue == null -> if (desc) -1 else 1
143+
desc -> secondValue.compareTo(firstValue)
144+
else -> firstValue.compareTo(secondValue)
102145
}
146+
}
147+
148+
private fun compareStringValues(
149+
row1: DataRow<*>,
150+
row2: DataRow<*>,
151+
key: ColumnPath,
152+
desc: Boolean,
153+
): Int {
154+
val firstValue = (row1.getValueOrNull(key)?.toString() ?: "")
155+
val secondValue = (row2.getValueOrNull(key)?.toString() ?: "")
156+
157+
return if (desc) {
158+
secondValue.compareTo(firstValue)
159+
} else {
160+
firstValue.compareTo(secondValue)
161+
}
162+
}
163+
164+
/**
 * Returns `true` when values of this type can be ordered through [Comparable].
 *
 * The check is made against `Comparable<*>?` rather than `Comparable<*>` on purpose:
 * a nullable type (for example `Int?`, the type of a comparable column that contains nulls)
 * is never a subtype of a non-null type, so the stricter check would classify such columns
 * as non-comparable and make them fall back to sorting by string representation.
 * Null values themselves are ordered explicitly by `compareComparableValues`.
 */
private fun KType.isComparable(): Boolean = isSubtypeOf(typeOf<Comparable<*>?>())
103165

104166
internal fun isDataframeConvertable(dataframeLike: Any?): Boolean =
105167
when (dataframeLike) {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
33
import org.jetbrains.kotlinx.dataframe.AnyCol
44
import org.jetbrains.kotlinx.dataframe.AnyFrame
55
import org.jetbrains.kotlinx.dataframe.AnyRow
6+
import org.jetbrains.kotlinx.dataframe.DataRow
67
import org.jetbrains.kotlinx.dataframe.api.Convert
78
import org.jetbrains.kotlinx.dataframe.api.FormatClause
89
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
@@ -25,13 +26,20 @@ import org.jetbrains.kotlinx.dataframe.api.Update
2526
import org.jetbrains.kotlinx.dataframe.api.at
2627
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
2728
import org.jetbrains.kotlinx.dataframe.api.frames
29+
import org.jetbrains.kotlinx.dataframe.api.getColumn
30+
import org.jetbrains.kotlinx.dataframe.api.getValueOrNull
2831
import org.jetbrains.kotlinx.dataframe.api.into
29-
import org.jetbrains.kotlinx.dataframe.api.sortBy
32+
import org.jetbrains.kotlinx.dataframe.api.sortWith
3033
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
3134
import org.jetbrains.kotlinx.dataframe.api.values
3235
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
33-
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
3436
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
37+
import org.jetbrains.kotlinx.dataframe.type
38+
import kotlin.collections.sortWith
39+
import kotlin.reflect.KType
40+
import kotlin.reflect.full.isSubtypeOf
41+
import kotlin.reflect.typeOf
42+
import kotlin.toString
3543

3644
/**
3745
* A class with utility methods for Kotlin Notebook Plugin integration.
@@ -62,6 +70,7 @@ public object KotlinNotebookPluginUtils {
6270

6371
/**
6472
* Sorts a dataframe-like object by multiple columns.
73+
* If a column type is not comparable, sorting by string representation is applied instead.
6574
*
6675
* @param dataFrameLike The dataframe-like object to sort.
6776
* @param columnPaths The list of columns to sort by. Each element in the list represents a column path
@@ -79,27 +88,80 @@ public object KotlinNotebookPluginUtils {
7988
}
8089

8190
/**
82-
* Sorts the given data frame by the specified columns.
91+
* Sorts a dataframe by multiple columns with specified sorting order for each column.
92+
* If a column type is not comparable, sorting by string representation is applied instead.
8393
*
84-
* @param df The data frame to be sorted.
85-
* @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings.
86-
* @param isDesc A list of booleans indicating whether each column should be sorted in descending order.
87-
* The size of this list must be equal to the size of the columnPaths list.
88-
* @return The sorted data frame.
94+
* @param df The dataframe to be sorted.
95+
* @param columnPaths A list of column paths where each path is a list of strings representing the hierarchical path of the column.
96+
* @param isDesc A list of boolean values indicating whether each column should be sorted in descending order;
97+
* true for descending, false for ascending. The size of this list must equal the size of `columnPaths`; otherwise an [IllegalArgumentException] is thrown.
98+
* @return The sorted dataframe.
8999
*/
90-
public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame =
91-
df.sortBy {
92-
require(columnPaths.all { it.isNotEmpty() })
93-
require(columnPaths.size == isDesc.size)
100+
public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame {
101+
require(columnPaths.all { it.isNotEmpty() })
102+
require(columnPaths.size == isDesc.size)
103+
104+
val sortKeys = columnPaths.map { path ->
105+
ColumnPath(path)
106+
}
107+
108+
val comparator = createComparator(sortKeys, isDesc)
94109

95-
val sortKeys = columnPaths.map { path ->
96-
ColumnPath(path)
110+
return df.sortWith(comparator)
111+
}
112+
113+
private fun createComparator(sortKeys: List<ColumnPath>, isDesc: List<Boolean>): Comparator<DataRow<*>> {
114+
return Comparator { row1, row2 ->
115+
for ((key, desc) in sortKeys.zip(isDesc)) {
116+
val comparisonResult = if (row1.df().getColumn(key).type.isComparable()) {
117+
compareComparableValues(row1, row2, key, desc)
118+
} else {
119+
compareStringValues(row1, row2, key, desc)
120+
}
121+
// If a comparison result is non-zero, we have resolved the ordering
122+
if (comparisonResult != 0) return@Comparator comparisonResult
97123
}
124+
// All comparisons are equal
125+
0
126+
}
127+
}
98128

99-
(sortKeys zip isDesc).map { (key, desc) ->
100-
if (desc) key.desc() else key
101-
}.toColumnSet()
129+
@Suppress("UNCHECKED_CAST")
130+
private fun compareComparableValues(
131+
row1: DataRow<*>,
132+
row2: DataRow<*>,
133+
key: ColumnPath,
134+
desc: Boolean,
135+
): Int {
136+
val firstValue = row1.getValueOrNull(key) as Comparable<Any?>?
137+
val secondValue = row2.getValueOrNull(key) as Comparable<Any?>?
138+
139+
return when {
140+
firstValue == null && secondValue == null -> 0
141+
firstValue == null -> if (desc) 1 else -1
142+
secondValue == null -> if (desc) -1 else 1
143+
desc -> secondValue.compareTo(firstValue)
144+
else -> firstValue.compareTo(secondValue)
102145
}
146+
}
147+
148+
private fun compareStringValues(
149+
row1: DataRow<*>,
150+
row2: DataRow<*>,
151+
key: ColumnPath,
152+
desc: Boolean,
153+
): Int {
154+
val firstValue = (row1.getValueOrNull(key)?.toString() ?: "")
155+
val secondValue = (row2.getValueOrNull(key)?.toString() ?: "")
156+
157+
return if (desc) {
158+
secondValue.compareTo(firstValue)
159+
} else {
160+
firstValue.compareTo(secondValue)
161+
}
162+
}
163+
164+
/**
 * Returns `true` when values of this type can be ordered through [Comparable].
 *
 * The check is made against `Comparable<*>?` rather than `Comparable<*>` on purpose:
 * a nullable type (for example `Int?`, the type of a comparable column that contains nulls)
 * is never a subtype of a non-null type, so the stricter check would classify such columns
 * as non-comparable and make them fall back to sorting by string representation.
 * Null values themselves are ordered explicitly by `compareComparableValues`.
 */
private fun KType.isComparable(): Boolean = isSubtypeOf(typeOf<Comparable<*>?>())
103165

104166
internal fun isDataframeConvertable(dataframeLike: Any?): Boolean =
105167
when (dataframeLike) {

0 commit comments

Comments
 (0)