@@ -13,9 +13,9 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
13
13
import org.jetbrains.kotlinx.dataframe.DataFrame
14
14
import org.jetbrains.kotlinx.dataframe.DataRow
15
15
import org.jetbrains.kotlinx.dataframe.api.cast
16
+ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
16
17
import org.jetbrains.kotlinx.dataframe.api.getColumn
17
18
import org.jetbrains.kotlinx.dataframe.api.indices
18
- import org.jetbrains.kotlinx.dataframe.api.map
19
19
import org.jetbrains.kotlinx.dataframe.api.mapIndexed
20
20
import org.jetbrains.kotlinx.dataframe.api.name
21
21
import org.jetbrains.kotlinx.dataframe.api.rows
@@ -71,17 +71,25 @@ public fun DataRow.Companion.readJson(stream: InputStream, header: List<String>
71
71
/**
 * Reads a data frame from JSON supplied as a string.
 *
 * The text is parsed with the default [Parser]; the parsed value and [header] are then handed to [readJson].
 *
 * @param text JSON document to parse.
 * @param header names forwarded unchanged to [readJson]; empty by default.
 * @return the frame produced by [readJson].
 */
public fun DataFrame.Companion.readJsonStr(text: String, header: List<String> = emptyList()): AnyFrame {
    val parsedJson = Parser.default().parse(StringBuilder(text))
    return readJson(parsedJson, header)
}
72
72
/**
 * Reads a single row from JSON supplied as a string.
 *
 * Delegates to `DataFrame.readJsonStr` with the same arguments and returns the result of `single()` on that frame.
 * NOTE(review): `single()` presumably fails unless the frame has exactly one row — confirm against its definition.
 *
 * @param text JSON document to parse.
 * @param header names forwarded unchanged to `DataFrame.readJsonStr`; empty by default.
 */
public fun DataRow.Companion.readJsonStr(text: String, header: List<String> = emptyList()): AnyRow {
    val frame = DataFrame.readJsonStr(text, header)
    return frame.single()
}
73
73
74
/**
 * Converts an already parsed JSON value into a data frame.
 *
 * A [JsonArray] is passed to [fromJsonList] as the list of records, together with [header].
 * Any other value is wrapped into a single-element list and passed without a header.
 * Before returning, [UnnamedColumn] wrappers are stripped from the top-level columns.
 *
 * @param parsed value produced by the JSON parser.
 * @param header names forwarded to [fromJsonList] in the array case only.
 */
private fun readJson(parsed: Any?, header: List<String>): DataFrame<*> {
    val frame = if (parsed is JsonArray<*>) {
        fromJsonList(parsed.value, header)
    } else {
        fromJsonList(listOf(parsed))
    }
    return frame.unwrapUnnamedColumns()
}
78
81
82
/**
 * Returns a frame in which every [UnnamedColumn] wrapper is replaced by the column it wraps.
 *
 * When no column is an [UnnamedColumn] the receiver is returned as is. This matters for a frame
 * without columns (for example `DataFrame.empty(n)`): rebuilding it from an empty column list
 * cannot carry over its row count, so it must not be rebuilt.
 *
 * @return the receiver itself, or a new frame built from the unwrapped columns.
 */
private fun DataFrame<Any?>.unwrapUnnamedColumns(): AnyFrame {
    val cols = columns()
    // Nothing to unwrap: keep the original frame (and its row count) instead of rebuilding it.
    if (cols.none { it is UnnamedColumn }) return this
    return dataFrameOf(cols.map { it.unwrapUnnamedColumn() })
}
84
+
85
/** Returns the wrapped column when the receiver is an [UnnamedColumn]; otherwise returns the receiver itself. */
private fun AnyCol.unwrapUnnamedColumn() = when (this) {
    is UnnamedColumn -> col
    else -> this
}
86
+
79
87
// Name associated with the synthetic column that holds JSON array values.
// NOTE(review): presumably the basis of the local `arrayColumn` in fromJsonList; that code is not visible here — confirm.
// NOTE(review): the literal starts with a space; verify that this is intended.
private val arrayColumnName = " array"
80
88
81
89
// Name associated with the synthetic column that holds primitive JSON values.
// NOTE(review): presumably the basis of the local `valueColumn` in fromJsonList; that code is not visible here — confirm.
// NOTE(review): the literal starts with a space; verify that this is intended.
internal val valueColumnName = " value"
82
90
83
91
internal fun fromJsonList (records : List <* >, header : List <String > = emptyList()): AnyFrame {
84
- fun AnyFrame.isSingleUnnamedColumn () = ncol == 1 && getColumn(0 ).name. let { it == org.jetbrains.kotlinx.dataframe.io.valueColumnName || it == org.jetbrains.kotlinx.dataframe.io.arrayColumnName }
92
+ fun AnyFrame.isSingleUnnamedColumn () = ncol == 1 && getColumn(0 ) is UnnamedColumn
85
93
86
94
var hasPrimitive = false
87
95
var hasArray = false
@@ -108,7 +116,7 @@ internal fun fromJsonList(records: List<*>, header: List<String> = emptyList()):
108
116
109
117
val columns: List <AnyCol > = nameGenerator.names.map { colName ->
110
118
when {
111
- colName == valueColumn -> {
119
+ colName == valueColumn && hasPrimitive -> {
112
120
val collector = createDataCollector(records.size)
113
121
val nanIndices = mutableListOf<Int >()
114
122
records.forEachIndexed { i, v ->
@@ -120,7 +128,7 @@ internal fun fromJsonList(records: List<*>, header: List<String> = emptyList()):
120
128
}
121
129
}
122
130
val column = collector.toColumn(colName)
123
- if (nanIndices.isNotEmpty()) {
131
+ val res = if (nanIndices.isNotEmpty()) {
124
132
fun <C > DataColumn<C>.updateNaNs (nanValue : C ): DataColumn <C > {
125
133
var j = 0
126
134
var nextNanIndex = nanIndices[j]
@@ -139,24 +147,27 @@ internal fun fromJsonList(records: List<*>, header: List<String> = emptyList()):
139
147
else -> column
140
148
}
141
149
} else column
150
+ UnnamedColumn (res)
142
151
}
143
- colName == arrayColumn -> {
152
+ colName == arrayColumn && hasArray -> {
144
153
val values = mutableListOf<Any ?>()
145
154
val startIndices = ArrayList <Int >()
146
155
records.forEach {
147
156
startIndices.add(values.size)
148
157
if (it is JsonArray <* >) values.addAll(it.value)
149
158
}
150
159
val parsed = fromJsonList(values)
151
- when {
160
+
161
+ val res = when {
152
162
parsed.isSingleUnnamedColumn() -> {
153
- val col = parsed.getColumn(0 )
163
+ val col = ( parsed.getColumn(0 ) as UnnamedColumn ).col
154
164
val elementType = col.type
155
165
val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
156
166
DataColumn .createValueColumn(colName, values, List ::class .createType(listOf (KTypeProjection .invariant(elementType))))
157
167
}
158
- else -> DataColumn .createFrameColumn(colName, parsed, startIndices)
168
+ else -> DataColumn .createFrameColumn(colName, parsed.unwrapUnnamedColumns() , startIndices)
159
169
}
170
+ UnnamedColumn (res)
160
171
}
161
172
else -> {
162
173
val values = ArrayList <Any ?>(records.size)
@@ -171,19 +182,26 @@ internal fun fromJsonList(records: List<*>, header: List<String> = emptyList()):
171
182
val parsed = fromJsonList(values)
172
183
when {
173
184
parsed.ncol == 0 -> DataColumn .createValueColumn(colName, arrayOfNulls<Any ?>(values.size).toList(), typeOf<Any ?>())
174
- parsed.isSingleUnnamedColumn() -> parsed.getColumn(0 ).rename(colName)
175
- else -> DataColumn .createColumnGroup(colName, parsed) as AnyCol
185
+ parsed.isSingleUnnamedColumn() -> ( parsed.getColumn(0 ) as UnnamedColumn ).col .rename(colName)
186
+ else -> DataColumn .createColumnGroup(colName, parsed.unwrapUnnamedColumns() ) as AnyCol
176
187
}
177
188
}
178
189
}
179
190
}
191
+
180
192
return when {
181
193
columns.isEmpty() -> DataFrame .empty(records.size)
182
194
columns.size == 1 && hasArray && header.isNotEmpty() && columns[0 ].typeClass == List ::class -> columns[0 ].cast<List <* >>().splitInto(* header.toTypedArray())
183
195
else -> columns.toDataFrame()
184
196
}
185
197
}
186
198
199
/**
 * Marker wrapper for a column that was produced without a real field name.
 *
 * It exists so that code receiving an `AnyFrame` from a recursive call can check, with a plain `is` test,
 * whether that frame consists of a single unnamed column.
 *
 * A column is "unnamed" when it was not created from a field of a record
 * (as in `[{"value": 1}, {"value": 2}]`) but from the values filtered out of a mixed list:
 * - primitive values: `[1, { ... }, []]` -> `[1, null, null]`
 * - arrays: `[1, { ... }, []]` -> `[null, null, []]`
 *
 * All [DataColumn] members are delegated to [col].
 */
private class UnnamedColumn(
    val col: DataColumn<Any?>
) : DataColumn<Any?> by col
204
+
187
205
// Set of Kotlin classes for Boolean and the built-in numeric types.
// NOTE(review): presumably consulted by the JSON encoding code that follows to decide which values are written
// as plain JSON values; that usage is outside the visible section — confirm.
private val valueTypes = setOf (Boolean ::class , Double ::class , Int ::class , Float ::class , Long ::class , Short ::class , Byte ::class )
188
206
189
207
internal fun KlaxonJson.encodeRow (frame : ColumnsContainer <* >, index : Int ): JsonObject ? {
0 commit comments