@@ -4,10 +4,12 @@ import kotlinx.datetime.Instant
4
4
import kotlinx.datetime.LocalDate
5
5
import kotlinx.datetime.LocalDateTime
6
6
import kotlinx.datetime.LocalTime
7
+ import kotlinx.datetime.format.DateTimeComponents
7
8
import kotlinx.datetime.toKotlinLocalDate
8
9
import kotlinx.datetime.toKotlinLocalDateTime
9
10
import kotlinx.datetime.toKotlinLocalTime
10
11
import org.jetbrains.kotlinx.dataframe.AnyFrame
12
+ import org.jetbrains.kotlinx.dataframe.AnyRow
11
13
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
12
14
import org.jetbrains.kotlinx.dataframe.DataColumn
13
15
import org.jetbrains.kotlinx.dataframe.DataFrame
@@ -32,6 +34,7 @@ import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
32
34
import org.jetbrains.kotlinx.dataframe.io.isURL
33
35
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
34
36
import org.jetbrains.kotlinx.dataframe.typeClass
37
+ import java.math.BigDecimal
35
38
import java.net.URL
36
39
import java.text.NumberFormat
37
40
import java.text.ParsePosition
@@ -55,10 +58,17 @@ internal interface StringParser<T> {
55
58
56
59
/**
 * Returns the parsing function of this parser, configured with the given [options].
 *
 * The returned function takes a single string and yields the parsed value, or `null`;
 * `tryParseImpl` treats a `null` result as "this parser cannot handle the value".
 *
 * @param options parser options to apply, or `null` to use none
 */
fun applyOptions(options: ParserOptions?): (String) -> T?

/** If a parser with one of these types is run, this parser can be skipped. */
val coveredBy: Collection<KType>

/** The type this parser is registered under; `tryParseImpl` uses it as the type of the parsed column. */
val type: KType
59
65
}
60
66
61
- internal open class DelegatedStringParser <T >(override val type : KType , val handle : (String ) -> T ? ) : StringParser<T> {
67
+ internal open class DelegatedStringParser <T >(
68
+ override val type : KType ,
69
+ override val coveredBy : Collection <KType >,
70
+ val handle : (String ) -> T ? ,
71
+ ) : StringParser<T> {
62
72
override fun toConverter (options : ParserOptions ? ): TypeConverter {
63
73
val nulls = options?.nullStrings ? : Parsers .nulls
64
74
return {
@@ -76,6 +86,7 @@ internal open class DelegatedStringParser<T>(override val type: KType, val handl
76
86
77
87
internal class StringParserWithFormat <T >(
78
88
override val type : KType ,
89
+ override val coveredBy : Collection <KType >,
79
90
val getParser : (ParserOptions ? ) -> ((String ) -> T ? ),
80
91
) : StringParser<T> {
81
92
override fun toConverter (options : ParserOptions ? ): TypeConverter {
@@ -219,21 +230,23 @@ internal object Parsers : GlobalParserOptions {
219
230
}
220
231
}
221
232
222
/**
 * Creates a [StringParser] for type [T] that delegates parsing to [body].
 *
 * @param catch when `true`, [body] is wrapped in `catchSilent`, so [body] may throw to signal
 *   a failed parse; when `false`, [body] is used as-is
 * @param coveredBy types of other parsers that make this parser redundant
 *   (see [StringParser.coveredBy])
 * @param body converts a string to [T], or returns `null` when the string cannot be parsed
 * @return a [DelegatedStringParser] registered under `typeOf<T>()`
 */
inline fun <reified T : Any> stringParser(
    catch: Boolean = false,
    coveredBy: Set<KType> = emptySet(),
    noinline body: (String) -> T?,
): StringParser<T> =
    if (catch) {
        DelegatedStringParser(typeOf<T>(), coveredBy) {
            catchSilent { body(it) }
        }
    } else {
        DelegatedStringParser(typeOf<T>(), coveredBy, body)
    }
234
245
235
/**
 * Creates a [StringParserWithFormat] for type [T] whose parsing function depends on the
 * [ParserOptions] in effect.
 *
 * @param coveredBy types of other parsers that make this parser redundant
 *   (see [StringParser.coveredBy])
 * @param body given the (nullable) options, returns the function that parses a single string
 *   to [T], or to `null` when the string cannot be parsed
 * @return a [StringParserWithFormat] registered under `typeOf<T>()`
 */
inline fun <reified T : Any> stringParserWithOptions(
    coveredBy: Set<KType> = emptySet(),
    noinline body: (ParserOptions?) -> ((String) -> T?),
): StringParserWithFormat<T> = StringParserWithFormat(typeOf<T>(), coveredBy, body)
237
250
238
251
private val parserToDoubleWithOptions = stringParserWithOptions { options ->
239
252
val numberFormat = NumberFormat .getInstance(options?.locale ? : Locale .getDefault())
@@ -243,69 +256,91 @@ internal object Parsers : GlobalParserOptions {
243
256
244
257
// Built-in parsers, listed in the order in which they are tried.
// Each java.time parser declares the matching kotlinx/kotlin parser in `coveredBy`,
// so it can be skipped when that parser is run as well.
private val parsersOrder = listOf(
    // Int
    stringParser<Int> { it.toIntOrNull() },
    // Long
    stringParser<Long> { it.toLongOrNull() },
    // kotlinx.datetime.Instant
    stringParser<Instant>(catch = true) {
        // same as Instant.parse(it), but with one fewer potential exception thrown/caught
        val format = DateTimeComponents.Formats.ISO_DATE_TIME_OFFSET
        format.parse(it).toInstantUsingOffset()
    },
    // java.time.Instant, will be skipped if kotlinx.datetime.Instant is already checked
    stringParser<JavaInstant>(catch = true, coveredBy = setOf(typeOf<Instant>())) { JavaInstant.parse(it) },
    // kotlinx.datetime.LocalDateTime
    stringParserWithOptions<LocalDateTime> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toLocalDateTimeOrNull(formatter) }
        parser
    },
    // java.time.LocalDateTime, will be skipped if kotlinx.datetime.LocalDateTime is already checked
    stringParserWithOptions<JavaLocalDateTime>(coveredBy = setOf(typeOf<LocalDateTime>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toJavaLocalDateTimeOrNull(formatter) }
        parser
    },
    // kotlinx.datetime.LocalDate
    stringParserWithOptions<LocalDate> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toLocalDateOrNull(formatter) }
        parser
    },
    // java.time.LocalDate, will be skipped if kotlinx.datetime.LocalDate is already checked
    stringParserWithOptions<JavaLocalDate>(coveredBy = setOf(typeOf<LocalDate>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toJavaLocalDateOrNull(formatter) }
        parser
    },
    // kotlin.time.Duration
    stringParser<Duration>(catch = true) { Duration.parse(it) },
    // java.time.Duration, will be skipped if kotlin.time.Duration is already checked
    stringParser<JavaDuration>(catch = true, coveredBy = setOf(typeOf<Duration>())) { JavaDuration.parse(it) },
    // kotlinx.datetime.LocalTime
    stringParserWithOptions<LocalTime> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toLocalTimeOrNull(formatter) }
        parser
    },
    // java.time.LocalTime, will be skipped if kotlinx.datetime.LocalTime is already checked
    stringParserWithOptions<JavaLocalTime>(coveredBy = setOf(typeOf<LocalTime>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parser = { it: String -> it.toJavaLocalTimeOrNull(formatter) }
        parser
    },
    // java.net.URL
    stringParser<URL> { it.toUrlOrNull() },
    // Double, with explicit number format or taken from current locale
    parserToDoubleWithOptions,
    // Double, with POSIX format
    // NOTE: shares its type with the parser above, so a map keyed by type can hold only one of the two
    stringParser<Double> { it.parseDouble(NumberFormat.getInstance(Locale.forLanguageTag("C.UTF-8"))) },
    // Boolean
    stringParser<Boolean> { it.toBooleanOrNull() },
    // BigDecimal
    stringParser<BigDecimal> { it.toBigDecimalOrNull() },
    // JSON array as DataFrame<*>
    stringParser<AnyFrame>(catch = true) {
        // only attempt JSON parsing when the text is bracketed like an array
        val trimmed = it.trim()
        if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
            DataFrame.readJsonStr(it)
        } else {
            null
        }
    },
    // JSON object as DataRow<*>
    stringParser<AnyRow>(catch = true) {
        // only attempt JSON parsing when the text is bracketed like an object
        val trimmed = it.trim()
        if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
            DataFrame.readJsonStr(it).single()
        } else {
            null
        }
    },
    // No parser found, return as String
    // must be last in the list of parsers to return original unparsed string
    stringParser<String> { it },
)
307
342
308
// Parsers keyed by the type they produce.
// NOTE: `associateBy` keeps only the LAST parser for a given type, so parsers that share a type
// (the two Double parsers in `parsersOrder`) collapse into a single entry here.
// Use `parsersOrder` when every parser must be visited.
internal val parsersMap = parsersOrder.associateBy { it.type }

// Number of parsers in `parsersOrder` (including parsers that share a type).
val size: Int = parsersOrder.size
311
346
@@ -352,49 +387,76 @@ internal object Parsers : GlobalParserOptions {
352
387
}
353
388
}
354
389
390
/**
 * Tries to parse a column of strings into a column of a different type.
 * Each parser in [Parsers] is run in order until a valid parser is found,
 * a.k.a. that parser was able to parse all values in the column successfully. If a parser
 * fails to parse any value, the next parser is tried. If all the others fail, the final parser
 * simply returns the original string, leaving the column unchanged.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * `null` values and values contained in the configured null-strings become `null` in the result
 * without being passed to a parser; all other values are trimmed before parsing.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values, or this column itself if nothing was parsed
 */
internal fun DataColumn<String?>.tryParseImpl(options: ParserOptions?): DataColumn<*> {
    val columnSize = size
    val parsedValues = ArrayList<Any?>(columnSize)
    var hasNulls = false
    var hasNotNulls = false
    var nullStringParsed = false
    val nulls = options?.nullStrings ?: Parsers.nulls

    // Walk the parsers by index instead of through `Parsers.parsersMap`: the map is keyed by type,
    // so parsers sharing a type (the locale-aware and the POSIX Double parser) would collapse into
    // one entry and the locale-aware parser would never be tried.
    val parsersToCheck = List(Parsers.size) { Parsers[it] }
    val parserTypesToCheck = parsersToCheck.map { it.type }.toSet()

    var correctParser: StringParser<*>? = null
    for (parser in parsersToCheck) {
        // a parser covered by another parser that is being checked adds nothing; skip it
        if (parser.coveredBy.any { it in parserTypesToCheck }) continue

        val parserWithOptions = parser.applyOptions(options)

        // start from a clean slate for every candidate parser
        parsedValues.clear()
        hasNulls = false
        hasNotNulls = false
        nullStringParsed = false

        for (str in this) {
            when {
                str == null -> {
                    parsedValues += null
                    hasNulls = true
                }

                str in nulls -> {
                    parsedValues += null
                    hasNulls = true
                    nullStringParsed = true
                }

                else -> {
                    val trimmed = str.trim()
                    val res = parserWithOptions(trimmed)
                    if (res == null) {
                        // this parser cannot handle the column; stop right away instead of
                        // parsing the remaining values with a parser that already failed
                        break
                    }
                    parsedValues += res
                    hasNotNulls = true
                }
            }
        }

        // break when everything is parsed
        if (parsedValues.size >= columnSize) {
            correctParser = parser
            break
        }
    }
    check(correctParser != null) { "Valid parser not found" }

    // if only nulls were seen, no parser actually produced a value, so keep the original type
    val type = (if (hasNotNulls) correctParser.type else this.type()).withNullability(hasNulls)
    if (type.jvmErasure == String::class && !nullStringParsed) {
        return this // nothing parsed
    }
    return DataColumn.create(name(), parsedValues, type)
}
400
462
0 commit comments