Skip to content

Commit 0ca3883

Browse files
committed
Small logic rewrite for tryParseImpl and added KDocs. StringParsers can now be "covered by" another parser, meaning they will be skipped if the other parser is run. parsersOrder was also cleaned up a tiny bit.
1 parent feb491b commit 0ca3883

File tree

1 file changed

+117
-55
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api

1 file changed

+117
-55
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 117 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ import kotlinx.datetime.Instant
44
import kotlinx.datetime.LocalDate
55
import kotlinx.datetime.LocalDateTime
66
import kotlinx.datetime.LocalTime
7+
import kotlinx.datetime.format.DateTimeComponents
78
import kotlinx.datetime.toKotlinLocalDate
89
import kotlinx.datetime.toKotlinLocalDateTime
910
import kotlinx.datetime.toKotlinLocalTime
1011
import org.jetbrains.kotlinx.dataframe.AnyFrame
12+
import org.jetbrains.kotlinx.dataframe.AnyRow
1113
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
1214
import org.jetbrains.kotlinx.dataframe.DataColumn
1315
import org.jetbrains.kotlinx.dataframe.DataFrame
@@ -32,6 +34,7 @@ import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
3234
import org.jetbrains.kotlinx.dataframe.io.isURL
3335
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
3436
import org.jetbrains.kotlinx.dataframe.typeClass
37+
import java.math.BigDecimal
3538
import java.net.URL
3639
import java.text.NumberFormat
3740
import java.text.ParsePosition
@@ -55,10 +58,17 @@ internal interface StringParser<T> {
5558

5659
fun applyOptions(options: ParserOptions?): (String) -> T?
5760

61+
/** If a parser with one of these types is run, this parser can be skipped. */
62+
val coveredBy: Collection<KType>
63+
5864
val type: KType
5965
}
6066

61-
internal open class DelegatedStringParser<T>(override val type: KType, val handle: (String) -> T?) : StringParser<T> {
67+
internal open class DelegatedStringParser<T>(
68+
override val type: KType,
69+
override val coveredBy: Collection<KType>,
70+
val handle: (String) -> T?,
71+
) : StringParser<T> {
6272
override fun toConverter(options: ParserOptions?): TypeConverter {
6373
val nulls = options?.nullStrings ?: Parsers.nulls
6474
return {
@@ -76,6 +86,7 @@ internal open class DelegatedStringParser<T>(override val type: KType, val handl
7686

7787
internal class StringParserWithFormat<T>(
7888
override val type: KType,
89+
override val coveredBy: Collection<KType>,
7990
val getParser: (ParserOptions?) -> ((String) -> T?),
8091
) : StringParser<T> {
8192
override fun toConverter(options: ParserOptions?): TypeConverter {
@@ -219,21 +230,23 @@ internal object Parsers : GlobalParserOptions {
219230
}
220231
}
221232

222-
/**
 * Builds a [StringParser] for values of type [T] backed by [body].
 *
 * @param catch when `true`, every call to [body] is routed through `catchSilent`
 *   (presumably turning a thrown exception into `null` -- confirm against its definition).
 * @param coveredBy types of other parsers that make this parser redundant; forwarded unchanged.
 * @param body converts a string to [T], or returns `null` when the string cannot be parsed.
 * @return a [DelegatedStringParser] whose [StringParser.type] is `typeOf<T>()`.
 */
inline fun <reified T : Any> stringParser(
    catch: Boolean = false,
    coveredBy: Set<KType> = emptySet(),
    noinline body: (String) -> T?,
): StringParser<T> {
    // Fast path: no exception guarding requested, hand the body over as-is.
    if (!catch) {
        return DelegatedStringParser(typeOf<T>(), coveredBy, body)
    }
    return DelegatedStringParser(typeOf<T>(), coveredBy) { input ->
        catchSilent { body(input) }
    }
}
234245

235-
/**
 * Builds a [StringParserWithFormat] for values of type [T].
 *
 * @param coveredBy types of other parsers that make this parser redundant; forwarded unchanged.
 * @param body given the (nullable) [ParserOptions], returns the function that parses a single string.
 * @return a parser whose [StringParser.type] is `typeOf<T>()`.
 */
inline fun <reified T : Any> stringParserWithOptions(
    coveredBy: Set<KType> = emptySet(),
    noinline body: (ParserOptions?) -> ((String) -> T?),
): StringParserWithFormat<T> {
    val producedType = typeOf<T>()
    return StringParserWithFormat(type = producedType, coveredBy = coveredBy, getParser = body)
}
237250

238251
private val parserToDoubleWithOptions = stringParserWithOptions { options ->
239252
val numberFormat = NumberFormat.getInstance(options?.locale ?: Locale.getDefault())
@@ -243,69 +256,91 @@ internal object Parsers : GlobalParserOptions {
243256

244257
/**
 * All built-in parsers, listed in the order in which they are attempted.
 * The plain `String` parser has to stay last because it accepts every input.
 */
private val parsersOrder = listOf(
    // Int
    stringParser<Int> { str -> str.toIntOrNull() },
    // Long
    stringParser<Long> { str -> str.toLongOrNull() },
    // kotlinx.datetime.Instant
    stringParser<Instant>(catch = true) { str ->
        // same as Instant.parse(str), but with one fewer potential exception thrown/caught
        DateTimeComponents.Formats.ISO_DATE_TIME_OFFSET
            .parse(str)
            .toInstantUsingOffset()
    },
    // java.time.Instant, skipped when the kotlinx.datetime.Instant parser is checked
    stringParser<JavaInstant>(catch = true, coveredBy = setOf(typeOf<Instant>())) { str ->
        JavaInstant.parse(str)
    },
    // kotlinx.datetime.LocalDateTime
    stringParserWithOptions<LocalDateTime> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toLocalDateTimeOrNull(formatter) }
        parse
    },
    // java.time.LocalDateTime, skipped when the kotlinx.datetime.LocalDateTime parser is checked
    stringParserWithOptions<JavaLocalDateTime>(coveredBy = setOf(typeOf<LocalDateTime>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toJavaLocalDateTimeOrNull(formatter) }
        parse
    },
    // kotlinx.datetime.LocalDate
    stringParserWithOptions<LocalDate> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toLocalDateOrNull(formatter) }
        parse
    },
    // java.time.LocalDate, skipped when the kotlinx.datetime.LocalDate parser is checked
    stringParserWithOptions<JavaLocalDate>(coveredBy = setOf(typeOf<LocalDate>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toJavaLocalDateOrNull(formatter) }
        parse
    },
    // kotlin.time.Duration
    stringParser<Duration>(catch = true) { str -> Duration.parse(str) },
    // java.time.Duration, skipped when the kotlin.time.Duration parser is checked
    stringParser<JavaDuration>(catch = true, coveredBy = setOf(typeOf<Duration>())) { str ->
        JavaDuration.parse(str)
    },
    // kotlinx.datetime.LocalTime
    stringParserWithOptions<LocalTime> { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toLocalTimeOrNull(formatter) }
        parse
    },
    // java.time.LocalTime, skipped when the kotlinx.datetime.LocalTime parser is checked
    stringParserWithOptions<JavaLocalTime>(coveredBy = setOf(typeOf<LocalTime>())) { options ->
        val formatter = options?.getDateTimeFormatter()
        val parse = { str: String -> str.toJavaLocalTimeOrNull(formatter) }
        parse
    },
    // java.net.URL
    stringParser<URL> { str -> str.toUrlOrNull() },
    // Double, with explicit number format or taken from current locale
    parserToDoubleWithOptions,
    // Double, with POSIX format
    stringParser<Double> { str ->
        str.parseDouble(NumberFormat.getInstance(Locale.forLanguageTag("C.UTF-8")))
    },
    // Boolean
    stringParser<Boolean> { str -> str.toBooleanOrNull() },
    // BigDecimal
    stringParser<BigDecimal> { str -> str.toBigDecimalOrNull() },
    // JSON array as DataFrame<*>; only attempted when the trimmed text is wrapped in [ ]
    stringParser<AnyFrame>(catch = true) { str ->
        val candidate = str.trim()
        val looksLikeArray = candidate.startsWith("[") && candidate.endsWith("]")
        if (looksLikeArray) DataFrame.readJsonStr(str) else null
    },
    // JSON object as DataRow<*>; only attempted when the trimmed text is wrapped in { }
    stringParser<AnyRow>(catch = true) { str ->
        val candidate = str.trim()
        val looksLikeObject = candidate.startsWith("{") && candidate.endsWith("}")
        if (looksLikeObject) DataFrame.readJsonStr(str).single() else null
    },
    // No parser found, return as String
    // must be last in the list of parsers to return original unparsed string
    stringParser<String> { str -> str },
)
307342

308-
private val parsersMap = parsersOrder.associateBy { it.type }
343+
/**
 * Parsers from [parsersOrder], indexed by the [KType] each one produces.
 *
 * NOTE(review): [associateBy] keeps only the last element for a given key, so when two entries of
 * [parsersOrder] report the same type (the two `Double` parsers appear to), only the later one is
 * reachable through this map. Iterate [parsersOrder] itself when every parser must be visited.
 */
internal val parsersMap = parsersOrder.associateBy { it.type }
309344

310345
val size: Int = parsersOrder.size
311346

@@ -352,49 +387,76 @@ internal object Parsers : GlobalParserOptions {
352387
}
353388
}
354389

390+
/**
 * Tries to parse a column of strings into a column of a different type.
 * Each parser in [Parsers] is run in its declared order until a valid parser is found,
 * a.k.a. that parser was able to parse all values in the column successfully. As soon as a parser
 * fails on a value it is abandoned and the next parser is tried. If all the others fail, the final
 * parser simply returns the original string, leaving the column unchanged.
 *
 * `null` values and values contained in the configured null-strings become `null` in the result
 * without being passed to a parser. All other values are trimmed before parsing.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values, or this column itself if the result would still be a
 *   `String` column and no null-string was replaced
 */
internal fun DataColumn<String?>.tryParseImpl(options: ParserOptions?): DataColumn<*> {
    val columnSize = size
    val parsedValues = ArrayList<Any?>(columnSize)
    var hasNulls = false
    var hasNotNulls = false
    var nullStringParsed = false
    val nulls = options?.nullStrings ?: Parsers.nulls

    // Types of all registered parsers; a parser covered by any of these is skipped.
    val parserTypesToCheck = Parsers.parsersMap.keys

    var correctParser: StringParser<*>? = null
    // Walk the parsers by index instead of through the type-keyed map: the map holds one entry
    // per type, so parsers sharing a type (e.g. the two Double parsers) would not all be tried.
    for (parserIndex in 0 until Parsers.size) {
        val parser = Parsers[parserIndex]
        if (parser.coveredBy.any { it in parserTypesToCheck }) continue

        val parserWithOptions = parser.applyOptions(options)
        // Reset everything collected by the previous (failed) parser.
        parsedValues.clear()
        hasNulls = false
        hasNotNulls = false
        nullStringParsed = false

        for (str in this) {
            when {
                str == null -> {
                    parsedValues += null
                    hasNulls = true
                }

                str in nulls -> {
                    parsedValues += null
                    hasNulls = true
                    nullStringParsed = true
                }

                else -> {
                    val res = parserWithOptions(str.trim())
                    // This parser cannot handle the column: stop scanning the remaining values
                    // right away instead of running (and possibly throwing in) it for each of them.
                    if (res == null) break
                    parsedValues += res
                    hasNotNulls = true
                }
            }
        }

        // A parser is accepted only if it produced a result for every value.
        if (parsedValues.size >= columnSize) {
            correctParser = parser
            break
        }
    }
    val chosenParser = checkNotNull(correctParser) { "Valid parser not found" }

    // A column with only nulls keeps its original element type; nullability follows the data.
    val type = (if (hasNotNulls) chosenParser.type else this.type()).withNullability(hasNulls)
    if (type.jvmErasure == String::class && !nullStringParsed) {
        return this // nothing parsed
    }
    return DataColumn.create(name(), parsedValues, type)
}
400462

0 commit comments

Comments
 (0)