Skip to content

Commit 95b9df6

Browse files
committed
added "canParse" functions for Kotlin/Java duration to avoid exceptions in parsing
1 parent 0ca3883 commit 95b9df6

File tree

2 files changed

+170
-2
lines changed

2 files changed

+170
-2
lines changed
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
package org.jetbrains.kotlinx.dataframe.impl
2+
3+
import kotlin.time.Duration
4+
import kotlin.time.DurationUnit
5+
6+
/**
 * Checks whether [value] is laid out like a string that [Duration.parse] accepts, so that
 * obviously unsuitable strings can be rejected without triggering a parsing exception.
 *
 * The checks follow the logic of [Duration.parse] (Kotlin version 2.0.20). Three forms are
 * recognized, each optionally preceded by `+` or `-`:
 * - the ISO form starting with `P`, e.g. `PT1H30M`,
 * - the word `Infinity` (compared case-insensitively),
 * - the default form built from "number + unit name" components, e.g. `1h 30m`.
 *
 * Only the layout is inspected: the numeric text of a component is collected character by
 * character and is never converted to a number here.
 *
 * @param value the string to check
 * @return `true` if every layout check passes, `false` otherwise
 */
internal fun Duration.Companion.canParse(value: String): Boolean {
    var end = value.length
    if (end == 0) return false

    val infinity = "Infinity"

    // An optional sign occupies the first character.
    var pos = if (value[0] == '+' || value[0] == '-') 1 else 0
    val signed = pos > 0
    if (end <= pos) return false

    if (value[pos] == 'P') {
        // ISO form: P[<n>D][T[<n>H][<n>M][<n>S]]
        pos++
        if (pos == end) return false

        val signsAndDot = "+-."
        var inTimePart = false
        var lastUnit: DurationUnit? = null
        while (pos < end) {
            if (value[pos] == 'T') {
                // 'T' may appear only once and must be followed by at least one component.
                if (inTimePart) return false
                pos++
                if (pos == end) return false
                inTimePart = true
                continue
            }

            val number = value.substringWhile(pos) { it in '0'..'9' || it in signsAndDot }
            if (number.isEmpty()) return false
            pos += number.length

            // Every number needs a unit designator right after it.
            if (pos >= end) return false
            val designator = value[pos]
            pos++

            val unit = durationUnitByIsoCharOrNull(designator, inTimePart) ?: return false
            // Units have to appear from the largest to the smallest, without repetition.
            if (lastUnit != null && lastUnit <= unit) return false
            lastUnit = unit
        }
        return true
    }

    val isInfinity = value.regionMatches(
        thisOffset = pos,
        other = infinity,
        otherOffset = 0,
        length = maxOf(end - pos, infinity.length),
        ignoreCase = true,
    )
    if (isInfinity) return true

    // Default string format, e.g. "1d 2h 3m 4.5s" or "-(1h 30m)".
    var lastUnit: DurationUnit? = null
    var spacesAllowed = !signed
    if (signed && value[pos] == '(' && value.last() == ')') {
        // A signed value may wrap its components in parentheses; spaces are then allowed inside.
        spacesAllowed = true
        pos++
        end--
        if (pos == end) return false
    }

    var isFirstComponent = true
    while (pos < end) {
        if (!isFirstComponent && spacesAllowed) {
            pos = value.skipWhile(pos) { it == ' ' }
        }
        isFirstComponent = false

        val number = value.substringWhile(pos) { it in '0'..'9' || it == '.' }
        if (number.isEmpty()) return false
        pos += number.length

        val shortName = value.substringWhile(pos) { it in 'a'..'z' }
        pos += shortName.length

        val unit = durationUnitByShortNameOrNull(shortName) ?: return false
        // Units have to appear from the largest to the smallest, without repetition.
        if (lastUnit != null && lastUnit <= unit) return false
        lastUnit = unit

        // A component with a fractional part has to be the last one.
        if (number.indexOf('.') > 0 && pos < end) return false
    }
    return true
}
87+
88+
/**
 * Checks whether [value] matches the textual pattern of an ISO-8601 duration as used by
 * `java.time.Duration.parse`, without throwing an exception.
 *
 * This is a pattern check only: the whole string must match [isoDurationRegex]. No numeric
 * conversion is performed here.
 *
 * @param value the string to check
 * @return `true` if the entire string matches the pattern, `false` otherwise
 */
internal fun javaDurationCanParse(value: String): Boolean = isoDurationRegex.matches(value)

/**
 * Regex from [java.time.Duration.Lazy.PATTERN], it represents the ISO-8601 duration format:
 * an optional sign, `P`, an optional days part and an optional `T` time part with hours,
 * minutes and (optionally fractional) seconds.
 *
 * The grouping parentheses are essential: without them `P?:` would require a literal colon
 * and no real duration string would ever match.
 */
private val isoDurationRegex = Regex(
    pattern = "([-+]?)P(?:([-+]?[0-9]+)D)?" +
        "(T(?:([-+]?[0-9]+)H)?(?:([-+]?[0-9]+)M)?(?:([-+]?[0-9]+)(?:[.,]([0-9]{0,9}))?S)?)?",
    option = RegexOption.IGNORE_CASE,
)
100+
101+
/**
 * Equivalent of [kotlin.time.substringWhile] (Kotlin version 2.0.20).
 *
 * Returns the part of this string that starts at [startIndex] and extends over the
 * consecutive characters for which [predicate] holds. The result is empty when the
 * character at [startIndex] does not satisfy [predicate].
 */
private inline fun String.substringWhile(startIndex: Int, predicate: (Char) -> Boolean): String {
    var endIndex = startIndex
    while (endIndex < length && predicate(this[endIndex])) endIndex++
    return substring(startIndex, endIndex)
}
106+
107+
/**
 * Copy of [kotlin.time.skipWhile] (Kotlin version 2.0.20).
 *
 * Starting at [startIndex], advances over the characters that satisfy [predicate] and
 * returns the index of the first character that does not, or the end of the scan when
 * every remaining character matches. If [startIndex] is already at or past the end of
 * the string, [startIndex] itself is returned.
 */
private inline fun String.skipWhile(startIndex: Int, predicate: (Char) -> Boolean): Int {
    var cursor = startIndex
    while (cursor < length) {
        if (!predicate(this[cursor])) break
        cursor++
    }
    return cursor
}
115+
116+
/**
 * Based on [kotlin.time.durationUnitByIsoChar] (Kotlin version 2.0.20).
 *
 * Maps an ISO unit designator to its [DurationUnit]. Before the `T` separator
 * ([isTimeComponent] is `false`) only `D` is known; after it, `H`, `M` and `S` are.
 *
 * @param isoChar the unit designator character
 * @param isTimeComponent whether the designator belongs to the time part of the string
 * @return the matching unit, or `null` if the designator is not valid in that position
 */
private fun durationUnitByIsoCharOrNull(isoChar: Char, isTimeComponent: Boolean): DurationUnit? {
    if (!isTimeComponent) {
        return if (isoChar == 'D') DurationUnit.DAYS else null
    }
    return when (isoChar) {
        'H' -> DurationUnit.HOURS
        'M' -> DurationUnit.MINUTES
        'S' -> DurationUnit.SECONDS
        else -> null
    }
}
138+
139+
/**
 * Based on [kotlin.time.durationUnitByShortName] (Kotlin version 2.0.20).
 *
 * Maps the lower-case short unit name used in the default duration format
 * (`d`, `h`, `m`, `s`, `ms`, `us`, `ns`) to its [DurationUnit].
 *
 * @param shortName the unit name to look up; the comparison is exact
 * @return the matching unit, or `null` if the name is not one of the known short names
 */
private fun durationUnitByShortNameOrNull(shortName: String): DurationUnit? {
    return when (shortName) {
        "d" -> DurationUnit.DAYS
        "h" -> DurationUnit.HOURS
        "m" -> DurationUnit.MINUTES
        "s" -> DurationUnit.SECONDS
        "ms" -> DurationUnit.MILLISECONDS
        "us" -> DurationUnit.MICROSECONDS
        "ns" -> DurationUnit.NANOSECONDS
        else -> null
    }
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ import org.jetbrains.kotlinx.dataframe.columns.size
2929
import org.jetbrains.kotlinx.dataframe.columns.values
3030
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3131
import org.jetbrains.kotlinx.dataframe.hasNulls
32+
import org.jetbrains.kotlinx.dataframe.impl.canParse
3233
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
3334
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
35+
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3436
import org.jetbrains.kotlinx.dataframe.io.isURL
3537
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
3638
import org.jetbrains.kotlinx.dataframe.typeClass
@@ -207,6 +209,20 @@ internal object Parsers : GlobalParserOptions {
207209
// Converts via the Java local-time helper first; a null result from it is passed through unchanged.
private fun String.toLocalTimeOrNull(formatter: DateTimeFormatter?): LocalTime? {
    val javaLocalTime = toJavaLocalTimeOrNull(formatter) ?: return null
    return javaLocalTime.toKotlinLocalTime()
}
209211

212+
// Runs the pattern pre-check first and only calls JavaDuration.parse on strings that pass it.
// The parse call stays wrapped in catchSilent.
private fun String.toJavaDurationOrNull(): JavaDuration? {
    if (!javaDurationCanParse(this)) return null
    return catchSilent { JavaDuration.parse(this) } // will likely succeed
}
218+
219+
// Runs the layout pre-check first and only calls Duration.parse on strings that pass it.
// The parse call stays wrapped in catchSilent.
private fun String.toDurationOrNull(): Duration? {
    if (!Duration.canParse(this)) return null
    return catchSilent { Duration.parse(this) } // will likely succeed
}
225+
210226
private fun String.parseDouble(format: NumberFormat) =
211227
when (uppercase(Locale.getDefault())) {
212228
"NAN" -> Double.NaN
@@ -292,9 +308,9 @@ internal object Parsers : GlobalParserOptions {
292308
parser
293309
},
294310
// kotlin.time.Duration
295-
stringParser<Duration>(catch = true) { Duration.parse(it) },
311+
stringParser<Duration> { it.toDurationOrNull() },
296312
// java.time.Duration, will be skipped if kotlin.time.Duration is already checked
297-
stringParser<JavaDuration>(catch = true, coveredBy = setOf(typeOf<Duration>())) { JavaDuration.parse(it) },
313+
stringParser<JavaDuration>(coveredBy = setOf(typeOf<Duration>())) { it.toJavaDurationOrNull() },
298314
// kotlinx.datetime.LocalTime
299315
stringParserWithOptions<LocalTime> { options ->
300316
val formatter = options?.getDateTimeFormatter()

0 commit comments

Comments
 (0)