Skip to content

Commit 4677665

Browse files
committed
Added the `parseExperimentalUuid` parser option for parsing to the experimental `kotlin.uuid.Uuid` type. It is `false` by default until `kotlin.uuid.Uuid` is no longer experimental.
1 parent c3aafb8 commit 4677665

File tree

5 files changed

+121
-14
lines changed

5 files changed

+121
-14
lines changed

core/api/core.api

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,10 +2419,12 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/GlobalParser
24192419
public abstract fun addSkipType (Lkotlin/reflect/KType;)V
24202420
public abstract fun getLocale ()Ljava/util/Locale;
24212421
public abstract fun getNulls ()Ljava/util/Set;
2422+
public abstract fun getParseExperimentalUuid ()Z
24222423
public abstract fun getSkipTypes ()Ljava/util/Set;
24232424
public abstract fun getUseFastDoubleParser ()Z
24242425
public abstract fun resetToDefault ()V
24252426
public abstract fun setLocale (Ljava/util/Locale;)V
2427+
public abstract fun setParseExperimentalUuid (Z)V
24262428
public abstract fun setUseFastDoubleParser (Z)V
24272429
}
24282430

@@ -3522,17 +3524,22 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {
35223524
public fun <init> ()V
35233525
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)V
35243526
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
3525-
public fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V
3527+
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V
35263528
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
3529+
public fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)V
3530+
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
35273531
public final synthetic fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
3528-
public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
3532+
public final synthetic fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
3533+
public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
35293534
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
35303535
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
3536+
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
35313537
public fun equals (Ljava/lang/Object;)Z
35323538
public final fun getDateTimeFormatter ()Ljava/time/format/DateTimeFormatter;
35333539
public final fun getDateTimePattern ()Ljava/lang/String;
35343540
public final fun getLocale ()Ljava/util/Locale;
35353541
public final fun getNullStrings ()Ljava/util/Set;
3542+
public final fun getParseExperimentalUuid ()Ljava/lang/Boolean;
35363543
public final fun getSkipTypes ()Ljava/util/Set;
35373544
public final fun getUseFastDoubleParser ()Ljava/lang/Boolean;
35383545
public fun hashCode ()I

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ public interface GlobalParserOptions {
7373
public val nulls: Set<String>
7474

7575
public val skipTypes: Set<KType>
76+
77+
/**
78+
* Whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type.
79+
* By default, this is false and UUIDs are not recognized.
80+
*/
81+
public var parseExperimentalUuid: Boolean
7682
}
7783

7884
/**
@@ -101,6 +107,8 @@ public interface GlobalParserOptions {
101107
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
102108
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
103109
* @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
110+
* @param parseExperimentalUuid whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type.
111+
* By default, this is false and UUIDs are not recognized.
104112
*/
105113
public class ParserOptions(
106114
public val locale: Locale? = null,
@@ -110,8 +118,31 @@ public class ParserOptions(
110118
public val nullStrings: Set<String>? = null,
111119
public val skipTypes: Set<KType>? = null,
112120
public val useFastDoubleParser: Boolean? = null,
121+
public val parseExperimentalUuid: Boolean? = null,
113122
) {
114123

124+
/** For binary compatibility. */
125+
@Deprecated(
126+
message = PARSER_OPTIONS,
127+
level = DeprecationLevel.HIDDEN,
128+
)
129+
public constructor(
130+
locale: Locale? = null,
131+
dateTimeFormatter: DateTimeFormatter? = null,
132+
dateTimePattern: String? = null,
133+
nullStrings: Set<String>? = null,
134+
skipTypes: Set<KType>? = null,
135+
useFastDoubleParser: Boolean? = null,
136+
) : this(
137+
locale = locale,
138+
dateTimeFormatter = dateTimeFormatter,
139+
dateTimePattern = dateTimePattern,
140+
nullStrings = nullStrings,
141+
skipTypes = skipTypes,
142+
useFastDoubleParser = useFastDoubleParser,
143+
parseExperimentalUuid = null,
144+
)
145+
115146
/** For binary compatibility. */
116147
@Deprecated(
117148
message = PARSER_OPTIONS,
@@ -129,7 +160,31 @@ public class ParserOptions(
129160
nullStrings = nullStrings,
130161
skipTypes = null,
131162
useFastDoubleParser = null,
163+
parseExperimentalUuid = null,
164+
)
165+
166+
/** For binary compatibility. */
167+
@Deprecated(
168+
message = PARSER_OPTIONS_COPY,
169+
level = DeprecationLevel.HIDDEN,
132170
)
171+
public fun copy(
172+
locale: Locale? = this.locale,
173+
dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter,
174+
dateTimePattern: String? = this.dateTimePattern,
175+
nullStrings: Set<String>? = this.nullStrings,
176+
skipTypes: Set<KType>? = this.skipTypes,
177+
useFastDoubleParser: Boolean? = this.useFastDoubleParser,
178+
): ParserOptions =
179+
ParserOptions(
180+
locale = locale,
181+
dateTimeFormatter = dateTimeFormatter,
182+
dateTimePattern = dateTimePattern,
183+
nullStrings = nullStrings,
184+
skipTypes = skipTypes,
185+
useFastDoubleParser = useFastDoubleParser,
186+
parseExperimentalUuid = null,
187+
)
133188

134189
/** For binary compatibility. */
135190
@Deprecated(
@@ -149,6 +204,7 @@ public class ParserOptions(
149204
nullStrings = nullStrings,
150205
skipTypes = skipTypes,
151206
useFastDoubleParser = useFastDoubleParser,
207+
parseExperimentalUuid = null,
152208
)
153209

154210
internal fun getDateTimeFormatter(): DateTimeFormatter? =
@@ -166,6 +222,7 @@ public class ParserOptions(
166222
nullStrings: Set<String>? = this.nullStrings,
167223
skipTypes: Set<KType>? = this.skipTypes,
168224
useFastDoubleParser: Boolean? = this.useFastDoubleParser,
225+
parseExperimentalUuid: Boolean? = this.parseExperimentalUuid,
169226
): ParserOptions =
170227
ParserOptions(
171228
locale = locale,
@@ -174,6 +231,7 @@ public class ParserOptions(
174231
nullStrings = nullStrings,
175232
skipTypes = skipTypes,
176233
useFastDoubleParser = useFastDoubleParser,
234+
parseExperimentalUuid = parseExperimentalUuid,
177235
)
178236

179237
override fun equals(other: Any?): Boolean {
@@ -188,6 +246,7 @@ public class ParserOptions(
188246
if (dateTimePattern != other.dateTimePattern) return false
189247
if (nullStrings != other.nullStrings) return false
190248
if (skipTypes != other.skipTypes) return false
249+
if (parseExperimentalUuid != other.parseExperimentalUuid) return false
191250

192251
return true
193252
}
@@ -199,11 +258,12 @@ public class ParserOptions(
199258
result = 31 * result + (dateTimePattern?.hashCode() ?: 0)
200259
result = 31 * result + (nullStrings?.hashCode() ?: 0)
201260
result = 31 * result + (skipTypes?.hashCode() ?: 0)
261+
result = 31 * result + (parseExperimentalUuid?.hashCode() ?: 0)
202262
return result
203263
}
204264

205265
override fun toString(): String =
206-
"ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser)"
266+
"ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser, parseExperimentalUuid=$parseExperimentalUuid)"
207267
}
208268

209269
/** @include [tryParseImpl] */

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ internal object Parsers : GlobalParserOptions {
142142
override val skipTypes: Set<KType>
143143
get() = skipTypesSet
144144

145+
override var parseExperimentalUuid: Boolean = false
146+
145147
override fun addDateTimePattern(pattern: String) {
146148
formatters.add(DateTimeFormatter.ofPattern(pattern))
147149
}
@@ -180,6 +182,7 @@ internal object Parsers : GlobalParserOptions {
180182
.let { formatters.add(it) }
181183

182184
useFastDoubleParser = true
185+
parseExperimentalUuid = false
183186
_locale = null
184187
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
185188
}
@@ -428,6 +431,8 @@ internal object Parsers : GlobalParserOptions {
428431
}
429432
}
430433

434+
private val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
435+
431436
@OptIn(ExperimentalUuidApi::class)
432437
internal val parsersOrder = listOf(
433438
// Int
@@ -494,20 +499,25 @@ internal object Parsers : GlobalParserOptions {
494499
posixParserToDoubleWithOptions,
495500
// Boolean
496501
stringParser<Boolean> { it.toBooleanOrNull() },
497-
// UUID
498-
stringParser<Uuid> { str ->
499-
500-
val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
502+
// Uuid
503+
stringParserWithOptions<Uuid> { options ->
504+
val parser = { str: String ->
505+
val parseExperimentalUuid = options?.parseExperimentalUuid ?: this.parseExperimentalUuid
506+
when {
507+
!parseExperimentalUuid -> null
508+
509+
uuidRegex.matches(str) -> {
510+
try {
511+
Uuid.parse(str)
512+
} catch (_: IllegalArgumentException) {
513+
null
514+
}
515+
}
501516

502-
if (uuidRegex.matches(str)) {
503-
try {
504-
Uuid.parse(str)
505-
} catch (e: IllegalArgumentException) {
506-
null
517+
else -> null
507518
}
508-
} else {
509-
null
510519
}
520+
parser
511521
},
512522
// BigInteger
513523
stringParser<BigInteger> { it.toBigIntegerOrNull() },

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,20 @@ class ParseTests {
489489
fun `parse valid Uuid`() {
490490
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
491491
val column by columnOf(validUUID)
492+
val parsed = column.parse(ParserOptions(parseExperimentalUuid = true))
493+
494+
parsed.type() shouldBe typeOf<Uuid>()
495+
(parsed[0] as Uuid).toString() shouldBe validUUID // Change UUID to Uuid
496+
}
497+
498+
@OptIn(ExperimentalUuidApi::class)
499+
@Test
500+
fun `parse valid Uuid with GlobalParserOptions`() {
501+
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
502+
val column by columnOf(validUUID)
503+
DataFrame.parser.parseExperimentalUuid = true
492504
val parsed = column.parse()
505+
DataFrame.parser.resetToDefault()
493506

494507
parsed.type() shouldBe typeOf<Uuid>()
495508
(parsed[0] as Uuid).toString() shouldBe validUUID // Change UUID to Uuid
@@ -500,6 +513,20 @@ class ParseTests {
500513
fun `parse invalid Uuid`() {
501514
val invalidUUID = "this is not a UUID"
502515
val column = columnOf(invalidUUID)
516+
// Use tryParse: the string is not in UUID format, so the column is expected to stay a String.
517+
val parsed = column.tryParse(
518+
ParserOptions(parseExperimentalUuid = true),
519+
)
520+
521+
parsed.type() shouldNotBe typeOf<Uuid>()
522+
parsed.type() shouldBe typeOf<String>()
523+
}
524+
525+
@OptIn(ExperimentalUuidApi::class)
526+
@Test
527+
fun `do not parse Uuid by default`() {
528+
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
529+
val column = columnOf(validUUID)
503530
val parsed = column.tryParse() // tryParse as string is not formatted.
504531

505532
parsed.type() shouldNotBe typeOf<Uuid>()

docs/StardustDocs/topics/parse.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ df.parse { age and weight }
4242
* `URL` (`java.net`)
4343
* [`Double` (with optional locale settings)](#parsing-doubles)
4444
* `Boolean`
45+
* `Uuid` ([`kotlin.uuid.Uuid`](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/)) (requires `parseExperimentalUuid = true`)
4546
* `BigDecimal`
4647
* `JSON` (arrays and objects) (requires the `org.jetbrains.kotlinx:dataframe-json` dependency)
4748

@@ -69,6 +70,8 @@ Available parser options:
6970
* Empty set by global default; parsing can result in any supported type
7071
* `useFastDoubleParser: Boolean` is used to enable or disable the [new fast double parser](#parsing-doubles)
7172
* Enabled by global default
73+
* `parseExperimentalUuid: Boolean` is used to enable or disable parsing to the experimental [`kotlin.uuid.Uuid` class](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/).
74+
* Disabled by global default
7275

7376
<!---FUN parseWithOptions-->
7477

0 commit comments

Comments
 (0)