From 8655adc6a8936dc299af3a65b1b22368b5a09d92 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Wed, 29 Jan 2025 15:12:40 +0100
Subject: [PATCH 1/6] set useFastDoubleParser parser option to true by default
 and updated KDocs

---
 .../org/jetbrains/kotlinx/dataframe/api/convert.kt |  9 +++++----
 .../org/jetbrains/kotlinx/dataframe/api/parse.kt   | 11 +++++++++--
 .../jetbrains/kotlinx/dataframe/impl/api/parse.kt  | 14 ++++++++++++--
 .../kotlinx/dataframe/impl/io/FastDoubleParser.kt  |  4 ++--
 .../kotlinx/dataframe/documentation/DelimParams.kt |  2 --
 .../kotlinx/dataframe/impl/io/readDelim.kt         |  6 +-----
 6 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
index 61d03b3ff7..071fb15fc9 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
@@ -37,6 +37,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
 import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
 import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
 import org.jetbrains.kotlinx.dataframe.io.toDataFrame
+import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
 import java.math.BigDecimal
 import java.math.BigInteger
 import java.net.URL
@@ -223,8 +224,8 @@ public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColum
  * @include [DataColumnStringConvertToDoubleDoc]
  * @param nullStrings a set of strings that should be treated as `null` values.
  *   The default in [DataFrame.parser][DataFrame.Companion.parser] is ["null", "NULL", "NA", "N/A"].
- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
- *   The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
+ * @param useFastDoubleParser whether to use [FastDoubleParser].
+ *   The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
  */
 @JvmName("convertToDoubleFromString")
 public fun DataColumn<String>.convertToDouble(
@@ -243,8 +244,8 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
  * @include [DataColumnStringConvertToDoubleDoc]
  * @param nullStrings a set of strings that should be treated as `null` values.
  *   The default in [DataFrame.parser][DataFrame.Companion.parser] is ["null", "NULL", "NA", "N/A"].
- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
- *   The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
+ * @param useFastDoubleParser whether to use [FastDoubleParser].
+ *   The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
  */
 @JvmName("convertToDoubleFromStringNullable")
 public fun DataColumn<String?>.convertToDouble(
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
index c208e2a4ac..b68f234a80 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
@@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
 import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
 import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
 import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
+import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
 import org.jetbrains.kotlinx.dataframe.io.readCSV
 import org.jetbrains.kotlinx.dataframe.typeClass
 import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
@@ -45,6 +46,12 @@ public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options
 public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> =
     parse(options) { columns.toColumnSet() }
 
+/**
+ * Global counterpart of [ParserOptions].
+ * Settings changed here will affect the defaults for all parsing operations.
+ *
+ * The default values are set by [Parsers.resetToDefault].
+ */
 public interface GlobalParserOptions {
 
     public fun addDateTimePattern(pattern: String)
@@ -54,7 +61,7 @@ public interface GlobalParserOptions {
     /** This function can be called to skip some types. Parsing will be attempted for all other types. */
     public fun addSkipType(type: KType)
 
-    /** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
+    /** Whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter. */
     public var useFastDoubleParser: Boolean
 
     public fun resetToDefault()
@@ -91,7 +98,7 @@ public interface GlobalParserOptions {
  *   `["null", "NULL", "NA", "N/A"]`.
  * @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
  *   By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
+ * @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
  */
 public class ParserOptions(
     public val locale: Locale? = null,
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
index 239c22d5c4..50d12b3db1 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
@@ -25,11 +25,13 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
 import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
 import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
 import org.jetbrains.kotlinx.dataframe.api.map
+import org.jetbrains.kotlinx.dataframe.api.parser
 import org.jetbrains.kotlinx.dataframe.api.to
 import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
 import org.jetbrains.kotlinx.dataframe.columns.size
 import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
 import org.jetbrains.kotlinx.dataframe.hasNulls
+import org.jetbrains.kotlinx.dataframe.impl.asNullable
 import org.jetbrains.kotlinx.dataframe.impl.canParse
 import org.jetbrains.kotlinx.dataframe.impl.catchSilent
 import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
@@ -47,6 +49,7 @@ import java.time.format.DateTimeFormatterBuilder
 import java.time.temporal.Temporal
 import java.time.temporal.TemporalQuery
 import java.util.Locale
+import kotlin.properties.Delegates
 import kotlin.reflect.KClass
 import kotlin.reflect.KType
 import kotlin.reflect.full.withNullability
@@ -114,6 +117,13 @@ internal class StringParserWithFormat<T>(
     }
 }
 
+/**
+ * Central implementation for [GlobalParserOptions].
+ *
+ * Can be obtained by a user by calling [DataFrame.parser][DataFrame.Companion.parser].
+ *
+ * Defaults are set by [resetToDefault].
+ */
 internal object Parsers : GlobalParserOptions {
 
     private val formatters: MutableList<DateTimeFormatter> = mutableListOf()
@@ -140,7 +150,7 @@ internal object Parsers : GlobalParserOptions {
         skipTypesSet.add(type)
     }
 
-    override var useFastDoubleParser: Boolean = false
+    override var useFastDoubleParser by Delegates.notNull<Boolean>()
 
     private var _locale: Locale? = null
 
@@ -165,7 +175,7 @@ internal object Parsers : GlobalParserOptions {
             .toFormatter()
             .let { formatters.add(it) }
 
-        useFastDoubleParser = false
+        useFastDoubleParser = true
         _locale = null
         nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
     }
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
index 47361dd0a5..ad597e284a 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
@@ -24,8 +24,8 @@ private val NANS = arrayOf("nan", "na", "n/a")
 /**
  * Parses a [String]/[CharSequence], [CharArray], or [ByteArray] into a [Double].
  *
- * If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with an _EXPERIMENTAL_
- * fast double parser, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
+ * If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with the
+ * fast double parser library, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
  * If not, or if it fails, it will use [NumberFormat] to parse the input.
  *
  * Public, so it can be used in other modules.
diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt
index aa75feb6a2..c0f39c79c3 100644
--- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt
+++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt
@@ -138,8 +138,6 @@ internal object DelimParams {
      *   ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
      *
      *   The only exceptions are:
-     *   - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`,
-     *   regardless of the global setting.
      *   - [nullStrings][ParserOptions.nullStrings], which, if `null`,
      *   will take the global setting + {@include [DefaultNullStringsContentLink]}.
      *   - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt
index bf37bc86f3..dec844836c 100644
--- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt
+++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt
@@ -112,11 +112,7 @@ internal fun readDelimImpl(
 ): DataFrame<*> {
     // set up the csv specs
     val csvSpecs = with(CsvSpecs.builder()) {
-        // turn on fast double parser if not explicitly set regardless of the global parser options
-        @Suppress("NullableBooleanElvis")
-        val adjustedParserOptions = (parserOptions ?: ParserOptions())
-            .copy(useFastDoubleParser = parserOptions?.useFastDoubleParser ?: true)
-        customDoubleParser(DataFrameCustomDoubleParser(adjustedParserOptions))
+        customDoubleParser(DataFrameCustomDoubleParser(parserOptions))
 
         // use the given nullStrings if provided, else take the global ones + some extras
         val nullStrings = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)

From db7c57dd02b40d755d12ffd62f4fa89dd35e3162 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Wed, 29 Jan 2025 16:46:53 +0100
Subject: [PATCH 2/6] Fixed test: converting String to Double in different
 locales

---
 .../kotlinx/dataframe/io/ParserTests.kt       | 41 ++++++++++++-------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
index 64d2ced7b1..3efc180f08 100644
--- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
+++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
@@ -1,6 +1,5 @@
 package org.jetbrains.kotlinx.dataframe.io
 
-import io.kotest.assertions.throwables.shouldThrow
 import io.kotest.matchers.shouldBe
 import kotlinx.datetime.LocalDateTime
 import kotlinx.datetime.LocalTime
@@ -145,9 +144,9 @@ class ParserTests {
         parsed.toList() shouldBe listOf(1, 2, null, 3, null, null, 4.0, 5.0)
     }
 
-    @Test // This does not yet use fastDoubleParser!
+    @Test
     fun `converting String to Double in different locales`() {
-        val currentLocale = Locale.getDefault()
+        val systemLocale = Locale.getDefault()
         try {
             // Test 45 behaviour combinations:
 
@@ -157,11 +156,12 @@ class ParserTests {
             val columnMixed = columnOf("12.345", "67,890")
             // *
             // (3 locales as converting parameter + original converting + original converting to nullable)
-            val parsingLocaleNotDefined: Locale? = null
+            val parsingLocaleNotDefined: Locale? = null // takes parserOptions.locale ?: Locale.getDefault()
             val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US")
             val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("ru-RU")
             // *
             // 3 system locales
+            // --------------------------------------------------------------------------------
 
             Locale.setDefault(Locale.forLanguageTag("C.UTF-8"))
 
@@ -181,9 +181,13 @@ class ParserTests {
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
+
+            // --------------------------------------------------------------------------------
 
             Locale.setDefault(Locale.forLanguageTag("en-US"))
 
@@ -203,33 +207,42 @@ class ParserTests {
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
+
+            // --------------------------------------------------------------------------------
 
             Locale.setDefault(Locale.forLanguageTag("ru-RU"))
 
             columnDot.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
             columnComma.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
-            columnMixed.convertTo<Double>() shouldBe columnOf(12.345, 67890.0)
+            // uses fallback to ROOT locale
+            columnMixed.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
             columnComma.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
-            columnMixed.convertTo<Double?>() shouldBe columnOf(12.345, 67890.0)
+            // uses fallback to ROOT locale
+            columnMixed.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
-            columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67890.0)
+            // uses fallback to ROOT locale
+            columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+            // uses fallback to ROOT locale
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
         } finally {
-            Locale.setDefault(currentLocale)
+            Locale.setDefault(systemLocale)
         }
     }
 

From 8044d953ce80341a83464fd153f9ca9c786ab1e6 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Wed, 29 Jan 2025 17:32:22 +0100
Subject: [PATCH 3/6] adding comma grouping double parser test

---
 .../kotlinx/dataframe/io/ParserTests.kt       | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
index 3efc180f08..6d0f06f72f 100644
--- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
+++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
@@ -1,5 +1,6 @@
 package org.jetbrains.kotlinx.dataframe.io
 
+import io.kotest.assertions.throwables.shouldThrow
 import io.kotest.matchers.shouldBe
 import kotlinx.datetime.LocalDateTime
 import kotlinx.datetime.LocalTime
@@ -157,7 +158,9 @@ class ParserTests {
             // *
             // (3 locales as converting parameter + original converting + original converting to nullable)
             val parsingLocaleNotDefined: Locale? = null // takes parserOptions.locale ?: Locale.getDefault()
+            // uses dot as decimal separator, comma as grouping separator
             val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US")
+            // uses comma as decimal separator, NBSP as grouping separator
             val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("ru-RU")
             // *
             // 3 system locales
@@ -246,6 +249,99 @@ class ParserTests {
         }
     }
 
+    @Test
+    fun `converting String to Double in different locales with comma grouping`() {
+        val systemLocale = Locale.getDefault()
+        try {
+            // Test 45 behaviour combinations:
+
+            // 3 source columns
+            val columnDot = columnOf("123,456.789", "0,987,654.321")
+            val columnComma = columnOf("123.456,789", "0.987.654,321")
+            val columnMixed = columnOf("123,456.789", "0.987.654,321")
+            // *
+            // (3 locales as converting parameter + original converting + original converting to nullable)
+            val parsingLocaleNotDefined: Locale? = null // takes parserOptions.locale ?: Locale.getDefault()
+            val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US")
+            val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("nl-NL")
+            // *
+            // 3 system locales
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("C.UTF-8"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertTo<Double>() }
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double>() }
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertTo<Double?>() }
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double?>() }
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertToDouble(parsingLocaleNotDefined) }
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleNotDefined) }
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertToDouble(parsingLocaleUsesDot) }
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesDot) }
+
+            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("en-US"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertTo<Double>() }
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double>() }
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertTo<Double?>() }
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double?>() }
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertToDouble(parsingLocaleNotDefined) }
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleNotDefined) }
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertToDouble(parsingLocaleUsesDot) }
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesDot) }
+
+            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("nl-NL"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double>() }
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertTo<Double?>() }
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleNotDefined) }
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnComma.convertToDouble(parsingLocaleUsesDot) }
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesDot) }
+
+            shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) }
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) }
+        } finally {
+            Locale.setDefault(systemLocale)
+        }
+    }
+
     /** Checks fix for [Issue #593](https://github.com/Kotlin/dataframe/issues/593) */
     @Test
     fun `Mixing null and json`() {

From 89e0d416b0127e0b49b09af5531333b774eb9731 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Mon, 10 Feb 2025 21:33:07 +0100
Subject: [PATCH 4/6] Improved fallback mechanism of FastDoubleParser to take
 into account all other locales, not just ROOT. Finished parse tests

---
 .../kotlinx/dataframe/api/convert.kt          |   2 +-
 .../kotlinx/dataframe/impl/api/parse.kt       |   1 -
 .../dataframe/impl/io/FastDoubleParser.kt     | 220 +++++++++++-------
 .../kotlinx/dataframe/io/ParserTests.kt       | 131 ++++++++++-
 4 files changed, 255 insertions(+), 99 deletions(-)

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
index 973a2340da..69075f2f6c 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt
@@ -36,8 +36,8 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDateTime
 import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
 import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
 import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
-import org.jetbrains.kotlinx.dataframe.io.toDataFrame
 import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
+import org.jetbrains.kotlinx.dataframe.io.toDataFrame
 import java.math.BigDecimal
 import java.math.BigInteger
 import java.net.URL
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
index 50d12b3db1..d2da7201f7 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt
@@ -31,7 +31,6 @@ import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
 import org.jetbrains.kotlinx.dataframe.columns.size
 import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
 import org.jetbrains.kotlinx.dataframe.hasNulls
-import org.jetbrains.kotlinx.dataframe.impl.asNullable
 import org.jetbrains.kotlinx.dataframe.impl.canParse
 import org.jetbrains.kotlinx.dataframe.impl.catchSilent
 import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
index ad597e284a..815b404cb8 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt
@@ -5,7 +5,6 @@ import ch.randelshofer.fastdoubleparser.NumberFormatSymbols
 import io.github.oshai.kotlinlogging.KotlinLogging
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.api.ParserOptions
-import org.jetbrains.kotlinx.dataframe.api.parser
 import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
 import java.nio.charset.Charset
 import java.text.DecimalFormatSymbols
@@ -15,12 +14,6 @@ import java.util.Locale
 
 private val logger = KotlinLogging.logger {}
 
-// (lowercase) strings that are recognized to represent infinity and NaN in doubles in all locales
-private val INFINITIES = arrayOf("∞", "inf", "infinity", "infty")
-private val PLUS_INFINITIES = INFINITIES.map { "+$it" }
-private val MINUS_INFINITIES = INFINITIES.map { "-$it" }
-private val NANS = arrayOf("nan", "na", "n/a")
-
 /**
  * Parses a [String]/[CharSequence], [CharArray], or [ByteArray] into a [Double].
  *
@@ -28,6 +21,17 @@ private val NANS = arrayOf("nan", "na", "n/a")
  * fast double parser library, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
  * If not, or if it fails, it will use [NumberFormat] to parse the input.
  *
+ * The [locale][locale] used by the double parser is determined as follows:
+ *
+ *   [parserOptions][parserOptions]`?.`[locale][ParserOptions.locale]`  ?:  `[Parsers.locale][Parsers.locale]`  ?:  `[Locale.getDefault()][Locale.getDefault]
+ *
+ * [FastDoubleParser] has a fallback mechanism; in practice, this means it can recognize symbols and notations
+ * of any locale recognized by Java as long as that symbol does not conflict with the given locale.
+ *
+ * For example, if your locale uses ',' as decimal separator, it will NOT recognize ',' as thousands separator,
+ * but it will recognize ' ', '٬', '_', ' ', etc. as such.
+ * The same holds for characters like "e", "inf", "×10^", "NaN", etc.
+ *
  * Public, so it can be used in other modules.
  *
  * @param parserOptions can be supplied to configure the parser.
@@ -41,106 +45,103 @@ public class FastDoubleParser(private val parserOptions: ParserOptions? = null)
 
     private val useFastDoubleParser = parserOptions?.useFastDoubleParser ?: Parsers.useFastDoubleParser
     private val locale = parserOptions?.locale ?: Parsers.locale
-    private val fallbackLocale = Locale.ROOT
-
-    private val localDecimalFormatSymbols = DecimalFormatSymbols.getInstance(locale)
-    private val fallbackDecimalFormatSymbols = DecimalFormatSymbols.getInstance(fallbackLocale)
 
     private val parser = ConfigurableDoubleParser(/* symbols = */ setupNumberFormatSymbols(), /* ignoreCase = */ true)
 
     /**
      * Sets up the [NumberFormatSymbols] for the [ConfigurableDoubleParser] based on
-     * [localDecimalFormatSymbols] with fallbacks from [fallbackDecimalFormatSymbols].
+     * the [locale] with fallbacks from all other locales.
      *
      * Fallback characters/strings are only added if they're not clashing with local characters/strings.
      */
-    private fun setupNumberFormatSymbols(): NumberFormatSymbols {
-        // collect all chars and strings that are locale-specific such that we can check whether
-        // fallback chars and strings are safe to add
-        val localChars = with(localDecimalFormatSymbols) {
-            buildSet {
-                add(decimalSeparator.lowercaseChar())
-                add(groupingSeparator.lowercaseChar())
-                add(minusSign.lowercaseChar())
-                add('+')
-                add(zeroDigit.lowercaseChar())
+    private fun setupNumberFormatSymbols(): NumberFormatSymbols =
+        numberFormatSymbolsCache.getOrPut(locale) {
+            val localDecimalFormatSymbols = DecimalFormatSymbols.getInstance(locale)
+
+            // collect all chars and strings that are locale-specific such that we can check whether
+            // fallback chars and strings are safe to add
+            val localChars = with(localDecimalFormatSymbols) {
+                buildSet {
+                    add(decimalSeparator.lowercaseChar())
+                    add(groupingSeparator.lowercaseChar())
+                    add(minusSign.lowercaseChar())
+                    add('+')
+                    // we don't include zeroDigit here, for notations like ×10^
+                }
             }
-        }
-        val localStrings = with(localDecimalFormatSymbols) {
-            buildSet {
-                add(exponentSeparator.lowercase())
-                add(infinity.lowercase())
-                add(naN.lowercase())
+            val localStrings = with(localDecimalFormatSymbols) {
+                buildSet {
+                    add(exponentSeparator.lowercase())
+                    add(infinity.lowercase())
+                    add(naN.lowercase())
+                }
             }
-        }
 
-        /**
-         * Builds a set with the specified char from [localDecimalFormatSymbols] and
-         * its fallback char from [fallbackDecimalFormatSymbols] if it's safe to do so.
-         * [additionals] will be added to the set too, when they're safe to add.
-         */
-        fun ((DecimalFormatSymbols) -> Char).fromLocalWithFallBack(vararg additionals: Char): Set<Char> =
-            buildSet {
-                val getChar = this@fromLocalWithFallBack
-                val char = getChar(localDecimalFormatSymbols).lowercaseChar()
-                add(char)
-
-                // add fallback char if it's safe to do so
-                val fallbackChar = getChar(fallbackDecimalFormatSymbols).lowercaseChar()
-                if (fallbackChar !in localChars && !localStrings.any { fallbackChar in it }) {
-                    add(fallbackChar)
-                }
+            /**
+             * Builds a set containing the lowercase form of [this] char;
+             * the [fallbackChars] will be added to the set too, when they're safe to add.
+             */
+            fun Char.withFallback(fallbackChars: CharArray): Set<Char> =
+                buildSet {
+                    val char = this@withFallback.lowercaseChar()
+                    add(char)
 
-                // Fixes NBSP and other whitespace characters not being recognized if the user writes space instead.
-                if (char.isWhitespace()) add(' ')
+                    // Treat NBSP and other whitespace characters the same.
+                    if (char.isWhitespace()) addAll(WHITE_SPACES.asIterable())
 
-                // add additional chars if needed
-                for (additional in additionals) {
-                    val lowercase = additional.lowercaseChar()
-                    if (lowercase !in localChars && !localStrings.any { lowercase in it }) {
-                        add(lowercase)
+                    // add fallback chars if needed
+                    for (char in fallbackChars) {
+                        val lowercase = char.lowercaseChar()
+                        if (lowercase !in localChars && !localStrings.any { lowercase in it }) {
+                            add(lowercase)
+                        }
+
+                        // Treat NBSP and other whitespace characters the same.
+                        if (char.isWhitespace()) addAll(WHITE_SPACES.asIterable())
                     }
                 }
-            }
 
-        /**
-         * Builds a set with the specified string from [localDecimalFormatSymbols] and
-         * its fallback string from [fallbackDecimalFormatSymbols] if it's safe to do so.
-         * [additionals] will be added to the set too, when they're safe to add.
-         */
-        fun ((DecimalFormatSymbols) -> String).fromLocalWithFallBack(vararg additionals: String): Set<String> =
-            buildSet {
-                val getString = this@fromLocalWithFallBack
-                val string = getString(localDecimalFormatSymbols).lowercase()
-                add(string)
-
-                // add fallback string if it's safe to do so
-                val fallbackString = getString(fallbackDecimalFormatSymbols).lowercase()
-                if (!fallbackString.any { it in localChars } && fallbackString !in localStrings) {
-                    add(fallbackString)
-                }
+            /**
+             * Builds a set containing the lowercase form of [this] string;
+             * the [fallbackStrings] will be added to the set too, when they're safe to add.
+             */
+            fun String.withFallback(fallbackStrings: Array<String>): Set<String> =
+                buildSet {
+                    val string = this@withFallback.lowercase()
+                    add(string)
+
+                    // Treat NBSP and other whitespace characters the same.
+                    if (string.isBlank()) addAll(WHITE_SPACES.map { it.toString() })
 
-                // Fixes NBSP and other whitespace characters not being recognized if the user writes space instead.
-                if (string.isBlank()) add(" ")
+                    // add fallback strings if needed
+                    for (string in fallbackStrings) {
+                        val lowercase = string.lowercase()
+                        if (!lowercase.any { it in localChars } && lowercase !in localStrings) {
+                            add(lowercase)
+                        }
 
-                // add additional strings if needed
-                for (additional in additionals) {
-                    val lowercase = additional.lowercase()
-                    if (!lowercase.any { it in localChars } && lowercase !in localStrings) {
-                        add(lowercase)
+                        // Treat NBSP and other whitespace characters the same.
+                        if (string.isBlank()) addAll(WHITE_SPACES.map { it.toString() })
                     }
                 }
-            }
 
-        return NumberFormatSymbols.fromDecimalFormatSymbols(localDecimalFormatSymbols)
-            .withPlusSign(setOf('+'))
-            .withDecimalSeparator(DecimalFormatSymbols::getDecimalSeparator.fromLocalWithFallBack())
-            .withGroupingSeparator(DecimalFormatSymbols::getGroupingSeparator.fromLocalWithFallBack())
-            .withExponentSeparator(DecimalFormatSymbols::getExponentSeparator.fromLocalWithFallBack())
-            .withMinusSign(DecimalFormatSymbols::getMinusSign.fromLocalWithFallBack())
-            .withInfinity(DecimalFormatSymbols::getInfinity.fromLocalWithFallBack(*INFINITIES))
-            .withNaN(DecimalFormatSymbols::getNaN.fromLocalWithFallBack(*NANS))
-    }
+            NumberFormatSymbols.fromDecimalFormatSymbols(localDecimalFormatSymbols)
+                .withPlusSign(
+                    setOf('+'),
+                ).withDecimalSeparator(
+                    localDecimalFormatSymbols.decimalSeparator.withFallback(DECIMAL_SEPARATORS),
+                ).withGroupingSeparator(
+                    localDecimalFormatSymbols.groupingSeparator.withFallback(GROUPING_SEPARATORS),
+                ).withExponentSeparator(
+                    localDecimalFormatSymbols.exponentSeparator.withFallback(EXPONENTS),
+                ).withMinusSign(
+                    localDecimalFormatSymbols.minusSign.withFallback(MINUS_SIGNS),
+                ).withInfinity(
+                    localDecimalFormatSymbols.infinity.withFallback(INFINITIES),
+                ).withNaN(
+                    localDecimalFormatSymbols.naN.withFallback(NANS),
+                )
+        }
 
     /** Fallback method for parsing doubles. */
     private fun String.parseToDoubleOrNullFallback(): Double? =
@@ -152,7 +153,7 @@ public class FastDoubleParser(private val parserOptions: ParserOptions? = null)
             in NANS -> Double.NaN
 
             else -> {
-                // not thread safe; must be created here
+                // NumberFormat is not thread safe; must be created in the function body
                 val numberFormat = NumberFormat.getInstance(locale)
                 val parsePosition = ParsePosition(0)
                 val result = numberFormat.parse(this, parsePosition)?.toDouble()
@@ -235,4 +236,49 @@ public class FastDoubleParser(private val parserOptions: ParserOptions? = null)
         }
         return String(chars = ca, offset = offset, length = length).parseToDoubleOrNullFallback()
     }
+
+    /**
+     * Holds the decimal format symbols of all locales available on the system, used as fallbacks for [locale].
+     * They are only added by [withFallback] if they don't interfere with symbols already in the provided [locale]
+     * (so ',' is not added as grouping separator if ',' is already the locale's decimal separator).
+     * [numberFormatSymbolsCache] is shared by every parser instance, so it must be safe for concurrent access.
+     */
+    internal companion object {
+        private val allDecimalFormatSymbols by lazy {
+            Locale.getAvailableLocales().map { DecimalFormatSymbols.getInstance(it) }
+        }
+        val MINUS_SIGNS by lazy {
+            allDecimalFormatSymbols.mapNotNullTo(mutableSetOf()) { it.minusSign }.toCharArray()
+        }
+        val INFINITIES by lazy {
+            allDecimalFormatSymbols.mapNotNullTo(mutableSetOf()) { it.infinity }
+                .plus(arrayOf("∞", "inf", "infinity", "infty"))
+                .toTypedArray()
+        }
+        val PLUS_INFINITIES by lazy { INFINITIES.map { "+$it" }.toTypedArray() }
+        val MINUS_INFINITIES by lazy {
+            INFINITIES.flatMap { inf -> MINUS_SIGNS.map { min -> min + inf } }.toTypedArray()
+        }
+        val NANS by lazy {
+            allDecimalFormatSymbols.mapNotNullTo(mutableSetOf()) { it.naN }
+                .plus(arrayOf("nan", "na", "n/a"))
+                .toTypedArray()
+        }
+        val WHITE_SPACES = charArrayOf(' ', '\u00A0', '\u2009', '\u202F', '\t')
+        val GROUPING_SEPARATORS by lazy {
+            allDecimalFormatSymbols.mapNotNullTo(mutableSetOf()) { it.groupingSeparator }
+                .plus(arrayOf('\'', '˙', *WHITE_SPACES.toTypedArray()))
+                .toCharArray()
+        }
+        val DECIMAL_SEPARATORS by lazy {
+            allDecimalFormatSymbols.flatMapTo(mutableSetOf()) {
+                listOfNotNull(it.decimalSeparator, it.monetaryDecimalSeparator)
+            }.plus(arrayOf('·', '⎖'))
+                .toCharArray()
+        }
+        val EXPONENTS by lazy {
+            allDecimalFormatSymbols.mapNotNullTo(mutableSetOf()) { it.exponentSeparator }.toTypedArray()
+        }
+        val numberFormatSymbolsCache = java.util.concurrent.ConcurrentHashMap<Locale, NumberFormatSymbols>()
+    }
 }
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
index 6d0f06f72f..553fac5961 100644
--- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
+++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt
@@ -184,10 +184,10 @@ class ParserTests {
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
 
             // --------------------------------------------------------------------------------
@@ -210,10 +210,10 @@ class ParserTests {
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
 
             // --------------------------------------------------------------------------------
@@ -222,33 +222,141 @@ class ParserTests {
 
             columnDot.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
             columnComma.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertTo<Double>() shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
             columnComma.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertTo<Double?>() shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(12.345, 67.89)
 
             columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12345.0, 67890.0)
             columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(12.345, 67890.0)
 
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
             columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
-            // uses fallback to ROOT locale
+            // uses fallback mechanism
             columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(12.345, 67.89)
         } finally {
             Locale.setDefault(systemLocale)
         }
     }
 
+    @Test
+    fun `converting String to Double in different locales with NBSP grouping`() {
+        val systemLocale = Locale.getDefault()
+        try {
+            // Test 45 behaviour combinations:
+
+            // 3 source columns
+            val columnDot = columnOf("123 456.789", "0 987 654.321")
+            val columnComma = columnOf("123 456,789", "0 987 654,321")
+            val columnMixed = columnOf(
+                "123 456.789",
+                "0'987 654,321", // note the use of two different thousands grouping characters
+            )
+            // *
+            // (3 locales as converting parameter + original converting + original converting to nullable)
+            val parsingLocaleNotDefined: Locale? = null // takes parserOptions.locale ?: Locale.getDefault()
+            // uses dot as decimal separator, comma as grouping separator
+            val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US")
+            // uses comma as decimal separator, NBSP as grouping separator
+            val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("ru-RU")
+            // *
+            // 3 system locales
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("C.UTF-8"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double>() shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double?>() shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            // uses fallback mechanism
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("en-US"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double>() shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double?>() shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            // uses fallback mechanism
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+
+            // --------------------------------------------------------------------------------
+
+            Locale.setDefault(Locale.forLanguageTag("ru-RU"))
+
+            columnDot.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertTo<Double>() shouldBe columnOf(123_456.789, 987_654.321)
+
+            columnDot.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertTo<Double?>() shouldBe columnOf(123_456.789, 987_654.321)
+
+            columnDot.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertToDouble(parsingLocaleNotDefined) shouldBe columnOf(123_456.789, 987_654.321)
+
+            columnDot.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654.321)
+            // parses correctly but may be surprising
+            columnComma.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456_789.0, 987_654_321.0)
+            columnMixed.convertToDouble(parsingLocaleUsesDot) shouldBe columnOf(123_456.789, 987_654_321.0)
+
+            // uses fallback mechanism
+            columnDot.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            columnComma.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+            // uses fallback mechanism
+            columnMixed.convertToDouble(parsingLocaleUsesComma) shouldBe columnOf(123_456.789, 987_654.321)
+        } finally {
+            Locale.setDefault(systemLocale)
+        }
+    }
+
     @Test
     fun `converting String to Double in different locales with comma grouping`() {
         val systemLocale = Locale.getDefault()
@@ -258,7 +366,10 @@ class ParserTests {
             // 3 source columns
             val columnDot = columnOf("123,456.789", "0,987,654.321")
             val columnComma = columnOf("123.456,789", "0.987.654,321")
-            val columnMixed = columnOf("123,456.789", "0.987.654,321")
+            val columnMixed = columnOf(
+                "123,456.789",
+                "0'987.654,321", // note the use of two different thousands grouping characters
+            )
             // *
             // (3 locales as converting parameter + original converting + original converting to nullable)
             val parsingLocaleNotDefined: Locale? = null // takes parserOptions.locale ?: Locale.getDefault()

From 1d12cba1a3e768abeb589b8ca3ba51262379cd72 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Tue, 11 Feb 2025 13:53:50 +0100
Subject: [PATCH 5/6] updated docs regarding double parsing

---
 docs/StardustDocs/topics/convert.md |  2 +-
 docs/StardustDocs/topics/parse.md   | 71 +++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/docs/StardustDocs/topics/convert.md b/docs/StardustDocs/topics/convert.md
index bd7e2088e5..6cbe4e8e5f 100644
--- a/docs/StardustDocs/topics/convert.md
+++ b/docs/StardustDocs/topics/convert.md
@@ -44,7 +44,7 @@ df.convert { name }.asFrame { it.add("fullName") { "$firstName $lastName" } }
 * `Int` (and `Char`)
 * `Long`
 * `Float`
-* `Double`
+* `Double` (See [parsing doubles](parse.md#parsing-doubles) for `String` to `Double` conversion)
 * `BigDecimal`
 * `BigInteger`
 * `LocalDateTime` (kotlinx.datetime and java.time)
diff --git a/docs/StardustDocs/topics/parse.md b/docs/StardustDocs/topics/parse.md
index a8dbd5806e..30e70fd1f4 100644
--- a/docs/StardustDocs/topics/parse.md
+++ b/docs/StardustDocs/topics/parse.md
@@ -5,6 +5,10 @@ Returns a [`DataFrame`](DataFrame.md) in which the given `String` columns are pa
 
 This is a special case of the [convert](convert.md) operation.
 
+This parsing operation is sometimes executed implicitly, for example, when [reading from CSV](read.md) or
+[type converting from `String` columns](convert.md).
+You can recognize this by the `locale` or `parserOptions` arguments in these functions.
+
 <!---FUN parseAll-->
 
 ```kotlin
@@ -25,6 +29,8 @@ df.parse { age and weight }
 <dataFrame src="org.jetbrains.kotlinx.dataframe.samples.api.Modify.parseSome.html"/>
 <!---END-->
 
+### Parsing Order
+
 `parse` tries to parse every `String` column into one of supported types in the following order:
 * `Int`
 * `Long`
@@ -34,16 +40,30 @@ df.parse { age and weight }
 * `Duration` (`kotlin.time` and `java.time`)
 * `LocalTime` (`java.time`)
 * `URL` (`java.net`)
-* `Double` (with optional locale settings)
+* [`Double` (with optional locale settings)](#parsing-doubles)
 * `Boolean`
 * `BigDecimal`
 * `JSON` (arrays and objects)
 
+### Parser Options
+
+DataFrame supports multiple parser options that can be used to customize the parsing behavior.
+These can be supplied to the `parse` function (or any other function that can implicitly parse `Strings`)
+as an argument:
+
 Available parser options:
-* `locale: Locale` is used to parse doubles
+* `locale: Locale` is used to [parse doubles](#parsing-doubles)
+  * Default locale is `Locale.getDefault()`
 * `dateTimePattern: String` is used to parse date and time
 * `dateTimeFormatter: DateTimeFormatter` is used to parse date and time
-* `nullStrings: List<String>` is used to treat particular strings as `null` value. Default null strings are **"null"** and **"NULL"**
+* `nullStrings: List<String>` is used to treat particular strings as `null` value
+  * Default null strings are **"null"** and **"NULL"**
+  * When [reading from CSV](read.md), we include even more defaults, like **""**, and **"NA"**.
+  See the KDocs there for the exact details
+* `skipTypes: Set<KType>` types that should be skipped during parsing
+  * Empty set by default; parsing can result in any supported type
+* `useFastDoubleParser: Boolean` is used to enable or disable the [new fast double parser](#parsing-doubles)
+  * Enabled by default
 
 <!---FUN parseWithOptions-->
 
@@ -54,8 +74,13 @@ df.parse(options = ParserOptions(locale = Locale.CHINA, dateTimeFormatter = Date
 <dataFrame src="org.jetbrains.kotlinx.dataframe.samples.api.Modify.parseWithOptions.html"/>
 <!---END-->
 
+### Global Parser Options
+
 You can also set global parser options that will be used by default in [`read`](read.md), [`convert`](convert.md),
-and `parse` operations:
+and other `parse` operations.
+These can be seen as a global fallback for the `parserOptions` argument.
+
+For example, to change the locale to French and add a custom date-time pattern:
 
 <!---FUN globalParserOptions-->
 
@@ -64,4 +89,42 @@ DataFrame.parser.locale = Locale.FRANCE
 DataFrame.parser.addDateTimePattern("dd.MM.uuuu HH:mm:ss")
 ```
 
+This means that the locale being used by the parser is defined as:
+
+↪ The locale given as function argument directly, or in `parserOptions`, if it is not `null`, else
+
+&nbsp;&nbsp;&nbsp;&nbsp;↪ The locale set by `DataFrame.parser.locale = ...`, if it is not `null`, else
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;↪ `Locale.getDefault()`, which is the system's default locale that can be changed with `Locale.setDefault()`.
+
+### Parsing Doubles
+
+DataFrame has a new fast and powerful double parser enabled by default.
+It is based on [the FastDoubleParser library](https://github.com/wrandelshofer/FastDoubleParser) for its
+high performance and configurability
+(in the future, we might expand this support to `Float`, `BigDecimal`, and `BigInteger` as well).
+
+The parser is locale-aware; it will use the locale set by the [parser options](#parser-options) to parse the doubles.
+It also has a fallback mechanism built in, meaning it can recognize characters from
+all other locales (and some from [Wikipedia](https://en.wikipedia.org/wiki/Decimal_separator))
+and parse them correctly as long as they don't conflict with the current locale.
+
+For example, if your locale uses ',' as decimal separator, it will not recognize ',' as thousands separator, but it will
+recognize ''', ' ', '٬', '_', ' ', etc. as such.
+The same holds for characters like "e", "inf", "×10^", "NaN", etc. (ignoring case).
+
+This means you can safely parse `"123'456 789,012.345×10^6"` with a US locale but not `"1.234,5"`.
+
+Aside from this, DataFrame also explicitly recognizes "∞", "inf", "infinity", and "infty" as `Double.POSITIVE_INFINITY`
+(as well as their negative counterparts), "nan", "na", and "n/a" as `Double.NaN`,
+and all forms of whitespace are treated equally.
+
+If `FastDoubleParser` fails to parse a `String` as `Double`, DataFrame will try
+to parse it using the standard `NumberFormat.parse()` function as a last resort.
+
+If you experience any issues with the new parser, you can turn it off by setting
+`useFastDoubleParser = false`, which will use the old `NumberFormat.parse()` function instead.
+
+Please [report](https://github.com/Kotlin/dataframe/issues) any issues you encounter. 
+
 <!---END-->

From 08570f91ead6287aff7fcaeea4d5d6c3da130cd9 Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Wed, 12 Feb 2025 14:26:36 +0100
Subject: [PATCH 6/6] small clarification of parsing docs

---
 docs/StardustDocs/topics/parse.md | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/docs/StardustDocs/topics/parse.md b/docs/StardustDocs/topics/parse.md
index 30e70fd1f4..7a4460b071 100644
--- a/docs/StardustDocs/topics/parse.md
+++ b/docs/StardustDocs/topics/parse.md
@@ -49,21 +49,26 @@ df.parse { age and weight }
 
 DataFrame supports multiple parser options that can be used to customize the parsing behavior.
 These can be supplied to the `parse` function (or any other function that can implicitly parse `Strings`)
-as an argument:
+as an argument.
+
+For each option you don't supply (or supply `null`), DataFrame will take the value from the
+[Global Parser Options](#global-parser-options).
 
 Available parser options:
 * `locale: Locale` is used to [parse doubles](#parsing-doubles)
-  * Default locale is `Locale.getDefault()`
+  * Global default locale is `Locale.getDefault()`
 * `dateTimePattern: String` is used to parse date and time
+  * Global default supports ISO (local) date-time
 * `dateTimeFormatter: DateTimeFormatter` is used to parse date and time
+  * Is derived from `dateTimePattern` and/or `locale` if `null`
 * `nullStrings: List<String>` is used to treat particular strings as `null` value
-  * Default null strings are **"null"** and **"NULL"**
+  * Global default null strings are **"null"** and **"NULL"**
   * When [reading from CSV](read.md), we include even more defaults, like **""**, and **"NA"**.
   See the KDocs there for the exact details
 * `skipTypes: Set<KType>` types that should be skipped during parsing
-  * Empty set by default; parsing can result in any supported type
+  * Empty set by global default; parsing can result in any supported type
 * `useFastDoubleParser: Boolean` is used to enable or disable the [new fast double parser](#parsing-doubles)
-  * Enabled by default
+  * Enabled by global default
 
 <!---FUN parseWithOptions-->
 
@@ -76,11 +81,12 @@ df.parse(options = ParserOptions(locale = Locale.CHINA, dateTimeFormatter = Date
 
 ### Global Parser Options
 
-You can also set global parser options that will be used by default in [`read`](read.md), [`convert`](convert.md),
-and other `parse` operations.
-These can be seen as a global fallback for the `parserOptions` argument.
+As mentioned before, you can change the default global parser options that will be used by [`read`](read.md),
+[`convert`](convert.md), and other `parse` operations.
+Whenever you don't explicitly provide [parser options](#parser-options) to a function call,
+DataFrame will use these global options instead.
 
-For example, to change the locale to French and add a custom date-time pattern:
+For example, to change the locale to French and add a custom date-time pattern for all subsequent DataFrame calls:
 
 <!---FUN globalParserOptions-->
 
@@ -89,7 +95,7 @@ DataFrame.parser.locale = Locale.FRANCE
 DataFrame.parser.addDateTimePattern("dd.MM.uuuu HH:mm:ss")
 ```
 
-This means that the locale being used by the parser is defined as:
+For `locale`, this means that the locale used by the parser is resolved in the following order:
 
 ↪ The locale given as function argument directly, or in `parserOptions`, if it is not `null`, else
 
@@ -104,7 +110,8 @@ It is based on [the FastDoubleParser library](https://github.com/wrandelshofer/F
 high performance and configurability
 (in the future, we might expand this support to `Float`, `BigDecimal`, and `BigInteger` as well).
 
-The parser is locale-aware; it will use the locale set by the [parser options](#parser-options) to parse the doubles.
+The parser is locale-aware; it will use the locale set by the
+[(global)](#global-parser-options) [parser options](#parser-options) to parse the doubles.
 It also has a fallback mechanism built in, meaning it can recognize characters from
 all other locales (and some from [Wikipedia](https://en.wikipedia.org/wiki/Decimal_separator))
 and parse them correctly as long as they don't conflict with the current locale.