|
1 | 1 | package org.jetbrains.kotlinx.dataframe.io
|
2 | 2 |
|
| 3 | +import io.kotest.assertions.throwables.shouldThrow |
3 | 4 | import io.kotest.matchers.shouldBe
|
4 | 5 | import kotlinx.datetime.LocalDateTime
|
5 | 6 | import org.jetbrains.kotlinx.dataframe.DataColumn
|
6 | 7 | import org.jetbrains.kotlinx.dataframe.DataFrame
|
7 | 8 | import org.jetbrains.kotlinx.dataframe.api.cast
|
8 | 9 | import org.jetbrains.kotlinx.dataframe.api.columnOf
|
9 | 10 | import org.jetbrains.kotlinx.dataframe.api.convertTo
|
| 11 | +import org.jetbrains.kotlinx.dataframe.api.convertToDouble |
10 | 12 | import org.jetbrains.kotlinx.dataframe.api.parse
|
11 | 13 | import org.jetbrains.kotlinx.dataframe.api.parser
|
12 | 14 | import org.jetbrains.kotlinx.dataframe.api.tryParse
|
@@ -77,18 +79,73 @@ class ParserTests {
|
77 | 79 | fun `converting String to Double in different locales`() {
|
78 | 80 | val currentLocale = Locale.getDefault()
|
79 | 81 | try {
|
80 |
| - val stringValues = listOf("1", "2.3", "4,5") |
81 |
| - val stringColumn = DataColumn.createValueColumn("nums", stringValues, typeOf<String>()) |
82 |
| - Locale.setDefault(Locale.forLanguageTag("ru-RU")) |
83 |
| - // Use comma as local decimal separator and dot as fallback default (as it is used in POSIX/C.UTF-8) |
84 |
| - stringColumn.convertTo<Double>().shouldBe( |
85 |
| - DataColumn.createValueColumn("nums", listOf(1.0, 2.3, 4.5), typeOf<Double>()) |
86 |
| - ) |
| 82 | + // Test 36 behaviour combinations (3 source columns * 4 conversion variants * 3 system locales): |
| 83 | + |
| 84 | + // 3 source columns: dot in both values, comma in both values, one value of each kind |
| 85 | + val columnDot = columnOf("12.345", "67.890") |
| 86 | + val columnComma = columnOf("12,345", "67,890") |
| 87 | + val columnMixed = columnOf("12.345", "67,890") |
| 88 | + // * 4 conversion variants: plain convertTo<Double>() + convertToDouble with each of the 3 locale arguments below |
| 89 | + // (parsingLocaleNotDefined is null, i.e. convertToDouble is called without a concrete parsing locale) |
| 90 | + val parsingLocaleNotDefined: Locale? = null |
| 91 | + val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US") |
| 92 | + val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("ru-RU") |
| 93 | + // * 3 system locales ("C.UTF-8", en-US, ru-RU); expectations change only under ru-RU, for columnComma.convertToDouble(null) |
| 94 | + // NOTE(review): "C.UTF-8" is not a well-formed BCP 47 tag; forLanguageTag presumably yields the root locale - confirm |
| 95 | + |
| 96 | + Locale.setDefault(Locale.forLanguageTag("C.UTF-8")) |
| 97 | + |
| 98 | + columnDot.convertTo<Double>().shouldBe(columnOf(12.345, 67.89)) |
| 99 | + columnComma.convertTo<Double>().shouldBe(columnOf(12345.0, 67890.0)) |
| 100 | + columnMixed.convertTo<Double>().shouldBe(columnOf(12.345, 67890.0)) |
| 101 | + |
| 102 | + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) |
| 103 | + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12345.0, 67890.0)) |
| 104 | + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) |
| 105 | + |
| 106 | + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) |
| 107 | + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) |
| 108 | + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) |
| 109 | + |
| 110 | + shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) } |
| 111 | + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) |
| 112 | + shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) } |
| 113 | + |
87 | 114 | Locale.setDefault(Locale.forLanguageTag("en-US"))
|
88 |
| - // Use dot as local decimal separator. Comma is ignored (as it is group separator in this locale). |
89 |
| - stringColumn.convertTo<Double>().shouldBe( |
90 |
| - DataColumn.createValueColumn("nums", listOf(1.0, 2.3, 45.0), typeOf<Double>()) |
91 |
| - ) |
| 115 | + |
| 116 | + columnDot.convertTo<Double>().shouldBe(columnOf(12.345, 67.89)) |
| 117 | + columnComma.convertTo<Double>().shouldBe(columnOf(12345.0, 67890.0)) |
| 118 | + columnMixed.convertTo<Double>().shouldBe(columnOf(12.345, 67890.0)) |
| 119 | + |
| 120 | + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) |
| 121 | + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12345.0, 67890.0)) |
| 122 | + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) |
| 123 | + |
| 124 | + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) |
| 125 | + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) |
| 126 | + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) |
| 127 | + |
| 128 | + shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) } |
| 129 | + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) |
| 130 | + shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) } |
| 131 | + |
| 132 | + Locale.setDefault(Locale.forLanguageTag("ru-RU")) |
| 133 | + |
| 134 | + columnDot.convertTo<Double>().shouldBe(columnOf(12.345, 67.89)) |
| 135 | + columnComma.convertTo<Double>().shouldBe(columnOf(12345.0, 67890.0)) |
| 136 | + columnMixed.convertTo<Double>().shouldBe(columnOf(12.345, 67890.0)) |
| 137 | + |
| 138 | + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) |
| 139 | + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) |
| 140 | + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) |
| 141 | + |
| 142 | + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) |
| 143 | + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) |
| 144 | + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) |
| 145 | + |
| 146 | + shouldThrow<TypeConversionException> { columnDot.convertToDouble(parsingLocaleUsesComma) } |
| 147 | + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) |
| 148 | + shouldThrow<TypeConversionException> { columnMixed.convertToDouble(parsingLocaleUsesComma) } |
92 | 149 | } finally {
|
93 | 150 | Locale.setDefault(currentLocale)
|
94 | 151 | }
|
|
0 commit comments