Skip to content

Commit f588127

Browse files
committed
added double parsing test for all locales and streamlined behavior for "-" in RTL languages with NumberFormat
1 parent 57d492f commit f588127

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ public class FastDoubleParser(private val parserOptions: ParserOptions) {
4343

4444
private val parser = ConfigurableDoubleParser(/* symbols = */ setupNumberFormatSymbols(), /* ignoreCase = */ true)
4545

46+
// Fix for Java 8 RTL languages minus sign not being recognized.
// True when the locale's minus sign character falls in the Unicode FORMAT general category;
// the parse functions use this flag to decide whether the leading minus sign must be
// swapped for the fallback one before parsing.
private val minusSignIsFormatSymbol =
    localDecimalFormatSymbols.minusSign.category == CharCategory.FORMAT
49+
4650
/**
4751
* Sets up the [NumberFormatSymbols] for the [ConfigurableDoubleParser] based on
4852
* [localDecimalFormatSymbols] with fallbacks from [fallbackDecimalFormatSymbols].
@@ -177,6 +181,17 @@ public class FastDoubleParser(private val parserOptions: ParserOptions) {
177181
): Double? {
178182
if (parserOptions.useFastDoubleParser && charset in supportedFastCharsets) {
179183
try {
184+
// Fixes RTL minus sign not being recognized
185+
if (minusSignIsFormatSymbol && ba.toString(charset).startsWith(localDecimalFormatSymbols.minusSign)) {
186+
val localMinusSize = localDecimalFormatSymbols.minusSign.toString().toByteArray(charset).size
187+
val fallbackMinusSize = fallbackDecimalFormatSymbols.minusSign.toString().toByteArray(charset).size
188+
val newOffset = (localMinusSize - fallbackMinusSize).coerceAtLeast(0)
189+
val newBa = ba.copyOf()
190+
fallbackDecimalFormatSymbols.minusSign.toString().toByteArray(charset)
191+
.copyInto(destination = newBa, destinationOffset = newOffset)
192+
193+
return parser.parseDouble(newBa, newOffset, length - newOffset)
194+
}
180195
return parser.parseDouble(ba, offset, length)
181196
} catch (e: Exception) {
182197
logger.debug(e) {
@@ -199,6 +214,15 @@ public class FastDoubleParser(private val parserOptions: ParserOptions) {
199214
public fun parseOrNull(cs: CharSequence): Double? {
200215
if (parserOptions.useFastDoubleParser) {
201216
try {
217+
// Fixes RTL minus sign not being recognized
218+
if (minusSignIsFormatSymbol && cs.startsWith(localDecimalFormatSymbols.minusSign)) {
219+
val newCs = cs.toString().replaceFirst(
220+
localDecimalFormatSymbols.minusSign,
221+
fallbackDecimalFormatSymbols.minusSign,
222+
)
223+
return parser.parseDouble(newCs)
224+
}
225+
202226
return parser.parseDouble(cs)
203227
} catch (e: Exception) {
204228
logger.debug(e) {
@@ -219,6 +243,12 @@ public class FastDoubleParser(private val parserOptions: ParserOptions) {
219243
public fun parseOrNull(ca: CharArray, offset: Int = 0, length: Int = ca.size): Double? {
220244
if (parserOptions.useFastDoubleParser) {
221245
try {
246+
// Fixes RTL minus sign not being recognized.
247+
if (minusSignIsFormatSymbol && ca.firstOrNull() == localDecimalFormatSymbols.minusSign) {
248+
val newCa = ca.copyOf()
249+
newCa[0] = fallbackDecimalFormatSymbols.minusSign
250+
return parser.parseDouble(newCa, offset, length)
251+
}
222252
return parser.parseDouble(ca, offset, length)
223253
} catch (e: Exception) {
224254
logger.debug(e) {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/FastDoubleParserTests.kt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
66
import org.junit.After
77
import org.junit.Before
88
import org.junit.Test
9+
import java.text.NumberFormat
910
import java.util.Locale
1011

1112
private const val LOG_LEVEL = "org.slf4j.simpleLogger.defaultLogLevel"
@@ -160,4 +161,45 @@ class FastDoubleParserTests {
160161
// ByteArray
161162
numbers.map { parser.parseOrNull(it.toByteArray()) }.shouldContainInOrder(expectedDoubles)
162163
}
164+
165+
@Test
fun `fast parse any locale`() {
    // Round-trip check: every sample is formatted with the locale's NumberFormat and must be
    // parsed back to the same value through the String, CharArray and ByteArray entry points.
    val samples = listOf(
        12.45,
        -12.45,
        100_123.35,
        -204_235.23,
        1.234e3,
        -345.122,
        0.0,
        Double.POSITIVE_INFINITY,
        Double.NEGATIVE_INFINITY,
        Double.NaN,
    )

    Locale.getAvailableLocales().forEach { locale ->
        val parser = FastDoubleParser(ParserOptions(locale = locale, useFastDoubleParser = true))
        val formatter = NumberFormat.getInstance(locale)

        samples.forEach { double ->
            val formatted = formatter.format(double)
            // Only used in the failure message, to show how NumberFormat itself reads the text back.
            val parsedByNumberFormatter = formatter.parse(formatted)?.toDouble()

            // NaN never equals itself, so a NaN sample is matched via isNaN() instead of ==.
            // NOTE: Kotlin's assert only fires when JVM assertions (-ea) are enabled.
            fun verify(parsed: Double?) {
                val matches = if (double.isNaN()) parsed?.isNaN() == true else double == parsed
                assert(matches) {
                    "Failed to parse $formatted with locale $locale. Expected $double, got $parsed. NumberFormat parsed it like: $parsedByNumberFormatter"
                }
            }

            // String / CharSequence
            verify(parser.parseOrNull(formatted))
            // CharArray
            verify(parser.parseOrNull(formatted.toCharArray()))
            // ByteArray
            verify(parser.parseOrNull(formatted.toByteArray()))
        }
    }
}
163205
}

0 commit comments

Comments
 (0)