Skip to content

Commit 8655adc

Browse files
committed
set useFastDoubleParser parser option to true by default and updated KDocs
1 parent 8cc6c00 commit 8655adc

File tree

6 files changed

+29
-17
lines changed

6 files changed

+29
-17
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
3737
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
3838
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
3939
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
40+
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
4041
import java.math.BigDecimal
4142
import java.math.BigInteger
4243
import java.net.URL
@@ -223,8 +224,8 @@ public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColum
223224
* @include [DataColumnStringConvertToDoubleDoc]
224225
* @param nullStrings a set of strings that should be treated as `null` values.
225226
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
226-
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
227-
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
227+
* @param useFastDoubleParser whether to use [FastDoubleParser].
228+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
228229
*/
229230
@JvmName("convertToDoubleFromString")
230231
public fun DataColumn<String>.convertToDouble(
@@ -243,8 +244,8 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
243244
* @include [DataColumnStringConvertToDoubleDoc]
244245
* @param nullStrings a set of strings that should be treated as `null` values.
245246
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
246-
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
247-
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
247+
* @param useFastDoubleParser whether to use [FastDoubleParser].
248+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
248249
*/
249250
@JvmName("convertToDoubleFromStringNullable")
250251
public fun DataColumn<String?>.convertToDouble(

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
1111
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1212
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1313
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
14+
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
1415
import org.jetbrains.kotlinx.dataframe.io.readCSV
1516
import org.jetbrains.kotlinx.dataframe.typeClass
1617
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
@@ -45,6 +46,12 @@ public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options
4546
public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> =
4647
parse(options) { columns.toColumnSet() }
4748

49+
/**
50+
* Global counterpart of [ParserOptions].
51+
* Settings changed here will affect the defaults for all parsing operations.
52+
*
53+
* The default values are set by [Parsers.resetToDefault].
54+
*/
4855
public interface GlobalParserOptions {
4956

5057
public fun addDateTimePattern(pattern: String)
@@ -54,7 +61,7 @@ public interface GlobalParserOptions {
5461
/** This function can be called to skip some types. Parsing will be attempted for all other types. */
5562
public fun addSkipType(type: KType)
5663

57-
/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
64+
/** Whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter. */
5865
public var useFastDoubleParser: Boolean
5966

6067
public fun resetToDefault()
@@ -91,7 +98,7 @@ public interface GlobalParserOptions {
9198
* `["null", "NULL", "NA", "N/A"]`.
9299
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
93100
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
94-
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
101+
* @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
95102
*/
96103
public class ParserOptions(
97104
public val locale: Locale? = null,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
2525
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
2626
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
2727
import org.jetbrains.kotlinx.dataframe.api.map
28+
import org.jetbrains.kotlinx.dataframe.api.parser
2829
import org.jetbrains.kotlinx.dataframe.api.to
2930
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
3031
import org.jetbrains.kotlinx.dataframe.columns.size
3132
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3233
import org.jetbrains.kotlinx.dataframe.hasNulls
34+
import org.jetbrains.kotlinx.dataframe.impl.asNullable
3335
import org.jetbrains.kotlinx.dataframe.impl.canParse
3436
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
3537
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
@@ -47,6 +49,7 @@ import java.time.format.DateTimeFormatterBuilder
4749
import java.time.temporal.Temporal
4850
import java.time.temporal.TemporalQuery
4951
import java.util.Locale
52+
import kotlin.properties.Delegates
5053
import kotlin.reflect.KClass
5154
import kotlin.reflect.KType
5255
import kotlin.reflect.full.withNullability
@@ -114,6 +117,13 @@ internal class StringParserWithFormat<T>(
114117
}
115118
}
116119

120+
/**
121+
* Central implementation for [GlobalParserOptions].
122+
*
123+
* Users can obtain it by calling [DataFrame.parser][DataFrame.Companion.parser].
124+
*
125+
* Defaults are set by [resetToDefault].
126+
*/
117127
internal object Parsers : GlobalParserOptions {
118128

119129
private val formatters: MutableList<DateTimeFormatter> = mutableListOf()
@@ -140,7 +150,7 @@ internal object Parsers : GlobalParserOptions {
140150
skipTypesSet.add(type)
141151
}
142152

143-
override var useFastDoubleParser: Boolean = false
153+
override var useFastDoubleParser by Delegates.notNull<Boolean>()
144154

145155
private var _locale: Locale? = null
146156

@@ -165,7 +175,7 @@ internal object Parsers : GlobalParserOptions {
165175
.toFormatter()
166176
.let { formatters.add(it) }
167177

168-
useFastDoubleParser = false
178+
useFastDoubleParser = true
169179
_locale = null
170180
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
171181
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ private val NANS = arrayOf("nan", "na", "n/a")
2424
/**
2525
* Parses a [String]/[CharSequence], [CharArray], or [ByteArray] into a [Double].
2626
*
27-
* If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with an _EXPERIMENTAL_
28-
* fast double parser, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
27+
* If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with the
28+
* fast double parser library, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
2929
* If not, or if it fails, it will use [NumberFormat] to parse the input.
3030
*
3131
* Public, so it can be used in other modules.

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,6 @@ internal object DelimParams {
138138
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
139139
*
140140
* The only exceptions are:
141-
* - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`,
142-
* regardless of the global setting.
143141
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
144142
* will take the global setting + {@include [DefaultNullStringsContentLink]}.
145143
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,7 @@ internal fun readDelimImpl(
112112
): DataFrame<*> {
113113
// set up the csv specs
114114
val csvSpecs = with(CsvSpecs.builder()) {
115-
// turn on fast double parser if not explicitly set regardless of the global parser options
116-
@Suppress("NullableBooleanElvis")
117-
val adjustedParserOptions = (parserOptions ?: ParserOptions())
118-
.copy(useFastDoubleParser = parserOptions?.useFastDoubleParser ?: true)
119-
customDoubleParser(DataFrameCustomDoubleParser(adjustedParserOptions))
115+
customDoubleParser(DataFrameCustomDoubleParser(parserOptions))
120116

121117
// use the given nullStrings if provided, else take the global ones + some extras
122118
val nullStrings = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)

0 commit comments

Comments
 (0)