Skip to content
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b8bbed8
Add JMH benchmarks for datetime format creation and performance evalu…
DmitryNekrasov Nov 12, 2025
7fcb699
Introduce `ConcatenatedListView` for efficient list concatenation in …
DmitryNekrasov Nov 12, 2025
941e426
Remove redundant type parameter `<T>` in `ParserStructure.simplify` m…
DmitryNekrasov Nov 12, 2025
2e096a6
Add unconditionalModifications after each step of simplification
DmitryNekrasov Nov 18, 2025
4ad707b
Rename `unconditionalModificationsForTails` to `unconditionalModifica…
DmitryNekrasov Nov 18, 2025
d4a6b02
Refactor `ParserStructure` logic to use `buildList` for streamlined l…
DmitryNekrasov Nov 18, 2025
970068c
Refactor `ParserStructure` instantiation to improve formatting and re…
DmitryNekrasov Nov 18, 2025
8f2bdeb
Reorder `unconditionalModifications` placement in `ParserStructure`.
DmitryNekrasov Nov 18, 2025
c35a73d
Passes all tests and it has benchmark score increase on Python dateti…
DmitryNekrasov Nov 18, 2025
27bd65c
Refactor `ParserStructure` logic to extract `mergeOperations` for cle…
DmitryNekrasov Nov 18, 2025
b6e80f8
Refactor `ParserStructure.concat` logic to simplify `mergeOperations`…
DmitryNekrasov Nov 18, 2025
82ffb49
Add missing newline
DmitryNekrasov Nov 18, 2025
93accde
Rename `operations` to `mergedOperations`.
DmitryNekrasov Nov 18, 2025
17d0bb9
Simplify the ` mergedTails ` condition.
DmitryNekrasov Nov 19, 2025
6e7d053
Reorder and simplify `unconditionalModifications` handling in `Parser…
DmitryNekrasov Nov 19, 2025
9ce4e77
Refactor `ParserStructure` logic to streamline `mergedOperations` con…
DmitryNekrasov Nov 19, 2025
82e6929
Fix typo in comment: "number consumers" → "number of consumers".
DmitryNekrasov Nov 19, 2025
64ea6ef
Refactor `mergedOperations` construction to reuse `operationsToMerge`…
DmitryNekrasov Nov 19, 2025
d97ef89
Refactor `PythonDateTimeFormatBenchmark` into `CommonFormats`, update…
DmitryNekrasov Nov 19, 2025
423b3b3
Add benchmark for building four-digit UTC offset format
DmitryNekrasov Nov 19, 2025
e14c8fb
Add benchmark for building RFC 1123 DateTime format
DmitryNekrasov Nov 19, 2025
6aa9898
Add benchmark for building ISO DateTime with offset format
DmitryNekrasov Nov 19, 2025
0c5a7a6
Refactor RFC 1123 and UTC offset format benchmarks to use inline form…
DmitryNekrasov Nov 19, 2025
81266f4
Remove `ConcatenatedListView` as it is no longer in use.
DmitryNekrasov Nov 19, 2025
6095101
Remove `SerialFormatBenchmark` as it is no longer in use.
DmitryNekrasov Nov 19, 2025
28c36e6
Update `ParallelFormatBenchmark` warmup and measurement parameters.
DmitryNekrasov Nov 19, 2025
edd5d29
Update copyright year range in `Parser.kt` header.
DmitryNekrasov Nov 19, 2025
cc93dc0
Optimize concatenation of flat parsers.
DmitryNekrasov Nov 20, 2025
096f970
Refactor `Parser` to streamline handling of accumulated operations.
DmitryNekrasov Nov 20, 2025
61a5b06
Add `SerialFormatBenchmark` for evaluating large format serialization…
DmitryNekrasov Nov 20, 2025
d334373
Refactor `Parser` to replace `drop(1)` with an explicit loop for merg…
DmitryNekrasov Nov 21, 2025
31dfbe5
Refactor `Parser` to apply `unconditionalModifications` after process…
DmitryNekrasov Nov 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions benchmarks/src/jmh/kotlin/CommonFormats.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Copyright 2019-2025 JetBrains s.r.o. and contributors.
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
*/

@file:Suppress("unused")

package kotlinx.datetime

import kotlinx.datetime.format.*
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.Blackhole
import java.util.concurrent.*

/**
 * JMH benchmarks that measure how long it takes to *construct* several datetime formats
 * with the format-builder DSL.
 *
 * Each benchmark only builds a format and hands it to the [Blackhole]; nothing is parsed or formatted here.
 * Results are reported as the average time per invocation, in nanoseconds.
 */
@Warmup(iterations = 20, time = 2)
@Measurement(iterations = 30, time = 2)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
@Fork(2)
open class CommonFormats {

    /**
     * Builds a [LocalDateTime] format shaped like `year-month-day hour:minute`, followed by an optional
     * `:second` section which itself may end with an optional `.fraction` section.
     */
    @Benchmark
    fun buildPythonDateTimeFormat(blackhole: Blackhole) {
        val pythonLikeFormat = LocalDateTime.Format {
            year()
            char('-')
            monthNumber()
            char('-')
            day()
            char(' ')
            hour()
            char(':')
            minute()
            optional {
                char(':')
                second()
                optional {
                    char('.')
                    secondFraction()
                }
            }
        }
        blackhole.consume(pythonLikeFormat)
    }

    /**
     * Builds a [LocalDateTime] format made of a nested `year-month-day` date format, a `T` separator
     * (with `t` registered as a parsing alternative) and a nested time format `hour:minute` whose
     * `:second[.fraction]` tail has an empty parsing alternative.
     */
    @Benchmark
    fun buildIsoDateTimeFormat(blackhole: Blackhole) {
        val isoDateTime = LocalDateTime.Format {
            date(
                LocalDate.Format {
                    year()
                    char('-')
                    monthNumber()
                    char('-')
                    day()
                }
            )
            alternativeParsing({ char('t') }, primaryFormat = { char('T') })
            time(
                LocalTime.Format {
                    hour()
                    char(':')
                    minute()
                    alternativeParsing(
                        {},
                        primaryFormat = {
                            char(':')
                            second()
                            optional {
                                char('.')
                                secondFraction(1, 9)
                            }
                        }
                    )
                }
            )
        }
        blackhole.consume(isoDateTime)
    }

    /**
     * Builds a [UtcOffset] format consisting of the offset hours directly followed by the offset
     * minutes-of-hour, with no separator in between.
     */
    @Benchmark
    fun buildFourDigitsUtcOffsetFormat(blackhole: Blackhole) {
        val fourDigitOffset = UtcOffset.Format {
            offsetHours()
            offsetMinutesOfHour()
        }
        blackhole.consume(fourDigitOffset)
    }

    /**
     * Builds a [DateTimeComponents] format with an abbreviated English day of week (which may be absent
     * when parsing), an unpadded day, an abbreviated English month name, the year, `hour:minute` with an
     * optional `:second`, and finally a zone designator: `UT` and `Z` are parsing alternatives, while the
     * primary form is a four-digit offset wrapped in `optional("GMT")`.
     */
    @Benchmark
    fun buildRfc1123DateTimeFormat(blackhole: Blackhole) {
        val rfc1123 = DateTimeComponents.Format {
            alternativeParsing(
                {
                    // the day of week may be missing
                },
                primaryFormat = {
                    dayOfWeek(DayOfWeekNames.ENGLISH_ABBREVIATED)
                    chars(", ")
                }
            )
            day(Padding.NONE)
            char(' ')
            monthName(MonthNames.ENGLISH_ABBREVIATED)
            char(' ')
            year()
            char(' ')
            hour()
            char(':')
            minute()
            optional {
                char(':')
                second()
            }
            chars(" ")
            alternativeParsing(
                { chars("UT") },
                { chars("Z") },
                primaryFormat = {
                    optional("GMT") {
                        offset(
                            UtcOffset.Format {
                                offsetHours()
                                offsetMinutesOfHour()
                            }
                        )
                    }
                }
            )
        }
        blackhole.consume(rfc1123)
    }

    /**
     * Builds a [DateTimeComponents] format made of a nested `year-month-day` date format, a `T` separator
     * (with `t` as a parsing alternative), `hour:minute:second` with an optional `.fraction`, and a UTC
     * offset section. The offset's primary form is `hours:minutes[:seconds]` wrapped in `optional("Z")`,
     * with `z` and a bare `offsetHours()` registered as parsing alternatives.
     */
    @Benchmark
    fun buildIsoDateTimeOffsetFormat(blackhole: Blackhole) {
        val isoDateTimeWithOffset = DateTimeComponents.Format {
            date(
                LocalDate.Format {
                    year()
                    char('-')
                    monthNumber()
                    char('-')
                    day()
                }
            )
            alternativeParsing({ char('t') }, primaryFormat = { char('T') })
            hour()
            char(':')
            minute()
            char(':')
            second()
            optional {
                char('.')
                secondFraction(1, 9)
            }
            alternativeParsing(
                { offsetHours() },
                primaryFormat = {
                    offset(
                        UtcOffset.Format {
                            alternativeParsing(
                                { chars("z") },
                                primaryFormat = {
                                    optional("Z") {
                                        offsetHours()
                                        char(':')
                                        offsetMinutesOfHour()
                                        optional {
                                            char(':')
                                            offsetSecondsOfMinute()
                                        }
                                    }
                                }
                            )
                        }
                    )
                }
            )
        }
        blackhole.consume(isoDateTimeWithOffset)
    }
}
88 changes: 88 additions & 0 deletions benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright 2019-2025 JetBrains s.r.o. and contributors.
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
*/

@file:Suppress("unused")

package kotlinx.datetime

import kotlinx.datetime.format.alternativeParsing
import kotlinx.datetime.format.char
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.Blackhole
import java.util.concurrent.TimeUnit

/**
 * JMH benchmarks that measure how long it takes to construct a [LocalDateTime] format containing
 * [n] consecutive groups, each of which includes one or more `alternativeParsing` sections.
 *
 * Only format construction is measured: the built format is passed to the [Blackhole] and never used
 * for parsing or formatting. Results are reported as the average time per invocation, in nanoseconds.
 */
@Warmup(iterations = 10, time = 2)
@Measurement(iterations = 20, time = 2)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
@Fork(1)
open class ParallelFormatBenchmark {

    /** Number of repeated groups put into the format; injected by JMH from the listed values. */
    @Param("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")
    var n = 0

    /**
     * Builds a format of [n] identical groups: a three-way `alternativeParsing`
     * (`monthNumber` / `day` alternatives, `hour` as the primary form) followed by `@minute#second`.
     */
    @Benchmark
    fun formatCreationWithAlternativeParsing(blackhole: Blackhole) {
        val repeatedAlternatives = LocalDateTime.Format {
            repeat(n) {
                alternativeParsing({ monthNumber() }, { day() }) { hour() }
                char('@')
                minute()
                char('#')
                second()
            }
        }
        blackhole.consume(repeatedAlternatives)
    }

    /**
     * Builds a format of [n] groups whose content depends on the group index:
     * every group starts with a three-way date `alternativeParsing` and a `|` separator;
     * groups whose index has bit 0 clear also get a nested `alternativeParsing` block before the separator,
     * groups whose index is divisible by three get a second `|`,
     * and groups whose index has bit 1 clear end with a three-way offset-like `alternativeParsing`.
     */
    @Benchmark
    fun formatCreationWithNestedAlternativeParsing(blackhole: Blackhole) {
        val nestedAlternatives = LocalDateTime.Format {
            for (index in 0 until n) {
                // `and` binds tighter than `==`, so these are the same checks as `index and 1 == 0` etc.
                val lowestBitClear = (index and 1) == 0
                val secondBitClear = (index and 2) == 0
                val divisibleByThree = index % 3 == 0

                alternativeParsing(
                    {
                        monthNumber()
                        char('-')
                        day()
                    },
                    {
                        day()
                        char('/')
                        monthNumber()
                    }
                ) {
                    year()
                    char('-')
                    monthNumber()
                    char('-')
                    day()
                }

                if (lowestBitClear) {
                    alternativeParsing(
                        {
                            alternativeParsing(
                                {
                                    hour()
                                    char(':')
                                    minute()
                                },
                                {
                                    minute()
                                    char(':')
                                    second()
                                }
                            ) {
                                hour()
                                char(':')
                                minute()
                                char(':')
                                second()
                            }
                        }
                    ) {
                        year()
                        char('-')
                        monthNumber()
                        char('-')
                        day()
                        char('T')
                        hour()
                        char(':')
                        minute()
                        char(':')
                        second()
                    }
                }

                char('|')
                if (divisibleByThree) {
                    char('|')
                }

                if (secondBitClear) {
                    alternativeParsing(
                        { char('Z') },
                        {
                            char('+')
                            hour()
                            char(':')
                            minute()
                        }
                    ) {
                        char('-')
                        hour()
                        char(':')
                        minute()
                    }
                }
            }
        }
        blackhole.consume(nestedAlternatives)
    }
}
82 changes: 44 additions & 38 deletions core/common/src/internal/format/parser/Parser.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2023 JetBrains s.r.o. and contributors.
* Copyright 2023-2025 JetBrains s.r.o. and contributors.
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
*/

Expand Down Expand Up @@ -41,19 +41,40 @@ internal class ParserStructure<in Output>(
"${operations.joinToString(", ")}(${followedBy.joinToString(";")})"
}

// TODO: O(size of the resulting parser ^ 2), but can be O(size of the resulting parser)
internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
fun <T> ParserStructure<T>.append(other: ParserStructure<T>): ParserStructure<T> = if (followedBy.isEmpty()) {
ParserStructure(operations + other.operations, other.followedBy)
} else {
ParserStructure(operations, followedBy.map { it.append(other) })
fun mergeOperations(
baseOperations: List<ParserOperation<T>>,
numberSpan: List<NumberConsumer<T>>?,
unconditionalModifications: List<UnconditionalModification<T>>,
simplifiedParserStructure: ParserStructure<T>,
): ParserStructure<T> {
val operationsToMerge = simplifiedParserStructure.operations
val firstOperation = operationsToMerge.firstOrNull()
val mergedOperations = buildList {
addAll(baseOperations)
when {
numberSpan == null -> {
addAll(operationsToMerge)
}
firstOperation is NumberSpanParserOperation -> {
add(NumberSpanParserOperation(numberSpan + firstOperation.consumers))
addAll(operationsToMerge.drop(1))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drop(1) creates an additional new list, so I'd expect a manual iteration over the indices here instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}
else -> {
add(NumberSpanParserOperation(numberSpan))
addAll(operationsToMerge)
}
}
addAll(unconditionalModifications)
}
return ParserStructure(mergedOperations, simplifiedParserStructure.followedBy)
}

fun <T> ParserStructure<T>.simplify(unconditionalModifications: List<UnconditionalModification<T>>): ParserStructure<T> {
fun ParserStructure<T>.simplifyAndAppend(other: ParserStructure<T>): ParserStructure<T> {
val newOperations = mutableListOf<ParserOperation<T>>()
var currentNumberSpan: MutableList<NumberConsumer<T>>? = null
val unconditionalModificationsForTails = unconditionalModifications.toMutableList()
// joining together the number consumers in this parser before the first alternative;
val unconditionalModifications = mutableListOf<UnconditionalModification<T>>()
// joining together the number consumers (`NumberConsumer`s) in this parser before the first alternative;
// collecting the unconditional modifications to push them to the end of all the parser's branches.
for (op in operations) {
if (op is NumberSpanParserOperation) {
Expand All @@ -63,7 +84,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
currentNumberSpan = op.consumers.toMutableList()
}
} else if (op is UnconditionalModification) {
unconditionalModificationsForTails.add(op)
unconditionalModifications.add(op)
} else {
if (currentNumberSpan != null) {
newOperations.add(NumberSpanParserOperation(currentNumberSpan))
Expand All @@ -72,8 +93,9 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
newOperations.add(op)
}
}

val mergedTails = followedBy.flatMap {
val simplified = it.simplify(unconditionalModificationsForTails)
val simplified = it.simplifyAndAppend(other)
// parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
// unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d).
// As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty
Expand All @@ -83,46 +105,30 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
else
listOf(simplified)
}.ifEmpty {
// preserving the invariant that `mergedTails` contains all unconditional modifications
listOf(ParserStructure(unconditionalModificationsForTails, emptyList()))
if (other.operations.isNotEmpty()) {
return mergeOperations(newOperations, currentNumberSpan, unconditionalModifications, other)
}
other.followedBy
}

return if (currentNumberSpan == null) {
// the last operation was not a number span, or it was a number span that we are allowed to interrupt
newOperations.addAll(unconditionalModifications)
ParserStructure(newOperations, mergedTails)
} else if (mergedTails.none {
it.operations.firstOrNull()?.let { it is NumberSpanParserOperation } == true
}) {
} else if (mergedTails.none { it.operations.firstOrNull() is NumberSpanParserOperation }) {
// the last operation was a number span, but there are no alternatives that start with a number span.
newOperations.add(NumberSpanParserOperation(currentNumberSpan))
newOperations.addAll(unconditionalModifications)
ParserStructure(newOperations, mergedTails)
} else {
val newTails = mergedTails.map {
when (val firstOperation = it.operations.firstOrNull()) {
is NumberSpanParserOperation -> {
ParserStructure(
listOf(NumberSpanParserOperation(currentNumberSpan + firstOperation.consumers)) + it.operations.drop(
1
),
it.followedBy
)
}

null -> ParserStructure(
listOf(NumberSpanParserOperation(currentNumberSpan)),
it.followedBy
)

else -> ParserStructure(
listOf(NumberSpanParserOperation(currentNumberSpan)) + it.operations,
it.followedBy
)
}
mergeOperations(emptyList(), currentNumberSpan, unconditionalModifications, it)
}
ParserStructure(newOperations, newTails)
}
}
val naiveParser = foldRight(ParserStructure<T>(emptyList(), emptyList())) { parser, acc -> parser.append(acc) }
return naiveParser.simplify(emptyList())

return foldRight(ParserStructure(emptyList(), emptyList())) { parser, acc -> parser.simplifyAndAppend(acc) }
}

internal interface Copyable<Self> {
Expand Down