Skip to content

Commit 27b52ca

Browse files
committed
Implement the general machinery for parsing entities from code
1 parent 90bf21f commit 27b52ca

File tree

4 files changed

+790
-0
lines changed

4 files changed

+790
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright 2023 JetBrains s.r.o. and contributors.
3+
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
4+
*/
5+
6+
package kotlinx.datetime.internal.format.parser
7+
8+
import kotlinx.datetime.internal.POWERS_OF_TEN
9+
import kotlinx.datetime.internal.DecimalFraction
10+
11+
/**
 * A consumer of a run of digits.
 *
 * Implementations are handed a string made up of exactly [length] digits or, when [length] is `null`,
 * a string made up of an arbitrary number of digits.
 */
internal sealed class NumberConsumer<in Receiver>(
    /** How many digits this consumer takes; `null` denotes a variable number of digits. */
    open val length: Int?,
    /** A human-readable name of the entity that is being parsed by this consumer. */
    val whatThisExpects: String
) {
    /**
     * Consumes [input] in its entirety, recording the outcome in [storage].
     *
     * The caller is expected to pass a string of [length] digits (or of any number of digits if [length]
     * is `null`); implementations are not obliged to verify the length of [input] themselves.
     *
     * @return `null` if the input was accepted, or a [NumberConsumptionError] describing the failure.
     */
    abstract fun consume(storage: Receiver, input: String): NumberConsumptionError?
}
30+
31+
/**
 * The reason why a number consumer refused the digits it was given.
 */
internal interface NumberConsumptionError {
    /** Returns a human-readable description of the failure. */
    fun errorMessage(): String

    /** The digits could not be interpreted as an [Int]. */
    object ExpectedInt : NumberConsumptionError {
        override fun errorMessage(): String {
            return "expected an Int value"
        }
    }

    /** More than [maxDigits] digits were supplied. */
    class TooManyDigits(val maxDigits: Int) : NumberConsumptionError {
        override fun errorMessage(): String {
            return "expected at most $maxDigits digits"
        }
    }

    /** Fewer than [minDigits] digits were supplied. */
    class TooFewDigits(val minDigits: Int) : NumberConsumptionError {
        override fun errorMessage(): String {
            return "expected at least $minDigits digits"
        }
    }

    /** The input differed from the constant string [expected]. */
    class WrongConstant(val expected: String) : NumberConsumptionError {
        override fun errorMessage(): String {
            return "expected '$expected'"
        }
    }

    /** The target already held the value [conflicting], which the new value would have replaced. */
    class Conflicting(val conflicting: Any) : NumberConsumptionError {
        override fun errorMessage(): String {
            return "attempted to overwrite the existing value '$conflicting'"
        }
    }
}
49+
50+
/**
 * A parser that accepts an [Int] value in range from `0` to [Int.MAX_VALUE].
 *
 * The input must be between [minLength] and [maxLength] characters long (a `null` bound is not checked).
 * When both bounds are equal, that value is reported as the fixed [length]; otherwise the length is variable.
 * If [multiplyByMinus1] is set, the parsed number is negated before it is stored through [setter].
 */
// TODO: should the parser reject excessive padding?
internal class UnsignedIntConsumer<in Receiver>(
    private val minLength: Int?,
    private val maxLength: Int?,
    private val setter: AssignableField<Receiver, Int>,
    name: String,
    private val multiplyByMinus1: Boolean = false,
) : NumberConsumer<Receiver>(if (minLength == maxLength) minLength else null, name) {

    init {
        // Only a fixed length is validated here; variable-length consumers are not bounded.
        require(length == null || length in 1..9) { "Invalid length for field $whatThisExpects: $length" }
    }

    /**
     * Checks the length of [input], parses it as an [Int], and stores the (possibly negated) value
     * in [storage] unless a different value is already there.
     *
     * @return `null` on success, otherwise the error describing why the input was rejected.
     */
    override fun consume(storage: Receiver, input: String): NumberConsumptionError? {
        if (maxLength != null && input.length > maxLength) {
            return NumberConsumptionError.TooManyDigits(maxLength)
        }
        if (minLength != null && input.length < minLength) {
            return NumberConsumptionError.TooFewDigits(minLength)
        }
        val parsed = input.toIntOrNull() ?: return NumberConsumptionError.ExpectedInt
        val valueToStore = if (multiplyByMinus1) -parsed else parsed
        return setter.setWithoutReassigning(storage, valueToStore)
    }
}
75+
76+
/**
 * A parser for a number written with only its [length] trailing digits, which is expanded back to a full
 * value relative to [base].
 *
 * With `modulo = POWERS_OF_TEN[length]` (presumably `10^length` — confirm against the table's definition),
 * an input not smaller than `base % modulo` is placed in the same `modulo`-sized block as [base],
 * and a smaller input is placed in the next block.
 *
 * NOTE(review): for a negative [base], `%` yields a non-positive remainder, so the second case is never
 * taken — confirm whether negative bases are meant to be supported.
 */
internal class ReducedIntConsumer<in Receiver>(
    override val length: Int,
    private val setter: AssignableField<Receiver, Int>,
    name: String,
    val base: Int,
) : NumberConsumer<Receiver>(length, name) {

    private val modulo = POWERS_OF_TEN[length]
    private val baseMod = base % modulo
    private val baseFloor = base - baseMod

    /**
     * Parses [input] as an [Int], expands it relative to [base], and stores the result in [storage]
     * unless a different value is already there.
     *
     * @return `null` on success, otherwise the error describing why the input was rejected.
     */
    override fun consume(storage: Receiver, input: String): NumberConsumptionError? {
        val reduced = input.toIntOrNull() ?: return NumberConsumptionError.ExpectedInt
        // Inputs below the base's own trailing digits belong to the following block.
        val blockOffset = if (reduced >= baseMod) 0 else modulo
        return setter.setWithoutReassigning(storage, baseFloor + blockOffset + reduced)
    }
}
96+
97+
/**
 * A parser that accepts the string [expected] and nothing else.
 *
 * It stores nothing: a successful match only confirms that the input was the expected constant.
 */
internal class ConstantNumberConsumer<in Receiver>(
    private val expected: String
) : NumberConsumer<Receiver>(expected.length, "the predefined string $expected") {
    /**
     * Compares [input] with [expected]; [storage] is left untouched.
     *
     * @return `null` if the strings are equal, otherwise [NumberConsumptionError.WrongConstant].
     */
    override fun consume(storage: Receiver, input: String): NumberConsumptionError? = when (input) {
        expected -> null
        else -> NumberConsumptionError.WrongConstant(expected)
    }
}
109+
110+
/**
 * A parser for the digits of a fractional part.
 *
 * The input must be between [minLength] and [maxLength] characters long (a `null` bound is not checked).
 * When both bounds are equal, that value is reported as the fixed [length]; otherwise the length is variable.
 * The digits are read as an [Int] and stored through [setter] as a [DecimalFraction] built from that number
 * and the count of digits in the input.
 */
internal class FractionPartConsumer<in Receiver>(
    private val minLength: Int?,
    private val maxLength: Int?,
    private val setter: AssignableField<Receiver, DecimalFraction>,
    name: String,
) : NumberConsumer<Receiver>(if (minLength == maxLength) minLength else null, name) {
    init {
        // The value being validated is `minLength`, so that is what the message must report:
        // `length` is `null` whenever `minLength != maxLength`, which would hide the offending value.
        require(minLength == null || minLength in 1..9) {
            "Invalid length for field $whatThisExpects: $minLength"
        }
        // TODO: bounds on maxLength
    }

    /**
     * Checks the length of [input], parses it as an [Int], and stores the resulting [DecimalFraction]
     * in [storage] unless a different value is already there.
     *
     * @return `null` on success, otherwise the error describing why the input was rejected.
     */
    override fun consume(storage: Receiver, input: String): NumberConsumptionError? = when {
        minLength != null && input.length < minLength -> NumberConsumptionError.TooFewDigits(minLength)
        maxLength != null && input.length > maxLength -> NumberConsumptionError.TooManyDigits(maxLength)
        else -> when (val numerator = input.toIntOrNull()) {
            // For an all-digit input, failing to fit into an Int means it is too long;
            // nine digits always fit.
            null -> NumberConsumptionError.TooManyDigits(9)
            else -> setter.setWithoutReassigning(storage, DecimalFraction(numerator, input.length))
        }
    }
}
130+
131+
/**
 * Attempts to store [value] in [receiver] through `trySetWithoutReassigning` and translates the outcome
 * into the error type used by number consumers.
 *
 * @return `null` if `trySetWithoutReassigning` returned `null`, otherwise
 * [NumberConsumptionError.Conflicting] wrapping the value it returned.
 */
private fun <Object, Type> AssignableField<Object, Type>.setWithoutReassigning(
    receiver: Object,
    value: Type,
): NumberConsumptionError? =
    when (val conflictingValue = trySetWithoutReassigning(receiver, value)) {
        null -> null
        else -> NumberConsumptionError.Conflicting(conflictingValue)
    }
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright 2019-2022 JetBrains s.r.o. and contributors.
3+
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
4+
*/
5+
6+
package kotlinx.datetime.internal.format.parser
7+
8+
import kotlin.jvm.JvmInline
9+
10+
/**
 * The outcome of a single parsing step: either the index of the next unparsed character (success)
 * or a [ParseError] (failure).
 *
 * Instances are created through [Ok] and [Error] and inspected through [match].
 */
@JvmInline
internal value class ParseResult private constructor(val value: Any) {
    /**
     * Dispatches on the kind of result: calls [onSuccess] with the index of the next unparsed character,
     * or [onFailure] with the error, and returns whatever the invoked callback returns.
     */
    inline fun <T> match(onSuccess: (Int) -> T, onFailure: (ParseError) -> T): T {
        return when (val payload = value) {
            is Int -> onSuccess(payload)
            is ParseError -> onFailure(payload)
            // Unreachable as long as instances are only built by `Ok` and `Error`.
            else -> error("Unexpected parse result: $payload")
        }
    }

    companion object {
        /** A successful result; parsing may continue from [indexOfNextUnparsed]. */
        fun Ok(indexOfNextUnparsed: Int): ParseResult = ParseResult(indexOfNextUnparsed)

        /** A failed result at [position]; [message] is only invoked when the text is requested. */
        fun Error(position: Int, message: () -> String): ParseResult {
            val failure = ParseError(position, message)
            return ParseResult(failure)
        }
    }
}
25+
26+
/**
 * A parsing failure at [position], described by [message].
 *
 * The description is a function, so its text is only built when it is actually invoked.
 */
internal class ParseError(
    val position: Int,
    val message: () -> String,
)
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
/*
2+
* Copyright 2023 JetBrains s.r.o. and contributors.
3+
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
4+
*/
5+
6+
package kotlinx.datetime.internal.format.parser
7+
8+
import kotlin.jvm.JvmInline
9+
10+
/**
 * The commands a parser has to execute, split into two parts:
 * * [operations], which are executed one after another, and
 * * [followedBy], the alternative continuations that are tried *in parallel* once [operations] are done.
 *
 * An example of a [ParserStructure]:
 * ```
 * // number - dash - number - dash - number
 * //      |
 * //      \
 * //       letter 'W' - number
 * ParserStructure(
 *   listOf(numberParser),
 *   listOf(
 *     ParserStructure(
 *       listOf(stringParser("-"), numberParser, stringParser("-"), numberParser),
 *       emptyList()
 *     ),
 *     ParserStructure(
 *       listOf(stringParser("W"), numberParser),
 *       emptyList()
 *     ),
 *   )
 * )
 * ```
 */
internal class ParserStructure<in Output>(
    val operations: List<ParserOperation<Output>>,
    val followedBy: List<ParserStructure<Output>>,
) {
    /**
     * Renders the operations separated by `", "`, followed by the alternatives in parentheses
     * separated by `";"`.
     */
    override fun toString(): String = buildString {
        operations.joinTo(this, ", ")
        append('(')
        followedBy.joinTo(this, ";")
        append(')')
    }
}
43+
44+
// TODO: O(size of the resulting parser ^ 2), but can be O(size of the resulting parser)
/**
 * Chains the parsers of this list into a single parser that runs them one after another, and then
 * normalizes the result so that neighbouring number spans are merged into one span, even across
 * the boundary between a parser's own operations and its alternatives.
 */
internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
    // Attaches `other` to the end of every branch of this parser that has no further alternatives.
    fun <T> ParserStructure<T>.append(other: ParserStructure<T>): ParserStructure<T> {
        if (followedBy.isNotEmpty()) {
            return ParserStructure(operations, followedBy.map { it.append(other) })
        }
        return ParserStructure(operations + other.operations, other.followedBy)
    }

    fun <T> ParserStructure<T>.simplify(): ParserStructure<T> {
        val flattenedOperations = mutableListOf<ParserOperation<T>>()
        // Consumers of the number span currently being assembled; `null` when no span is open.
        var pendingConsumers: MutableList<NumberConsumer<T>>? = null
        // Glue consecutive number spans of this parser together before looking at the alternatives.
        for (operation in operations) {
            if (operation !is NumberSpanParserOperation) {
                // A non-number operation closes the span that was being assembled, if any.
                if (pendingConsumers != null) {
                    flattenedOperations.add(NumberSpanParserOperation(pendingConsumers))
                    pendingConsumers = null
                }
                flattenedOperations.add(operation)
                continue
            }
            if (pendingConsumers == null) {
                pendingConsumers = operation.consumers.toMutableList()
            } else {
                pendingConsumers.addAll(operation.consumers)
            }
        }
        val mergedTails = followedBy.flatMap { tail ->
            val simplifiedTail = tail.simplify()
            // `ParserStructure(emptyList(), p)` is equivalent to `p` unless `p` is empty:
            // for example, ((a|b)|(c|d)) is the same as (a|b|c|d).
            // `ParserStructure(emptyList(), emptyList())` is special, as it recognizes the empty string:
            // for example, (|a|b) is not the same as (a|b), so such an alternative must be kept.
            when {
                simplifiedTail.operations.isNotEmpty() -> listOf(simplifiedTail)
                else -> simplifiedTail.followedBy.ifEmpty { listOf(simplifiedTail) }
            }
        }
        // No span is left open: the last operation was not a number span, so nothing has to be merged.
        val trailingConsumers = pendingConsumers
            ?: return ParserStructure(flattenedOperations, mergedTails)
        val someTailStartsWithNumberSpan = mergedTails.any { tail ->
            tail.operations.firstOrNull() is NumberSpanParserOperation
        }
        if (!someTailStartsWithNumberSpan) {
            // The open span cannot be continued by any alternative, so it is closed here.
            flattenedOperations.add(NumberSpanParserOperation(trailingConsumers))
            return ParserStructure(flattenedOperations, mergedTails)
        }
        // Some alternative continues the number span: move the open span into every alternative,
        // merging it with the alternative's leading number span where there is one.
        val tailsWithSpan = mergedTails.map { tail ->
            val tailOperations: List<ParserOperation<T>> =
                when (val firstOperation = tail.operations.firstOrNull()) {
                    is NumberSpanParserOperation ->
                        listOf(NumberSpanParserOperation(trailingConsumers + firstOperation.consumers)) +
                            tail.operations.drop(1)

                    null -> listOf(NumberSpanParserOperation(trailingConsumers))

                    else -> listOf(NumberSpanParserOperation(trailingConsumers)) + tail.operations
                }
            ParserStructure(tailOperations, tail.followedBy)
        }
        return ParserStructure(flattenedOperations, tailsWithSpan)
    }

    val emptyParser = ParserStructure<T>(emptyList(), emptyList())
    val chained = foldRight(emptyParser) { parser, rest -> parser.append(rest) }
    return chained.simplify()
}
119+
120+
/**
 * A value that can produce a copy of itself.
 *
 * @param Self the type of the copy, normally the implementing type itself.
 */
internal interface Copyable<Self> {
    /** Returns a copy of this value. */
    fun copy(): Self
}
123+
124+
/**
 * Runs the parser described by a [ParserStructure] against an input, trying the alternatives one by one.
 */
@JvmInline
internal value class Parser<Output: Copyable<Output>>(
    private val commands: ParserStructure<Output>
) {
    /**
     * Tries every alternative of [commands] against [input].
     *
     * [startIndex] is the index of the first character that is not yet consumed.
     *
     * [allowDanglingInput] controls what happens when an alternative finishes before the end of [input]:
     * if `false`, the attempt only counts as a success when the whole string after [startIndex] has been
     * consumed; if `true`, unconsumed trailing input is accepted.
     *
     * [onSuccess] is invoked for each parsing attempt that succeeds, with the index of the next unparsed
     * character and the populated container. [onError] is invoked for each parsing attempt that fails.
     * The remaining alternatives keep being tried unless a callback leaves the function with a non-local return.
     */
    // Would be a great place to use the `Flow` from `kotlinx.coroutines` here instead of `onSuccess` and
    // `onError`, but alas.
    private inline fun parse(
        input: CharSequence,
        startIndex: Int,
        initialContainer: Output,
        allowDanglingInput: Boolean,
        onError: (ParseError) -> Unit,
        onSuccess: (Int, Output) -> Unit
    ) {
        // A stack of alternatives that are still to be tried.
        val pendingStates = mutableListOf(ParserState(initialContainer, commands, startIndex))
        while (true) {
            val state = pendingStates.removeLastOrNull() ?: break
            // Each attempt works on its own copy, so a failed attempt does not affect the others.
            val output = state.output.copy()
            var position = state.inputPosition
            val structure = state.parserStructure
            run parse_one_alternative@{
                for (operation in structure.operations) {
                    operation.consume(output, input, position).match(
                        onSuccess = { position = it },
                        onFailure = {
                            onError(it)
                            // abandon this alternative and move on to the next pending one
                            return@parse_one_alternative
                        }
                    )
                }
                val alternatives = structure.followedBy
                if (alternatives.isNotEmpty()) {
                    // Pushed in reverse, so that the first alternative is the first one to be popped.
                    for (alternative in alternatives.asReversed()) {
                        pendingStates.add(ParserState(output, alternative, position))
                    }
                } else if (allowDanglingInput || position == input.length) {
                    onSuccess(position, output)
                } else {
                    onError(ParseError(position) { "There is more input to consume" })
                }
            }
        }
    }

    /**
     * Parses the whole of [input] starting at [startIndex] and returns the container populated by the
     * first alternative that succeeds.
     *
     * @throws ParseException if no alternative succeeds. The exception describes the error that occurred
     * furthest into the input; the other errors are attached to it as suppressed exceptions.
     */
    fun match(input: CharSequence, initialContainer: Output, startIndex: Int = 0): Output {
        val errors = mutableListOf<ParseError>()
        parse(
            input,
            startIndex,
            initialContainer,
            allowDanglingInput = false,
            onError = { errors.add(it) },
            onSuccess = { _, out -> return@match out },
        )
        errors.sortByDescending { it.position }
        // `errors` can not be empty because each parser will have (successes + failures) >= 1, and here, successes == 0
        val failure = ParseException(errors.first())
        for (otherError in errors.drop(1)) {
            failure.addSuppressed(ParseException(otherError))
        }
        throw failure
    }

    /**
     * Parses the whole of [input] starting at [startIndex] and returns the container populated by the
     * first alternative that succeeds, or `null` if none does.
     */
    fun matchOrNull(input: CharSequence, initialContainer: Output, startIndex: Int = 0): Output? {
        parse(
            input,
            startIndex,
            initialContainer,
            allowDanglingInput = false,
            onError = { },
            onSuccess = { _, out -> return@matchOrNull out },
        )
        return null
    }

    /** An alternative waiting to be tried: the container so far, what remains to run, and where to resume. */
    private class ParserState<Output>(
        val output: Output,
        val parserStructure: ParserStructure<Output>,
        val inputPosition: Int,
    )
}
202+
203+
/**
 * An exception reporting a [ParseError]; its message consists of the position of the error followed by
 * the error's description, which is evaluated when the exception is constructed.
 */
internal class ParseException(error: ParseError) :
    Exception("Position ${error.position}: ${error.message()}")

0 commit comments

Comments
 (0)