Skip to content

Commit d01535a

Browse files
committed
Merge pull request 'fix/name-token-index-escaping' (!84) from fix/name-token-index-escaping into dev
Reviewed-on: https://git.sciprog.center/kscience/dataforge-core/pulls/84
Reviewed-by: Alexander Nozik <[email protected]>
2 parents 1e86fe4 + a4e57d9 commit d01535a

File tree

7 files changed, with 171 additions and 42 deletions.

dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public inline fun ByteArray(block: Sink.() -> Unit): ByteArray =
3030
public inline fun Binary(block: Sink.() -> Unit): Binary =
3131
ByteArray(block).asBinary()
3232

33-
public operator fun Binary.get(range: IntRange): Binary = view(range.first, range.last - range.first)
33+
public operator fun Binary.get(range: IntRange): Binary = view(range.first, range.last - range.first + 1)
3434

3535
/**
3636
* Return inferred [EnvelopeFormat] if only one format could read given file. If no format accepts the binary, return null. If
@@ -128,21 +128,23 @@ public fun Source.readWithSeparatorTo(
128128
val byte = readByte()
129129
counter++
130130
if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.")
131+
val flushedByte = if (rb.isFull()) rb[0] else null
131132
rb.push(byte)
132133
if (rb.contentEquals(separator)) {
133134
return counter
134-
} else if (rb.isFull()) {
135-
output?.writeByte(rb[0])
135+
} else if (flushedByte != null) {
136+
output?.writeByte(flushedByte)
136137
}
137138
}
138139

139140
if (errorOnEof) {
140141
error("Read to the end of input without encountering ${separator.decodeToString()}")
141142
} else {
142-
for (i in 1 until rb.size) {
143-
output?.writeByte(rb[i])
143+
if (output != null) {
144+
for (i in 0 until rb.size) {
145+
output.writeByte(rb[i])
146+
}
144147
}
145-
counter += (rb.size - 1)
146148
return counter
147149
}
148150
}

dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package space.kscience.dataforge.io
22

3+
import kotlinx.io.Buffer
34
import kotlinx.io.buffered
45
import kotlinx.io.bytestring.encodeToByteString
56
import kotlinx.io.readByteArray
@@ -55,4 +56,28 @@ class IOTest {
5556
}
5657

5758
}
59+
60+
@Test
61+
fun testReadWithSeparatorToLosesByteOnShortEof() {
62+
val separator = "END_SEPARATOR".encodeToByteString()
63+
val content = "short"
64+
val source = Buffer().apply { write(content.encodeToByteArray()) }
65+
val output = Buffer()
66+
67+
source.readWithSeparatorTo(output, separator, errorOnEof = false)
68+
69+
assertEquals(content, output.readByteArray().decodeToString(), "Should not lose the first byte on short EOF")
70+
}
71+
72+
@Test
73+
fun `range operator on Binary is inclusive`() {
74+
val src = byteArrayOf(0, 1, 2, 3, 4).asBinary()
75+
76+
val slice = src[1..3]
77+
78+
assertEquals(3, slice.size, "Binary[1..3] must contain 3 bytes")
79+
80+
val bytes = slice.toByteArray()
81+
assertEquals(listOf<Byte>(1, 2, 3), bytes.toList(), "Slice content must include the right bound")
82+
}
5883
}

dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/meta/Value.kt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,16 @@ public interface Value {
8383
*/
8484
public fun parse(string: String): Value {
8585

86-
//Trying to get integer
8786
if (string.isEmpty() || string == Null.string) {
8887
return Null
8988
}
9089

9190
//string constants
92-
if (string.startsWith("\"") && string.endsWith("\"")) {
93-
return StringValue(string.substring(1, string.length - 2))
91+
if (string.length >= 2 && string.startsWith("\"") && string.endsWith("\"")) {
92+
return StringValue(string.substring(1, string.length - 1))
9493
}
9594

95+
//Trying to get integer
9696
string.toIntOrNull()?.let {
9797
return NumberValue(it)
9898
}
@@ -190,7 +190,11 @@ public class NumberValue(public val number: Number) : Value {
190190
}
191191
}
192192

193-
override fun hashCode(): Int = numberOrNull.hashCode()
193+
override fun hashCode(): Int {
194+
val d = number.toDouble()
195+
val canonical = if (d == 0.0) 0.0 else d
196+
return canonical.hashCode()
197+
}
194198
}
195199

196200
@JvmInline

dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/Name.kt

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,48 +68,58 @@ public class Name(public val tokens: List<NameToken>) {
6868
for (it in string) {
6969
when {
7070
escape -> {
71-
if (queryOn()) {
72-
queryBuilder.append(it)
73-
} else {
74-
bodyBuilder.append(it)
75-
}
71+
(if (queryOn()) queryBuilder else bodyBuilder).append(it)
7672
escape = false
7773
}
7874

7975
it == '\\' -> {
8076
escape = true
77+
if (queryOn()) queryBuilder.append(it)
8178
}
8279

8380
queryOn() -> {
81+
queryBuilder.append(it)
8482
when (it) {
8583
'[' -> bracketCount++
8684
']' -> bracketCount--
8785
}
88-
if (queryOn()) queryBuilder.append(it)
8986
}
9087

9188
else -> when (it) {
9289
'.' -> {
93-
val query = if (queryBuilder.isEmpty()) null else queryBuilder.toString()
94-
add(NameToken(bodyBuilder.toString(), query))
90+
val query = if (queryBuilder.isNotEmpty()) queryBuilder.toString().dropLast(1) else null
91+
add(NameToken(bodyBuilder.toString(), query?.unescape()))
9592
bodyBuilder = StringBuilder()
9693
queryBuilder = StringBuilder()
9794
}
98-
9995
'[' -> bracketCount++
10096
']' -> error("Syntax error: closing bracket ] not have not matching open bracket")
101-
else -> {
102-
if (queryBuilder.isNotEmpty()) error("Syntax error: only name end and name separator are allowed after index")
103-
bodyBuilder.append(it)
104-
}
97+
else -> bodyBuilder.append(it)
10598
}
10699
}
107100
}
108-
val query = if (queryBuilder.isEmpty()) null else queryBuilder.toString()
109-
add(NameToken(bodyBuilder.toString(), query))
101+
val query = if (queryBuilder.isNotEmpty()) queryBuilder.toString().dropLast(1) else null
102+
add(NameToken(bodyBuilder.toString(), query?.unescape()))
110103
}
111104
return Name(tokens)
112105
}
106+
107+
private fun String.unescape(): String {
108+
if ('\\' !in this) return this
109+
val builder = StringBuilder(length)
110+
var i = 0
111+
while (i < length) {
112+
val char = this[i]
113+
if (char == '\\' && i + 1 < length) {
114+
builder.append(this[i + 1])
115+
i += 2
116+
} else {
117+
builder.append(char)
118+
i++
119+
}
120+
}
121+
return builder.toString()
122+
}
113123
}
114124
}
115125

dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/NameToken.kt

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,25 @@ public class NameToken(public val body: String, public val index: String? = null
1616

1717
private val bodyEscaped by lazy {
1818
val escaped = buildString {
19-
body.forEach {
20-
if (it in escapedChars) {
19+
body.forEach { ch ->
20+
if (ch in escapedBodyChars) {
2121
append('\\')
2222
}
23-
append(it)
23+
append(ch)
2424
}
2525
}
2626
if (escaped == body) body else escaped
2727
}
2828

29+
private val indexEscaped by lazy {
30+
index?.replace("\\", "\\\\")
31+
?.replace("]", "\\]")
32+
?.replace("[", "\\[")
33+
}
34+
35+
2936
override fun toString(): String = if (hasIndex()) {
30-
"${bodyEscaped}[$index]"
37+
"${bodyEscaped}[${indexEscaped!!}]"
3138
} else {
3239
bodyEscaped
3340
}
@@ -61,33 +68,33 @@ public class NameToken(public val body: String, public val index: String? = null
6168

6269
public companion object {
6370

64-
private val escapedChars = listOf('\\', '.', '[', ']')
71+
private val escapedBodyChars = listOf('\\', '.', '[', ']')
6572

6673
/**
6774
* Parse name token from a string
6875
*/
6976
public fun parse(string: String): NameToken {
7077
var indexStart = -1
7178
var indexEnd = -1
79+
var escape = false
7280
string.forEachIndexed { index, c ->
81+
if(escape){
82+
escape = false
83+
return@forEachIndexed
84+
}
7385
when (c) {
74-
'[' -> when {
75-
indexStart >= 0 -> error("Second opening bracket not allowed in NameToken: $string")
76-
else -> indexStart = index
77-
}
78-
79-
']' -> when {
80-
indexStart < 0 -> error("Closing index bracket could not be used before opening bracket in NameToken: $string")
81-
indexEnd >= 0 -> error("Second closing bracket not allowed in NameToken: $string")
82-
else -> indexEnd = index
83-
}
84-
86+
'\\' -> escape = true
87+
'[' -> if(indexStart < 0) indexStart = index
88+
']' -> if(indexStart >= 0) indexEnd = index
8589
else -> if (indexEnd >= 0) error("Symbols not allowed after index in NameToken: $string")
8690
}
8791
}
92+
8893
if (indexStart >= 0 && indexEnd < 0) error("Opening bracket without closing bracket not allowed in NameToken: $string")
94+
if (indexStart > indexEnd && indexEnd != -1) error("Closing bracket before opening one in NameToken: $string")
95+
8996
return NameToken(
90-
if (indexStart >= 0) string.substring(0, indexStart) else string,
97+
if (indexStart >= 0) string.take(indexStart) else string,
9198
if (indexStart >= 0) string.substring(indexStart + 1, indexEnd) else null
9299
)
93100
}

dataforge-meta/src/commonTest/kotlin/space/kscience/dataforge/meta/MetaTest.kt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
package space.kscience.dataforge.meta
22

33
import space.kscience.dataforge.misc.DFExperimental
4+
import space.kscience.dataforge.names.Name
5+
import space.kscience.dataforge.names.NameToken
6+
import space.kscience.dataforge.names.first
47
import kotlin.test.Test
58
import kotlin.test.assertEquals
9+
import kotlin.test.assertTrue
610

711
class MetaTest {
812
@Test
@@ -90,4 +94,72 @@ class MetaTest {
9094
println(oldMeta)
9195
assertEquals(setOf("a", "b", "d"), oldMeta.items.keys.map { it.toString() }.toSet())
9296
}
97+
98+
@Test
99+
fun testValueParseQuotedString() {
100+
val cases = listOf(
101+
"\"abc\"" to "abc",
102+
"\"\"" to "",
103+
"\"123\"" to "123",
104+
"\"true\"" to "true",
105+
"\" a b \"" to " a b "
106+
)
107+
108+
for ((input, expected) in cases) {
109+
val v = Value.parse(input)
110+
assertTrue(v is StringValue, "Expected StringValue for $input")
111+
assertEquals(expected, v.string, "Wrong literal parsing for $input")
112+
}
113+
}
114+
115+
@Test
116+
fun `single double-quote char is not treated as a quoted block`() {
117+
val v = Value.parse("\"")
118+
assertTrue(v is StringValue, "Expected StringValue for single quote")
119+
assertEquals("\"", v.string)
120+
}
121+
122+
private fun assertRoundTrip(token: NameToken, expectedString: String) {
123+
val asString = token.toString()
124+
assertEquals(expectedString, asString, "String representation mismatch for token with index '${token.index}'")
125+
val reparsed = Name.parse(asString).first()
126+
assertEquals(token, reparsed, "Round-trip parse(toString()) failed for token: $token")
127+
}
128+
129+
@Test
130+
fun `index with opening bracket is parsable`() {
131+
assertRoundTrip(NameToken("tok", "a[b"), "tok[a\\[b]")
132+
}
133+
134+
@Test
135+
fun `mixed index stays parsable and equal after round-trip`() {
136+
assertRoundTrip(NameToken("tok", "a\\b]c[d"), "tok[a\\\\b\\]c\\[d]")
137+
}
138+
139+
@Test
140+
fun `numeric equality implies equal hash codes`() {
141+
val intOne: Value = Value.of(1)
142+
val doubleOne: Value = Value.of(1.0)
143+
144+
assertEquals(intOne, doubleOne, "1 and 1.0 must be equal as Value")
145+
146+
assertEquals(
147+
intOne.hashCode(),
148+
doubleOne.hashCode(),
149+
"Equal numeric Values must produce equal hash codes"
150+
)
151+
152+
val set = hashSetOf(intOne)
153+
assertTrue(doubleOne in set, "HashSet containment must work for equal numeric Values")
154+
}
155+
156+
@Test
157+
fun `minus zero and plus zero must hash equally when equal`() {
158+
val a = Value.of(-0.0)
159+
val b = Value.of(0.0)
160+
161+
assertEquals(a, b, "(-0.0) and (+0.0) must be equal as Value")
162+
163+
assertEquals(a.hashCode(), b.hashCode(), "Equal numeric Values must produce equal hash codes")
164+
}
93165
}

dataforge-meta/src/commonTest/kotlin/space/kscience/dataforge/names/NameTest.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,13 @@ class NameTest {
7474
NameToken.parse("token[22]ddd")
7575
}
7676
}
77+
78+
@Test
79+
fun testIndexEscaping() {
80+
val tokenWithBracketInIndex = NameToken("a", "b]c")
81+
val asString = tokenWithBracketInIndex.toString()
82+
val parsedName = Name.parse(asString)
83+
assertEquals(1, parsedName.length, "Parsed name should have a single token")
84+
assertEquals(tokenWithBracketInIndex, parsedName.firstOrNull(), "Parsed token should be equal to the original")
85+
}
7786
}

0 commit comments

Comments
 (0)