Skip to content

Commit 4bff110

Browse files
author
Sergey Mashkov
committed
Introduce peekCharUtf8
1 parent 1b483b3 commit 4bff110

File tree

4 files changed

+206
-0
lines changed

4 files changed

+206
-0
lines changed

binary-compatibility-validator/reference-public-api/kotlinx-io-jvm.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ public final class kotlinx/io/core/InputKt {
304304
public static final fun discard (Lkotlinx/io/core/Input;)J
305305
public static final fun discardExact (Lkotlinx/io/core/Input;I)V
306306
public static final fun discardExact (Lkotlinx/io/core/Input;J)V
307+
public static final fun peekCharUtf8 (Lkotlinx/io/core/Input;)C
307308
public static final fun readAvailable (Lkotlinx/io/core/Input;Lkotlinx/io/core/IoBuffer;I)I
308309
public static final fun readAvailable (Lkotlinx/io/core/Input;[BII)I
309310
public static final fun readAvailable (Lkotlinx/io/core/Input;[DII)I

src/main/kotlin/kotlinx/io/core/Input.kt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,30 @@ inline fun Input.takeWhileSize(initialSize: Int = 1, block: (IoBuffer) -> Int) {
221221
}
222222
}
223223

224+
/**
 * Peeks the next UTF-8 encoded character of this input without consuming it.
 *
 * A leading byte with the high bit clear is a single-byte (ASCII) character and is
 * answered straight from the peeked byte. Any other leading byte is handed over to
 * [peekCharUtf8Impl], which decodes the multi-byte sequence.
 *
 * @return the peeked character
 * @throws EOFException if [tryPeek] reports `-1`, i.e. there is nothing left to peek
 */
@ExperimentalIoApi
fun Input.peekCharUtf8(): Char {
    val leadingByte = tryPeek()

    return when {
        // High bit clear: plain ASCII, the byte value is the character itself.
        // (-1 has the high bit set, so end of input never matches this branch.)
        leadingByte and 0x80 == 0 -> leadingByte.toChar()
        leadingByte == -1 -> throw EOFException("Failed to peek a char: end of input")
        else -> peekCharUtf8Impl(leadingByte)
    }
}
232+
233+
/**
 * Slow path of [peekCharUtf8] for characters that do not fit into a single byte.
 *
 * Asks [takeWhileSize] for at least as many bytes as the leading byte [first] announces
 * (see [byteCountUtf8]) and runs the UTF-8 decoder on them. The decoder callback records
 * the first character it is given and immediately answers `false` to stop decoding.
 *
 * @param first the already peeked leading byte of the sequence
 * @return the first decoded character
 * @throws MalformedUTF8InputException if the decoder never delivered a character
 */
private fun Input.peekCharUtf8Impl(first: Int): Char {
    // Stays null until the decoder hands us a character.
    var decoded: Char? = null

    takeWhileSize(byteCountUtf8(first)) { buffer ->
        // The decoder's result is passed back to takeWhileSize unchanged
        // (presumably the number of bytes it still needs - confirm against decodeUTF8).
        buffer.decodeUTF8 { ch ->
            decoded = ch
            false
        }
    }

    return decoded ?: throw MalformedUTF8InputException("No UTF-8 character found")
}

src/main/kotlin/kotlinx/io/core/internal/UTF8.kt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,25 @@ suspend fun decodeUTF8LineLoopSuspend(
8181
private fun prematureEndOfStreamUtf(size: Int): Nothing =
8282
throw EOFException("Premature end of stream: expected $size bytes to decode UTF-8 char")
8383

84+
/**
 * Counts the consecutive set bits of [firstByte], starting at bit `0x80` and moving
 * towards the lower bits, stopping at the first clear bit or after six bits.
 *
 * For a UTF-8 leading byte this is the number of bytes the encoded sequence announces:
 * `0` for an ASCII byte, `2` for `0b110xxxxx`, `3` for `0b1110xxxx` and so on.
 * A continuation byte (`0b10xxxxxx`) yields `1`.
 *
 * @param firstByte the leading byte; only its low eight bits are inspected
 * @return the number of leading one bits, in the range `0..6`
 */
@DangerousInternalIoApi
internal fun byteCountUtf8(firstByte: Int): Int {
    var leadingOnes = 0
    var probe = 0x80

    // Walk the probe bit downwards while it is set in the byte, at most six times.
    while (leadingOnes < 6 && (firstByte and probe) != 0) {
        probe = probe shr 1
        leadingOnes++
    }

    return leadingOnes
}
102+
84103
/**
85104
* Decodes all the bytes as UTF-8, applying [consumer] to every character until the input ends or the consumer returns `false`.
86105
* If the consumer returns `false`, the character is pushed back (all of its surrogates are pushed back as well)
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
package kotlinx.io.tests
2+
3+
import kotlinx.io.core.*
4+
import kotlin.test.*
5+
6+
/**
 * Tests for [peekCharUtf8]: end of input, ASCII characters, multi-byte characters, and
 * multi-byte characters whose bytes are spread over several chunks.
 */
class PeekCharTest {
    @Test
    fun testPeekEOF() {
        assertFailsWith<EOFException> {
            ByteReadPacket.Empty.peekCharUtf8()
        }
    }

    @Test
    fun testPeekAsciiSingle() {
        val packet = buildPacket { writeByte(0x37) }

        packet.use { input ->
            assertEquals('7', input.peekCharUtf8())
            // Peeking must leave the byte in place.
            assertEquals(1, input.remaining)
        }
    }

    @Test
    fun testPeekAsciiSeveral() {
        val packet = buildPacket {
            writeByte(0x37)
            writeByte(0x38)
            writeByte(0x39)
        }

        packet.use { input ->
            for (expected in listOf('7', '8', '9')) {
                assertEquals(expected, input.peekCharUtf8())
                input.discardExact(1)
            }
            assertEquals(0, input.remaining)
        }
    }

    @Test
    fun testPeekUtf8() {
        val packet = buildPacket { append('\u0422') }

        packet.use { input ->
            assertEquals('\u0422', input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8of3bytes() {
        val bopomofoChar = '\u310f'
        val packet = buildPacket { append(bopomofoChar) }

        packet.use { input ->
            assertEquals(3, input.remaining)
            assertEquals(bopomofoChar, input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8Edge() {
        val oSlash = '\u00f8'

        // Two-byte character split 1 + 1 across the chunk boundary.
        val packet = chainedPacket(
            fillFirst = { writeByte(0xc3.toByte()) },
            fillSecond = { writeByte(0xb8.toByte()) }
        )

        packet.use { input ->
            assertEquals(oSlash, input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8EdgeFor3BytesCharacter() {
        val bopomofoChar = '\u310f'

        // Three-byte character split 1 + 2 across the chunk boundary.
        val packet = chainedPacket(
            fillFirst = { writeByte(0xe3.toByte()) },
            fillSecond = {
                writeByte(0x84.toByte())
                writeByte(0x8f.toByte())
            }
        )

        packet.use { input ->
            assertEquals(bopomofoChar, input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8EdgeFor3BytesCharacter2() {
        val bopomofoChar = '\u310f'

        // Three-byte character split 2 + 1 across the chunk boundary.
        val packet = chainedPacket(
            fillFirst = {
                writeByte(0xe3.toByte())
                writeByte(0x84.toByte())
            },
            fillSecond = { writeByte(0x8f.toByte()) }
        )

        packet.use { input ->
            assertEquals(bopomofoChar, input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8EdgeReservedFor3BytesCharacter() {
        val bopomofoChar = '\u310f'

        // The leading byte is written after 4087 filler bytes in the first chunk;
        // the rest of the character and a trailing '0' go into the second chunk.
        val packet = chainedPacket(
            fillFirst = {
                writeFully(ByteArray(4087))
                writeByte(0xe3.toByte())
            },
            fillSecond = {
                writeByte(0x84.toByte())
                writeByte(0x8f.toByte())
                writeByte(0x30)
            }
        )

        packet.use { input ->
            input.discardExact(4087)
            assertEquals(bopomofoChar, input.peekCharUtf8())
            input.discardExact(3)
            assertEquals('0', input.peekCharUtf8())
        }
    }

    @Test
    fun testPeekUtf8EdgeFor3BytesCharacterFromAbstractInput() {
        val bopomofoChar = '\u310f'
        // Each fill() call hands out a chunk holding exactly one of these bytes.
        val pendingBytes = listOf(0xe3, 0x84, 0x8f).iterator()

        val myInput = object : AbstractInput() {
            override fun fill(): IoBuffer? {
                if (!pendingBytes.hasNext()) return null

                val chunk = pool.borrow()
                chunk.writeByte(pendingBytes.next().toByte())
                return chunk
            }

            override fun closeSource() {
            }
        }

        myInput.use { input ->
            assertEquals(bopomofoChar, input.peekCharUtf8())
        }
    }

    /**
     * Builds a packet out of two pooled chunks linked together. The first chunk gets an
     * end gap of 8 reserved before anything is written; [fillFirst] then writes into the
     * first chunk and [fillSecond] into the second one.
     */
    private fun chainedPacket(
        fillFirst: IoBuffer.() -> Unit,
        fillSecond: IoBuffer.() -> Unit
    ): ByteReadPacket {
        val head = IoBuffer.Pool.borrow()
        val tail = IoBuffer.Pool.borrow()
        head.reserveEndGap(8)
        head.next = tail

        head.fillFirst()
        tail.fillSecond()

        return ByteReadPacket(head, IoBuffer.Pool)
    }
}

0 commit comments

Comments
 (0)