Skip to content

Commit bf89d3c

Browse files
UnknownJoe796 and Sven Obser authored
Support for encoding/decoding a sequence of values (#20)
* Support for streaming via Reader and Appendable * Handle Microsoft Excel's insistence on using a byte order marker * Cleaning up new unit tests for FetchSourceTest * Removed commented debugging println's for FetchSource * Sequence encoding and decoding * Streaming serialization bug fix * Cleanup formatting * Refactor asynchronous API to `CsvRecordReader` and `CsvRecordWriter` * Fix blocking initialization of readers --------- Co-authored-by: Sven Obser <[email protected]>
1 parent 5a358d8 commit bf89d3c

File tree

8 files changed

+304
-80
lines changed

8 files changed

+304
-80
lines changed

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
dokka = "1.9.20"
33
junit-jupiter = "5.11.0"
44
kotlin = "2.1.0"
5+
kotlinx-coroutines = "1.9.0"
56
kotlinx-serialization-core = "1.7.3"
67
nexus-publish = "0.4.0"
78
nexus-staging = "0.30.0"
89
researchgate-release = "3.0.2"
910

1011
[libraries]
1112
junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit-jupiter" }
13+
kotlinx-coroutines-test = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-test", version.ref = "kotlinx-coroutines" }
1214
kotlinx-serialization-core = { module = "org.jetbrains.kotlinx:kotlinx-serialization-core", version.ref = "kotlinx-serialization-core" }
1315

1416
[plugins]

library/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies {
1515

1616
testImplementation(kotlin("test-junit5"))
1717
testImplementation(libs.junit.jupiter)
18+
testImplementation(libs.kotlinx.coroutines.test)
1819
}
1920

2021
kotlin {
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package kotlinx.serialization.csv
2+
3+
import kotlinx.serialization.ExperimentalSerializationApi
4+
import kotlinx.serialization.KSerializer
5+
import kotlinx.serialization.builtins.ListSerializer
6+
import kotlinx.serialization.csv.decode.CsvReader
7+
import kotlinx.serialization.csv.decode.FetchSource
8+
import kotlinx.serialization.csv.decode.RecordListCsvDecoder
9+
import kotlinx.serialization.encoding.CompositeDecoder.Companion.DECODE_DONE
10+
import java.io.Reader
11+
12+
/**
 * Iterator-style reader that hands out CSV records one at a time.
 */
interface CsvRecordReader<T : Any> : Iterator<T> {
    /**
     * Return the next record, or `null` when [hasNext] reports that nothing is left.
     */
    fun read(): T? {
        if (!hasNext()) {
            return null
        }
        return next()
    }
}
21+
22+
/**
 * Create a [CsvRecordReader] that parses CSV record-by-record from the given [input].
 *
 * A single decoder is created up front and shared by all calls on the returned reader,
 * so records are consumed from [input] in order as the reader is advanced.
 *
 * @param deserializer The deserializer used to decode each record.
 * @param input The reader supplying the CSV text. This function *does not close the reader*.
 * @return A record reader whose [Iterator.next] throws [NoSuchElementException] once the
 * decoder reports that no further record is available.
 */
@ExperimentalSerializationApi
fun <T : Any> Csv.recordReader(deserializer: KSerializer<T>, input: Reader): CsvRecordReader<T> {
    val decoder = RecordListCsvDecoder(
        csv = this,
        reader = CsvReader(FetchSource(input), config),
    )
    // Each record is decoded as one element of a list of T.
    val listDescriptor = ListSerializer(deserializer).descriptor
    var previousValue: T? = null

    return object : CsvRecordReader<T> {
        override fun hasNext(): Boolean =
            decoder.decodeElementIndex(listDescriptor) != DECODE_DONE

        override fun next(): T {
            val index = decoder.decodeElementIndex(listDescriptor)
            // Honour the Iterator contract: never pass DECODE_DONE on to the decoder as if it
            // were a valid element index.
            if (index == DECODE_DONE) {
                throw NoSuchElementException("No more CSV records to read.")
            }
            return decoder
                .decodeSerializableElement(listDescriptor, index, deserializer, previousValue)
                .also { previousValue = it }
        }
    }
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package kotlinx.serialization.csv
2+
3+
import kotlinx.serialization.ExperimentalSerializationApi
4+
import kotlinx.serialization.KSerializer
5+
import kotlinx.serialization.csv.encode.CsvWriter
6+
import kotlinx.serialization.csv.encode.RecordListCsvEncoder
7+
8+
/**
 * Sink that accepts CSV records one at a time.
 *
 * Declared as a functional interface, so an implementation can be supplied as a lambda.
 */
fun interface CsvRecordWriter<T : Any> {

    /**
     * Write the given [record] as the next record.
     */
    fun write(record: T)
}
17+
18+
/**
 * Build a [CsvRecordWriter] that emits CSV to [output] one record per call.
 *
 * One encoder is created up front and reused for every record passed to the returned writer.
 *
 * @param serializer The serializer used to encode each record.
 * @param output The destination the CSV text is appended to.
 */
@ExperimentalSerializationApi
fun <T : Any> Csv.recordWriter(serializer: KSerializer<T>, output: Appendable): CsvRecordWriter<T> {
    val csvWriter = CsvWriter(output, config)
    val recordEncoder = RecordListCsvEncoder(csv = this, writer = csvWriter)

    return CsvRecordWriter { record ->
        recordEncoder.encodeSerializableValue(serializer, record)
    }
}

library/src/main/kotlin/kotlinx/serialization/csv/decode/CsvReader.kt

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,17 @@ import kotlinx.serialization.csv.config.CsvConfig
55
/**
66
* Reader that parses CSV input.
77
*/
8-
internal class CsvReader(private val source: Source, private val config: CsvConfig) {
8+
internal class CsvReader(source: Source, private val config: CsvConfig) {
9+
10+
private val source: Source by lazy {
11+
source.also {
12+
// Skip Microsoft Excel's byte order marker, should it appear.
13+
// This has to happen lazily to avoid blocking read calls during the initialization of the CsvReader.
14+
if (source.peek() == '\uFEFF') {
15+
source.read()
16+
}
17+
}
18+
}
919

1020
val offset
1121
get() = source.offset
@@ -21,11 +31,6 @@ internal class CsvReader(private val source: Source, private val config: CsvConf
2131

2232
private var marks = arrayListOf<Int>()
2333

24-
init {
25-
// Skip Microsoft Excel's byte order marker, should it appear:
26-
read("\uFEFF")
27-
}
28-
2934
/**
3035
* Read value in the next column.
3136
*/

library/src/main/kotlin/kotlinx/serialization/csv/decode/FetchSource.kt

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package kotlinx.serialization.csv.decode
22

3+
import java.io.EOFException
34
import java.io.Reader
45

56
internal class FetchSource(
@@ -20,18 +21,27 @@ internal class FetchSource(
2021
override var offset: Int = 0
2122
private set
2223

23-
private var next: Char? = getChar()
24+
private var queue = ArrayList<Char>(2048)
25+
private var marks = ArrayList<Int>(2048)
26+
private var queueOffset = 0
27+
28+
private var next: Char? = null
29+
get() {
30+
if (field == null && nextPosition == 0) {
31+
// Reading first char has to happen lazily to avoid blocking read calls
32+
// during the initialization of the FetchSource.
33+
field = getChar()
34+
}
35+
return field
36+
}
37+
2438
private fun nextChar(): Char {
25-
val n = next ?: throw IllegalStateException("Out of characters")
39+
val nextChar = next ?: throw EOFException("No more characters to read.")
2640
next = getChar()
2741
nextPosition++
28-
return n
42+
return nextChar
2943
}
3044

31-
private var queue = ArrayList<Char>(2048)
32-
private var marks = ArrayList<Int>(2048)
33-
private var queueOffset = 0
34-
3545
override fun canRead(): Boolean = offset <= nextPosition
3646

3747
override fun read(): Char? {

library/src/main/kotlin/kotlinx/serialization/csv/encode/CsvWriter.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import kotlinx.serialization.csv.config.QuoteMode
99
* To write one CSV record, call [beginRecord], followed by multiple calls to [printColumn] and
1010
* finally call [endRecord] to finish the record.
1111
*/
12-
internal class CsvWriter(private val sb: Appendable, private val config: CsvConfig) {
12+
internal class CsvWriter(private val output: Appendable, private val config: CsvConfig) {
1313

1414
var isFirstRecord = true
1515
private var isFirstColumn = true
@@ -20,7 +20,7 @@ internal class CsvWriter(private val sb: Appendable, private val config: CsvConf
2020
*/
2121
fun beginRecord() {
2222
if (!isFirstRecord) {
23-
sb.append(config.recordSeparator)
23+
output.append(config.recordSeparator)
2424
}
2525
}
2626

@@ -64,19 +64,19 @@ internal class CsvWriter(private val sb: Appendable, private val config: CsvConf
6464
escapeCharacters = "$escapeChar$delimiter$quoteChar$recordSeparator",
6565
escapeChar = escapeChar
6666
)
67-
sb.append(escapedValue)
67+
output.append(escapedValue)
6868
} else if (mode == WriteMode.QUOTED || mode == WriteMode.ESCAPED) {
6969
val escapedValue = value.replace("$quoteChar", "$quoteChar$quoteChar")
70-
sb.append(quoteChar).append(escapedValue).append(quoteChar)
70+
output.append(quoteChar).append(escapedValue).append(quoteChar)
7171
} else {
72-
sb.append(value)
72+
output.append(value)
7373
}
7474
}
7575

7676
/** End the current column (which writes the column delimiter). */
7777
private fun nextColumn() {
7878
if (!isFirstColumn) {
79-
sb.append(config.delimiter)
79+
output.append(config.delimiter)
8080
}
8181
isFirstColumn = false
8282
}

0 commit comments

Comments
 (0)