Skip to content

Commit 5505bf0

Browse files
authored
Merge pull request #10 from brudaswen/feature/issue-9
Support `ignoreUnknownColumns`
2 parents b54f67f + 7304013 commit 5505bf0

File tree

11 files changed

+230
-11
lines changed

11 files changed

+230
-11
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

77
## [Unreleased]
8+
### Added
9+
- Support `ignoreUnknownColumns`.
810

911
## [1.0.2] - 2020-10-11
1012
### Changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ CSV serialization and parsing options can be changed by providing a custom `CsvC
9494
| `ignoreEmptyLines` | `true` | Ignore empty lines during parsing. |
9595
| `hasHeaderRecord` | `false` | First line is header record. |
9696
| `headerSeparator` | `.` | Character that is used to separate hierarchical header names. |
97+
| `ignoreUnknownColumns` | `false` | Ignore unknown columns (only has effect when `hasHeaderRecord` is enabled). |
9798
| `hasTrailingDelimiter` | `false` | If records end with a trailing `delimiter`. |
9899

99100
## Requirements

library/src/main/kotlin/kotlinx/serialization/csv/CsvConfiguration.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ package kotlinx.serialization.csv
1212
* @param ignoreEmptyLines Ignore empty lines during parsing (default: `true`).
1313
* @param hasHeaderRecord First line is header record (default: `false`).
1414
* @param headerSeparator Character that is used to separate hierarchical header names (default: `.`).
15+
* @param ignoreUnknownColumns Ignore unknown columns when `hasHeaderRecord` is enabled (default: `false`).
1516
* @param hasTrailingDelimiter If records end with a trailing [delimiter] (default: `false`).
1617
*/
1718
data class CsvConfiguration(
@@ -24,7 +25,8 @@ data class CsvConfiguration(
2425
val ignoreEmptyLines: Boolean = true,
2526
val hasHeaderRecord: Boolean = false,
2627
val headerSeparator: Char = '.',
27-
val hasTrailingDelimiter: Boolean = false
28+
val ignoreUnknownColumns: Boolean = false,
29+
val hasTrailingDelimiter: Boolean = false,
2830
) {
2931

3032
init {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
@file:Suppress("FunctionName")
2+
3+
package kotlinx.serialization.csv
4+
5+
import kotlinx.serialization.ExperimentalSerializationApi
6+
import kotlinx.serialization.SerializationException
7+
import kotlinx.serialization.descriptors.SerialDescriptor
8+
9+
/**
 * Common base class of the CSV encoding and decoding exceptions.
 *
 * @param message Human readable description of the failure.
 */
internal open class CsvException(
    message: String,
) : SerializationException(message)
13+
14+
/**
 * Signals that [Csv] could not produce a CSV string for the given value.
 *
 * @param message Human readable description of the failure.
 */
internal class CsvEncodingException(
    message: String,
) : CsvException(message)
18+
19+
/**
 * Builds the exception reported when a descriptor kind cannot be represented in CSV.
 *
 * @param descriptor Descriptor whose [SerialDescriptor.kind] is named in the message.
 * @return A [CsvEncodingException] carrying the message.
 */
@OptIn(ExperimentalSerializationApi::class)
internal fun UnsupportedSerialDescriptorException(descriptor: SerialDescriptor): CsvEncodingException {
    val message = "CSV does not support '${descriptor.kind}'."
    return CsvEncodingException(message)
}
23+
24+
/**
 * Builds the exception reported when headers are requested for a variable sized type.
 *
 * @param descriptor Descriptor whose [SerialDescriptor.kind] is named in the message.
 * @return A [CsvEncodingException] carrying the message.
 */
@OptIn(ExperimentalSerializationApi::class)
internal fun HeadersNotSupportedForSerialDescriptorException(descriptor: SerialDescriptor): CsvEncodingException {
    val message = "CSV headers are not supported for variable sized type '${descriptor.kind}'."
    return CsvEncodingException(message)
}
28+
29+
/**
 * Signals that [Csv] could not parse the given CSV string or could not deserialize it
 * into the target class.
 *
 * @param message Human readable description of the failure.
 */
internal class CsvDecodingException(
    message: String,
) : CsvException(message)
33+
34+
/**
 * Creates a [CsvDecodingException], prefixing the message with the offset when one is known.
 *
 * @param offset Offset to report, or `null` to use [message] unchanged.
 * @param message Description of the failure.
 * @return The exception carrying the final message.
 */
internal fun CsvDecodingException(offset: Int?, message: String): CsvDecodingException {
    val text = if (offset == null) {
        message
    } else {
        "Unexpected CSV token at offset $offset: $message"
    }
    return CsvDecodingException(text)
}
36+
37+
/**
 * Creates the exception reported for a header that does not map to any known column.
 *
 * @param offset Offset passed on to the offset-aware [CsvDecodingException] factory.
 * @param header The header text that was not recognised.
 * @return A [CsvDecodingException] that also hints at the `ignoreUnknownColumns` option.
 */
internal fun UnknownColumnHeaderException(offset: Int, header: String): CsvDecodingException {
    // NOTE(review): the closing `|` line appears to leave a trailing line break in the
    // message - confirm whether that is intended.
    val message = """
        |Encountered unknown column header '$header'.
        |Use 'ignoreUnknownColumns = true' in 'Csv {}' builder to ignore unknown columns.
        |""".trimMargin()
    return CsvDecodingException(offset, message)
}

library/src/main/kotlin/kotlinx/serialization/csv/decode/ClassCsvDecoder.kt

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,23 @@ internal class ClassCsvDecoder(
2020
) : CsvDecoder(csv, reader, parent) {
2121

2222
private var elementIndex = 0
23+
private var columnIndex = 0
2324

2425
/**
 * Determines which element of [descriptor] the next column belongs to.
 *
 * Without headers, elements are decoded in declaration order and [elementIndex] is returned.
 * With headers, the element index recorded for the current [columnIndex] is returned.
 * Columns marked as [CompositeDecoder.UNKNOWN_NAME] are read and discarded before the
 * following column is examined.
 *
 * @return The element index, [CompositeDecoder.DECODE_DONE] when the reader is done, all
 * elements have been consumed or all header positions have been visited, or
 * [CompositeDecoder.UNKNOWN_NAME] when the current column has no header mapping at all.
 */
override fun decodeElementIndex(descriptor: SerialDescriptor): Int = when {
    reader.isDone -> CompositeDecoder.DECODE_DONE
    elementIndex >= descriptor.elementsCount -> CompositeDecoder.DECODE_DONE
    classHeaders != null && columnIndex >= classHeaders.size -> CompositeDecoder.DECODE_DONE

    classHeaders != null ->
        when (val result = classHeaders[columnIndex]) {
            // Column was flagged as unknown while reading the header record: skip its
            // value and continue with the next column.
            CompositeDecoder.UNKNOWN_NAME -> {
                ignoreColumn()
                decodeElementIndex(descriptor)
            }
            // No mapping recorded for this position.
            null -> CompositeDecoder.UNKNOWN_NAME
            else -> result
        }

    else -> elementIndex
}
2942

@@ -42,14 +55,31 @@ internal class ClassCsvDecoder(
4255
}
4356
}
4457

58+
/**
 * Ends the structure and, when headers are present and `ignoreUnknownColumns` is enabled,
 * reads and discards every remaining column up to the number of header positions.
 */
override fun endStructure(descriptor: SerialDescriptor) {
    super.endStructure(descriptor)

    val knownHeaders = classHeaders ?: return
    if (!csv.configuration.ignoreUnknownColumns) return

    while (columnIndex < knownHeaders.size) {
        ignoreColumn()
    }
}
67+
4568
/**
 * Ends a child structure and advances both the element and the column counter by one.
 */
override fun endChildStructure(descriptor: SerialDescriptor) {
    super.endChildStructure(descriptor)
    elementIndex += 1
    columnIndex += 1
}
4973

5074
/**
 * Decodes the next column via the parent implementation, then advances both the element
 * and the column counter.
 *
 * @return The column value returned by the parent implementation.
 */
override fun decodeColumn(): String =
    super.decodeColumn().also {
        elementIndex++
        columnIndex++
    }
80+
81+
/**
 * Reads one column from the reader and drops its value. Only [columnIndex] advances;
 * [elementIndex] stays untouched.
 */
private fun ignoreColumn() {
    reader.readColumn()
    columnIndex += 1
}
5585
}

library/src/main/kotlin/kotlinx/serialization/csv/decode/CsvDecoder.kt

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package kotlinx.serialization.csv.decode
33
import kotlinx.serialization.ExperimentalSerializationApi
44
import kotlinx.serialization.csv.Csv
55
import kotlinx.serialization.csv.CsvConfiguration
6+
import kotlinx.serialization.csv.UnknownColumnHeaderException
7+
import kotlinx.serialization.csv.UnsupportedSerialDescriptorException
68
import kotlinx.serialization.descriptors.PolymorphicKind
79
import kotlinx.serialization.descriptors.SerialDescriptor
810
import kotlinx.serialization.descriptors.StructureKind
@@ -24,10 +26,10 @@ internal abstract class CsvDecoder(
2426
override val serializersModule: SerializersModule
2527
get() = csv.serializersModule
2628

27-
protected val configuration: CsvConfiguration
29+
private val configuration: CsvConfiguration
2830
get() = csv.configuration
2931

30-
protected var headers: Headers? = null
32+
private var headers: Headers? = null
3133

3234
override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
3335
return when (descriptor.kind) {
@@ -47,8 +49,7 @@ internal abstract class CsvDecoder(
4749
PolymorphicKind.OPEN ->
4850
ClassCsvDecoder(csv, reader, this, headers)
4951

50-
else ->
51-
error("CSV does not support '${descriptor.kind}'.")
52+
else -> throw UnsupportedSerialDescriptorException(descriptor)
5253
}
5354
}
5455

@@ -124,9 +125,11 @@ internal abstract class CsvDecoder(
124125
private fun readHeaders(desc: SerialDescriptor, prefix: String): Headers {
125126
val headers = Headers()
126127
var position = 0
127-
while (reader.isFirstRecord) {
128-
// Read header value and check if it (still) starts with required prefix
128+
while (!reader.isDone && reader.isFirstRecord) {
129+
val offset = reader.offset
129130
reader.mark()
131+
132+
// Read header value and check if it (still) starts with required prefix
130133
val value = reader.readColumn()
131134
if (!value.startsWith(prefix)) {
132135
reader.reset()
@@ -151,8 +154,13 @@ internal abstract class CsvDecoder(
151154
} else {
152155
reader.unmark()
153156
}
154-
} else {
157+
} else if (csv.configuration.ignoreUnknownColumns) {
158+
headers[position] = CompositeDecoder.UNKNOWN_NAME
155159
reader.unmark()
160+
} else if (value == "" && !reader.isFirstRecord && configuration.hasTrailingDelimiter) {
161+
reader.unmark()
162+
} else {
163+
throw UnknownColumnHeaderException(offset, value)
156164
}
157165
}
158166
position++
@@ -176,8 +184,11 @@ internal abstract class CsvDecoder(
176184
private val map = mutableMapOf<Int, Int>()
177185
private val subHeaders = mutableMapOf<Int, Headers>()
178186

187+
/** Number of header positions that currently have an entry in [map]. */
val size: Int get() = map.size
189+
179190
/**
 * Looks up the value recorded for the given header [position].
 *
 * @return The stored value, or `null` when nothing was recorded for [position]. Returning
 * `null` keeps a missing entry distinguishable from an explicitly stored
 * [CompositeDecoder.UNKNOWN_NAME].
 */
operator fun get(position: Int): Int? =
    map[position]
181192

182193
operator fun set(key: Int, value: Int) {
183194
map[key] = value

library/src/main/kotlin/kotlinx/serialization/csv/decode/CsvReader.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ import kotlinx.serialization.csv.CsvConfiguration
77
*/
88
internal class CsvReader(private val source: Source, private val configuration: CsvConfiguration) {
99

10+
/** Current read offset, as reported by the underlying [source]. */
val offset: Int get() = source.offset
12+
1013
var recordNo = 0
1114
private set
1215

library/src/main/kotlin/kotlinx/serialization/csv/decode/Source.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ package kotlinx.serialization.csv.decode
55
*/
66
interface Source {
77

8+
/**
9+
* Current read offset in the source.
10+
*/
11+
val offset: Int
12+
813
/**
914
* Check if there are more characters to read.
1015
* @return True if EOF has not been read, yet; false if EOF has already been read.

library/src/main/kotlin/kotlinx/serialization/csv/decode/StringSource.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ private const val EOF = -1
77
*/
88
internal class StringSource(private val content: String) : Source {
99

10+
/** Current read offset; exposes the private [position] counter. */
override val offset: Int get() = position
12+
1013
private var position = 0
1114

1215
private var marks = arrayListOf<Int>()
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package kotlinx.serialization.csv.config
2+
3+
import kotlinx.serialization.ExperimentalSerializationApi
4+
import kotlinx.serialization.builtins.ListSerializer
5+
import kotlinx.serialization.csv.Csv
6+
import kotlinx.serialization.csv.CsvConfiguration
7+
import kotlinx.serialization.csv.records.Data
8+
import kotlinx.serialization.csv.records.IntStringRecord
9+
import kotlinx.serialization.csv.records.Location
10+
import kotlinx.serialization.csv.records.NestedRecord
11+
import kotlinx.serialization.test.assertParse
12+
import kotlinx.serialization.test.assertParseFails
13+
import kotlin.test.Test
14+
15+
/**
 * Parsing tests for the `ignoreUnknownColumns` option in combination with a header record.
 */
@OptIn(ExperimentalSerializationApi::class)
internal class CsvIgnoreUnknownKeysTest {

    /** A trailing unknown column is skipped when `ignoreUnknownColumns` is enabled. */
    @Test
    fun testMultipleColumns() {
        assertParse(
            "a,b,IGNORED\r\n1,testing,ignored",
            IntStringRecord(1, "testing"),
            IntStringRecord.serializer(),
            lenientCsv()
        )
    }

    /** The same input is rejected when `ignoreUnknownColumns` is left at its default. */
    @Test
    fun testMultipleColumns_failure() {
        assertParseFails(
            "a,b,IGNORED\r\n1,testing,ignored",
            IntStringRecord.serializer(),
            strictCsv()
        )
    }

    /** A leading unknown column is skipped and the reordered known columns are mapped. */
    @Test
    fun testMultipleColumnsReordered() {
        assertParse(
            "IGNORED,b,a\r\nignored,testing,1",
            IntStringRecord(1, "testing"),
            IntStringRecord.serializer(),
            lenientCsv()
        )
    }

    /** The reordered input is rejected when `ignoreUnknownColumns` is left at its default. */
    @Test
    fun testMultipleColumnsReordered_failure() {
        assertParseFails(
            "IGNORED,b,a\r\nignored,testing,1",
            IntStringRecord.serializer(),
            strictCsv()
        )
    }

    /** Unknown columns at the start, inside a nested header group and at the end are skipped. */
    @Test
    fun testNestedRecordListWithHeaderReordered() {
        val input =
            """IGNORED,time,name,data.location.lon,data.location.IGNORED,data.location.lat,data.speed,data.info,IGNORED
              |IGNORED,0,Alice,1.0,IGNORED,0.0,100,info,IGNORED
              |IGNORED,1,Bob,20.0,IGNORED,10.0,50,info2,IGNORED
              |""".trimMargin().replace("\n", "\r\n")

        val alice = NestedRecord(
            time = 0,
            name = "Alice",
            data = Data(
                location = Location(
                    lat = 0.0,
                    lon = 1.0
                ),
                speed = 100,
                info = "info"
            )
        )
        val bob = NestedRecord(
            time = 1,
            name = "Bob",
            data = Data(
                location = Location(
                    lat = 10.0,
                    lon = 20.0
                ),
                speed = 50,
                info = "info2"
            )
        )

        assertParse(
            input,
            listOf(alice, bob),
            ListSerializer(NestedRecord.serializer()),
            lenientCsv()
        )
    }

    /** Creates a fresh header-aware [Csv] that ignores unknown columns. */
    private fun lenientCsv() = Csv(
        CsvConfiguration(
            hasHeaderRecord = true,
            ignoreUnknownColumns = true
        )
    )

    /** Creates a fresh header-aware [Csv] with `ignoreUnknownColumns` left at its default. */
    private fun strictCsv() = Csv(
        CsvConfiguration(
            hasHeaderRecord = true
        )
    )
}

0 commit comments

Comments
 (0)