Skip to content

Commit c78e950

Browse files
authored
Protobuf packed encoding/decoding (#1830)
* Create an annotation to request packing of collections. * The specification only allows packing for primitive numeric types (wire types 0, 1 or 5) to allow decoders to decode either format independently of the proto specification. * Make pushback work with respect to currentType/currentId. This allows it to be used to effectively peek the type without assumptions on state. * Clarify in the documentation that reading will (per the standard) support inputs in either format, independent of the annotation. The annotation only affects writing. * Support decoding "packed" arrays as top-level values. Add tests for handling of strings and "packed" top-level arrays. The checking for eof works as bytesize is always >= array length.
1 parent ece9fdd commit c78e950

File tree

11 files changed

+296
-20
lines changed

11 files changed

+296
-20
lines changed

docs/formats.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ Field #3: 1D Fixed32 Value = 3, Hex = 03-00-00-00
379379

380380
### Lists as repeated fields
381381

382-
Kotlin lists and other collections are representend as repeated fields.
382+
By default, Kotlin lists and other collections are represented as repeated fields.
383383
In the protocol buffers when the list is empty there are no elements in the
384384
stream with the corresponding number. For Kotlin Serialization you must explicitly specify a default of `emptyList()`
385385
for any property of a collection or map type. Otherwise you will not be able to deserialize an empty
@@ -418,8 +418,6 @@ Data(a=[1, 2, 3], b=[])
418418
```
419419

420420
<!--- TEST -->
421-
422-
> Packed repeated fields are not supported.
423421
424422
In [ProtoBuf diagnostic mode](https://protogen.marcgravell.com/decode) the output is equivalent to the following:
425423
```
@@ -428,6 +426,13 @@ Field #1: 08 Varint Value = 2, Hex = 02
428426
Field #1: 08 Varint Value = 3, Hex = 03
429427
```
430428

429+
### Packed fields
430+
Collection types (not maps) can be **written** as packed fields when annotated with the `@ProtoPacked` annotation.
431+
Per the standard, packed fields can only be used on primitive numeric types. The annotation is ignored on other types.
432+
433+
Per the [format description](https://developers.google.com/protocol-buffers/docs/encoding#packed) the parser ignores
434+
the annotation and instead reads lists in either packed or repeated format.
435+
431436
## Properties (experimental)
432437

433438
Kotlin Serialization can serialize a class into a flat map with `String` keys via

formats/protobuf/api/kotlinx-serialization-protobuf.api

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ public final class kotlinx/serialization/protobuf/ProtoNumber$Impl : kotlinx/ser
3838
public final synthetic fun number ()I
3939
}
4040

41+
public abstract interface annotation class kotlinx/serialization/protobuf/ProtoPacked : java/lang/annotation/Annotation {
42+
}
43+
44+
public final class kotlinx/serialization/protobuf/ProtoPacked$Impl : kotlinx/serialization/protobuf/ProtoPacked {
45+
public fun <init> ()V
46+
}
47+
4148
public abstract interface annotation class kotlinx/serialization/protobuf/ProtoType : java/lang/annotation/Annotation {
4249
public abstract fun type ()Lkotlinx/serialization/protobuf/ProtoIntegerType;
4350
}

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/ProtoTypes.kt

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ public annotation class ProtoNumber(public val number: Int)
3232
@Suppress("NO_EXPLICIT_VISIBILITY_IN_API_MODE_WARNING")
3333
@ExperimentalSerializationApi
3434
public enum class ProtoIntegerType(internal val signature: Long) {
35-
DEFAULT(0L shl 32),
36-
SIGNED(1L shl 32),
37-
FIXED(2L shl 32);
35+
DEFAULT(0L shl 33),
36+
SIGNED(1L shl 33),
37+
FIXED(2L shl 33);
3838
}
3939

4040
/**
@@ -45,3 +45,12 @@ public enum class ProtoIntegerType(internal val signature: Long) {
4545
@Target(AnnotationTarget.PROPERTY)
4646
@ExperimentalSerializationApi
4747
public annotation class ProtoType(public val type: ProtoIntegerType)
48+
49+
50+
/**
51+
* Instructs that a particular collection should be written as a [packed array](https://developers.google.com/protocol-buffers/docs/encoding#packed).
52+
*/
53+
@SerialInfo
54+
@Target(AnnotationTarget.PROPERTY)
55+
@ExperimentalSerializationApi
56+
public annotation class ProtoPacked

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/Helpers.kt

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,15 @@ internal const val i64 = 1
1616
internal const val SIZE_DELIMITED = 2
1717
internal const val i32 = 5
1818

19-
private const val MASK = Int.MAX_VALUE.toLong() shl 32
19+
private const val INTTYPEMASK = (Int.MAX_VALUE.toLong() shr 1) shl 33
20+
private const val PACKEDMASK = 1L shl 32
21+
22+
@Suppress("NOTHING_TO_INLINE")
23+
internal inline fun ProtoDesc(protoId: Int, type: ProtoIntegerType, packed: Boolean): ProtoDesc {
24+
val packedBits = if (packed) 1L shl 32 else 0L
25+
val signature = type.signature or packedBits
26+
return signature or protoId.toLong()
27+
}
2028

2129
@Suppress("NOTHING_TO_INLINE")
2230
internal inline fun ProtoDesc(protoId: Int, type: ProtoIntegerType): ProtoDesc {
@@ -26,25 +34,40 @@ internal inline fun ProtoDesc(protoId: Int, type: ProtoIntegerType): ProtoDesc {
2634
internal inline val ProtoDesc.protoId: Int get() = (this and Int.MAX_VALUE.toLong()).toInt()
2735

2836
internal val ProtoDesc.integerType: ProtoIntegerType
29-
get() = when(this and MASK) {
37+
get() = when(this and INTTYPEMASK) {
3038
ProtoIntegerType.DEFAULT.signature -> ProtoIntegerType.DEFAULT
3139
ProtoIntegerType.SIGNED.signature -> ProtoIntegerType.SIGNED
3240
else -> ProtoIntegerType.FIXED
3341
}
3442

43+
internal val SerialDescriptor.isPackable: Boolean
44+
@OptIn(kotlinx.serialization.ExperimentalSerializationApi::class)
45+
get() = when (kind) {
46+
PrimitiveKind.STRING,
47+
!is PrimitiveKind -> false
48+
else -> true
49+
}
50+
51+
internal val ProtoDesc.isPacked: Boolean
52+
get() = (this and PACKEDMASK) != 0L
53+
3554
internal fun SerialDescriptor.extractParameters(index: Int): ProtoDesc {
3655
val annotations = getElementAnnotations(index)
3756
var protoId: Int = index + 1
3857
var format: ProtoIntegerType = ProtoIntegerType.DEFAULT
58+
var protoPacked = false
59+
3960
for (i in annotations.indices) { // Allocation-friendly loop
4061
val annotation = annotations[i]
4162
if (annotation is ProtoNumber) {
4263
protoId = annotation.number
4364
} else if (annotation is ProtoType) {
4465
format = annotation.type
66+
} else if (annotation is ProtoPacked) {
67+
protoPacked = true
4568
}
4669
}
47-
return ProtoDesc(protoId, format)
70+
return ProtoDesc(protoId, format, protoPacked)
4871
}
4972

5073
internal fun extractProtoId(descriptor: SerialDescriptor, index: Int, zeroBasedDefault: Boolean): Int {
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package kotlinx.serialization.protobuf.internal
2+
3+
import kotlinx.serialization.*
4+
import kotlinx.serialization.descriptors.*
5+
import kotlinx.serialization.encoding.*
6+
import kotlinx.serialization.protobuf.*
7+
8+
@OptIn(ExperimentalSerializationApi::class)
9+
internal class PackedArrayDecoder(
10+
proto: ProtoBuf,
11+
reader: ProtobufReader,
12+
descriptor: SerialDescriptor,
13+
) : ProtobufDecoder(proto, reader, descriptor) {
14+
private var nextIndex: Int = 0
15+
16+
// Tags are omitted in the packed array format
17+
override fun SerialDescriptor.getTag(index: Int): ProtoDesc = MISSING_TAG
18+
19+
override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
20+
throw SerializationException("Packing only supports primitive number types. The input type however was a struct: $descriptor")
21+
}
22+
23+
override fun decodeElementIndex(descriptor: SerialDescriptor): Int {
24+
// We need eof here as there is no tag to read in packed form.
25+
if (reader.eof) return CompositeDecoder.DECODE_DONE
26+
return nextIndex++
27+
}
28+
29+
override fun decodeTaggedString(tag: ProtoDesc): String {
30+
throw SerializationException("Packing only supports primitive number types. The actual reading is for string.")
31+
}
32+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package kotlinx.serialization.protobuf.internal
2+
3+
import kotlinx.serialization.*
4+
import kotlinx.serialization.descriptors.*
5+
import kotlinx.serialization.encoding.*
6+
import kotlinx.serialization.protobuf.*
7+
8+
@OptIn(ExperimentalSerializationApi::class)
9+
internal class PackedArrayEncoder(
10+
proto: ProtoBuf,
11+
writer: ProtobufWriter,
12+
curTag: ProtoDesc,
13+
descriptor: SerialDescriptor,
14+
stream: ByteArrayOutput = ByteArrayOutput()
15+
) : NestedRepeatedEncoder(proto, writer, curTag, descriptor, stream) {
16+
17+
// Triggers not writing header
18+
override fun SerialDescriptor.getTag(index: Int): ProtoDesc = MISSING_TAG
19+
20+
override fun beginCollection(descriptor: SerialDescriptor, collectionSize: Int): CompositeEncoder {
21+
throw SerializationException("Packing only supports primitive number types")
22+
}
23+
24+
override fun beginStructure(descriptor: SerialDescriptor): CompositeEncoder {
25+
throw SerializationException("Packing only supports primitive number types")
26+
}
27+
28+
override fun encodeTaggedString(tag: ProtoDesc, value: String) {
29+
throw SerializationException("Packing only supports primitive number types")
30+
}
31+
}

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufDecoding.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ internal open class ProtobufDecoder(
108108
reader.readTag()
109109
// all elements always have id = 1
110110
RepeatedDecoder(proto, reader, ProtoDesc(1, ProtoIntegerType.DEFAULT), descriptor)
111+
112+
} else if (reader.currentType == SIZE_DELIMITED && descriptor.getElementDescriptor(0).isPackable) {
113+
val sliceReader = ProtobufReader(reader.objectInput())
114+
PackedArrayDecoder(proto, sliceReader, descriptor)
115+
111116
} else {
112117
RepeatedDecoder(proto, reader, tag, descriptor)
113118
}
@@ -287,7 +292,8 @@ private class RepeatedDecoder(
287292
private fun decodeListIndexNoTag(): Int {
288293
val size = -tagOrSize
289294
val idx = ++index
290-
if (idx.toLong() == size) return CompositeDecoder.DECODE_DONE
295+
// The eof check covers out-of-spec packed arrays, where size is the byte size rather than the list length.
296+
if (idx.toLong() == size || reader.eof) return CompositeDecoder.DECODE_DONE
291297
return idx
292298
}
293299

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufEncoding.kt

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,17 @@ internal open class ProtobufEncoder(
3030
): CompositeEncoder = when (descriptor.kind) {
3131
StructureKind.LIST -> {
3232
val tag = currentTagOrDefault
33-
if (tag == MISSING_TAG) {
34-
writer.writeInt(collectionSize)
35-
}
36-
if (this.descriptor.kind == StructureKind.LIST && tag != MISSING_TAG && this.descriptor != descriptor) {
37-
NestedRepeatedEncoder(proto, writer, tag, descriptor)
33+
if (tag.isPacked && descriptor.getElementDescriptor(0).isPackable) {
34+
PackedArrayEncoder(proto, writer, currentTagOrDefault, descriptor)
3835
} else {
39-
RepeatedEncoder(proto, writer, tag, descriptor)
36+
if (tag == MISSING_TAG) {
37+
writer.writeInt(collectionSize)
38+
}
39+
if (this.descriptor.kind == StructureKind.LIST && tag != MISSING_TAG && this.descriptor != descriptor) {
40+
NestedRepeatedEncoder(proto, writer, tag, descriptor)
41+
} else {
42+
RepeatedEncoder(proto, writer, tag, descriptor)
43+
}
4044
}
4145
}
4246
StructureKind.MAP -> {
@@ -47,7 +51,13 @@ internal open class ProtobufEncoder(
4751
}
4852

4953
override fun beginStructure(descriptor: SerialDescriptor): CompositeEncoder = when (descriptor.kind) {
50-
StructureKind.LIST -> RepeatedEncoder(proto, writer, currentTagOrDefault, descriptor)
54+
StructureKind.LIST -> {
55+
if (descriptor.getElementDescriptor(0).isPackable && currentTagOrDefault.isPacked) {
56+
PackedArrayEncoder(proto, writer, currentTagOrDefault, descriptor)
57+
} else {
58+
RepeatedEncoder(proto, writer, currentTagOrDefault, descriptor)
59+
}
60+
}
5161
StructureKind.CLASS, StructureKind.OBJECT, is PolymorphicKind -> {
5262
val tag = currentTagOrDefault
5363
if (tag == MISSING_TAG && descriptor == this.descriptor) this
@@ -183,7 +193,7 @@ private class RepeatedEncoder(
183193
override fun SerialDescriptor.getTag(index: Int) = curTag
184194
}
185195

186-
private class NestedRepeatedEncoder(
196+
internal open class NestedRepeatedEncoder(
187197
proto: ProtoBuf,
188198
@JvmField val writer: ProtobufWriter,
189199
@JvmField val curTag: ProtoDesc,

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufReader.kt

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,27 @@ internal class ProtobufReader(private val input: ByteArrayInput) {
1515
@JvmField
1616
public var currentType = -1
1717
private var pushBack = false
18+
private var pushBackHeader = 0
19+
20+
public val eof
21+
get() = !pushBack && input.availableBytes == 0
1822

1923
public fun readTag(): Int {
2024
if (pushBack) {
2125
pushBack = false
22-
return currentId
26+
val previousHeader = (currentId shl 3) or currentType
27+
return updateIdAndType(pushBackHeader).also {
28+
pushBackHeader = previousHeader
29+
}
2330
}
31+
// Header to use when pushed back is the old id/type
32+
pushBackHeader = (currentId shl 3) or currentType
2433

2534
val header = input.readVarint64(true).toInt()
35+
return updateIdAndType(header)
36+
}
37+
38+
private fun updateIdAndType(header: Int): Int {
2639
return if (header == -1) {
2740
currentId = -1
2841
currentType = -1
@@ -36,6 +49,10 @@ internal class ProtobufReader(private val input: ByteArrayInput) {
3649

3750
public fun pushBackTag() {
3851
pushBack = true
52+
53+
val nextHeader = (currentId shl 3) or currentType
54+
updateIdAndType(pushBackHeader)
55+
pushBackHeader = nextHeader
3956
}
4057

4158
fun skipElement() {

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/Streams.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import kotlinx.serialization.*
88

99
internal class ByteArrayInput(private var array: ByteArray, private val endIndex: Int = array.size) {
1010
private var position: Int = 0
11-
private val availableBytes: Int get() = endIndex - position
11+
val availableBytes: Int get() = endIndex - position
1212

1313
fun slice(size: Int): ByteArrayInput {
1414
ensureEnoughBytes(size)

0 commit comments

Comments
 (0)