6
6
#![ cfg_attr( not( test) , deny( clippy:: expect_used) ) ]
7
7
#![ cfg_attr( not( test) , deny( clippy:: unimplemented) ) ]
8
8
9
- //! This crate implements Protobuf serializers for [`profiles`], including
10
- //! serializers for:
9
+ //! This crate implements Protobuf encoders for [`profiles`] which write to a
10
+ //! [`Write`]. It has encoders for:
11
11
//!
12
12
//! - [Function]
13
13
//! - [Label]
16
16
//! - [Sample]
17
17
//! - [ValueType]
18
18
//!
19
- //! Serialization often happens one byte at a time, so a buffered writer
20
- //! should probably be used.
19
+ //! There is no encoder for a Profile message. It would require borrowing a
20
+ //! lot of data, which becomes unwieldy. It also isn't very compatible with
21
+ //! writing a streaming serializer to lower peak memory usage.
21
22
//!
22
- //! There is no serializer for Profile. It would require borrowing a lot of
23
- //! data, which becomes unwieldy. It also isn't very compatible with writing
24
- //! a streaming serializer to lower peak memory usage.
23
+ //! Encoding often happens one byte at a time, so a buffered writer should
24
+ //! probably be used.
25
25
//!
26
26
//! Indices into the string table are represented by [StringOffset], which uses
27
27
//! a 32-bit number. ID fields are still 64-bit, so the user can control their
28
28
//! values, potentially using a 64-bit address for its value.
29
29
//!
30
30
//! The types are generally `#[repr(C)]` so they can be used in FFI one day.
31
31
//!
32
+ //! Here is a condensed reference for the parts of protobuf used by profiles:
33
+ //!
34
+ //! ```reference
35
+ //! message := (tag value)*
36
+ //! tag := (field << 3) bit-or wire_type;
37
+ //! encoded as uint32 varint
38
+ //! value := varint for wire_type == VARINT,
39
+ //! len-prefix for wire_type == LEN,
40
+ //! varint := int64 | uint64
41
+ //! len-prefix := size (message | string | packed);
42
+ //! size encoded as int32 varint
43
+ //! string := valid UTF-8 string;
44
+ //! max 2GB of bytes
45
+ //! packed := varint*
46
+ //! consecutive values of the type specified in `.proto`
47
+ //! ```
48
+ //!
49
+ //! A [`Record`] represents a [`Tag`] and [`Value`] pair, where the
50
+ //! [`WireType`] comes from [`Value::WIRE_TYPE`].
51
+ //!
52
+ //! Protos must be smaller than 2 GiB when encoded. Many proto implementations
53
+ //! will refuse to encode or decode messages that exceed this limit.
54
+ //!
32
55
//! [`profiles`]: https://github.com/google/pprof/blob/main/proto/profile.proto
33
56
34
57
mod function;
@@ -48,61 +71,78 @@ pub use label::*;
48
71
pub use location:: * ;
49
72
pub use mapping:: * ;
50
73
pub use sample:: * ;
51
- use std:: fmt:: { Debug , Formatter } ;
52
74
pub use string:: * ;
53
75
pub use value_type:: * ;
54
76
77
+ use std:: fmt:: { Debug , Formatter } ;
55
78
use std:: io:: { self , Write } ;
56
79
57
- /// Create a field of a given type, field number, and whether to perform the
58
- /// zero-size optimization or not.
80
+ /// A record is responsible for encoding the field number, wire type and
81
+ /// payload. The wire type tells the parser how big the payload after it is.
82
+ /// For more details, refer to the [Condensed Reference Card].
83
+ ///
84
+ /// The `P` is the payload, the `F` is the field number, and `O` is whether to
85
+ /// apply the zero-sized optimization or not. Most of the time, it shouldn't
86
+ /// matter if the optimization is applied. However, if something is part of
87
+ /// a repeated field, then applying the optimization would change the number
88
+ /// of elements in the array.
89
+ ///
90
+ /// [Condensed Reference Card]: https://protobuf.dev/programming-guides/encoding/#cheat-sheet
59
91
#[ derive( Copy , Clone , Default , Eq , PartialEq ) ]
60
92
#[ repr( transparent) ]
61
93
#[ cfg_attr( test, derive( bolero:: generator:: TypeGenerator ) ) ]
62
- pub struct Field < T : Value , const N : u32 , const O : bool > {
63
- pub value : T ,
94
+ pub struct Record < P : Value , const F : u32 , const O : bool > {
95
+ pub value : P ,
64
96
}
65
97
66
98
/// Represents the wire type for the in-wire protobuf encoding. There are more
67
- /// types than are represented here; these are just the supported ones.
99
+ /// types than are represented here; these are just the ones used in profiles.
100
+ /// See [Message Structure] for more documentation.
101
+ ///
102
+ /// [Message Structure]: https://protobuf.dev/programming-guides/encoding/#structure
68
103
#[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
69
104
#[ repr( u8 ) ]
70
105
pub enum WireType {
71
106
Varint = 0 ,
72
107
LengthDelimited = 2 ,
73
108
}
74
109
75
- /// A value is stored differently depending on the wire_type.
76
- pub trait Value : Default + Eq {
110
+ /// A value (or payload) is stored differently depending on the wire_type. In
111
+ /// profiles, there are two types of payloads: varints and len-prefixed types.
112
+ ///
113
+ /// # Safety
114
+ ///
115
+ /// The [`Default`] implementation _must_ provide all zero values.
116
+ pub unsafe trait Value : Default + Eq {
77
117
/// The wire type this value uses.
78
118
const WIRE_TYPE : WireType ;
79
119
80
- /// The number of bytes it takes to encode this value.
120
+ /// The number of bytes it takes to encode this value. Do not include the
121
+ /// number of bytes it takes to encode the length-prefix as a varint. For
122
+ /// example, using this snippet of the reference:
123
+ ///
124
+ /// ```reference
125
+ /// len-prefix := size (message | string | packed);
126
+ /// size encoded as int32 varint
127
+ /// ```
128
+ ///
129
+ /// Calculate the number of bytes for `(message | string | packed)` only.
130
+ ///
131
+ /// For a varint, returns the number of bytes used to encode it, which is
132
+ /// between 1 and 10.
133
+ ///
134
+ /// Returns u64 rather than u31 to avoid a lot of overflow checking.
81
135
fn proto_len ( & self ) -> u64 ;
82
136
83
- /// Encode the value to the in-wire protobuf format.
137
+ /// Encode the value to the in-wire protobuf format. For length-delimited
138
+ /// types, do not include the length-prefix; see [`Value::proto_len`] for
139
+ /// more details.
84
140
///
85
- /// Serialization often happens one byte at a time, so a buffered writer
86
- /// should probably be used.
141
+ /// Encoding often happens one byte at a time, so a buffered writer should
142
+ /// probably be used.
87
143
fn encode < W : Write > ( & self , writer : & mut W ) -> io:: Result < ( ) > ;
88
144
}
89
145
90
- /// You can use varint to store any of the listed data types:
91
- /// int32 | int64 | uint32 | uint64 | bool | enum | sint32 | sint64
92
- ///
93
- /// # Safety
94
- ///
95
- /// The [`Value::WIRE_TYPE`] must be [`WireType::Varint`]!
96
- pub unsafe trait Varint : Value + Sized { }
97
-
98
- /// You can use LengthDelimited to store any of the listed data types:
99
- /// string, bytes, embedded messages, packed repeated fields
100
- ///
101
- /// # Safety
102
- ///
103
- /// The [`Value::WIRE_TYPE`] must be [`WireType::LengthDelimited`]!
104
- pub unsafe trait LengthDelimited : Value + Sized { }
105
-
106
146
/// Intended to be provided to a [`Record`] to mean that it _should_ optimize for a
107
147
/// value of zero. See also [`NO_OPT_ZERO`].
108
148
pub const OPT_ZERO : bool = true ;
@@ -112,44 +152,46 @@ pub const OPT_ZERO: bool = true;
112
152
/// Mapping.id.
113
153
pub const NO_OPT_ZERO : bool = false ;
114
154
115
- impl < T : Value , const N : u32 , const O : bool > From < T > for Field < T , N , O > {
116
- fn from ( value : T ) -> Self {
117
- Field { value }
155
+ impl < P : Value , const F : u32 , const O : bool > From < P > for Record < P , F , O > {
156
+ fn from ( value : P ) -> Self {
157
+ Record { value }
118
158
}
119
159
}
120
160
121
- impl < T : Value , const N : u32 , const O : bool > Field < T , N , O > {
122
- pub fn proto_len ( & self ) -> u64 {
123
- if O && self . value == T :: default ( ) {
161
+ unsafe impl < P : Value , const F : u32 , const O : bool > Value for Record < P , F , O > {
162
+ const WIRE_TYPE : WireType = P :: WIRE_TYPE ;
163
+
164
+ fn proto_len ( & self ) -> u64 {
165
+ if O && self . value == P :: default ( ) {
124
166
return 0 ;
125
167
}
126
168
let proto_len = self . value . proto_len ( ) ;
127
- let len = if T :: WIRE_TYPE == WireType :: LengthDelimited {
169
+ let len = if P :: WIRE_TYPE == WireType :: LengthDelimited {
128
170
proto_len. proto_len ( )
129
171
} else {
130
172
0
131
173
} ;
132
- let tag = Tag :: new ( N , T :: WIRE_TYPE ) . proto_len ( ) ;
174
+ let tag = Tag :: new ( F , P :: WIRE_TYPE ) . proto_len ( ) ;
133
175
tag + len + proto_len
134
176
}
135
177
136
- pub fn encode < W : Write > ( & self , writer : & mut W ) -> io:: Result < ( ) > {
137
- if O && self . value == T :: default ( ) {
178
+ fn encode < W : Write > ( & self , writer : & mut W ) -> io:: Result < ( ) > {
179
+ if O && self . value == P :: default ( ) {
138
180
return Ok ( ( ) ) ;
139
181
}
140
- Tag :: new ( N , T :: WIRE_TYPE ) . encode ( writer) ?;
141
- if T :: WIRE_TYPE == WireType :: LengthDelimited {
142
- self . value . proto_len ( ) . encode ( writer) ?;
182
+ Tag :: new ( F , P :: WIRE_TYPE ) . encode ( writer) ?;
183
+ if P :: WIRE_TYPE == WireType :: LengthDelimited {
184
+ varint :: encode ( self . value . proto_len ( ) , writer) ?;
143
185
}
144
186
self . value . encode ( writer)
145
187
}
146
188
}
147
189
148
- impl < T : Debug + Value , const N : u32 , const O : bool > Debug for Field < T , N , O > {
190
+ impl < P : Debug + Value , const F : u32 , const O : bool > Debug for Record < P , F , O > {
149
191
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> std:: fmt:: Result {
150
192
f. debug_struct ( "Field" )
151
193
. field ( "value" , & self . value )
152
- . field ( "number" , & N )
194
+ . field ( "number" , & F )
153
195
. field ( "optimize_for_zero" , & O )
154
196
. finish ( )
155
197
}
@@ -176,11 +218,26 @@ impl Tag {
176
218
177
219
#[ inline]
178
220
pub fn proto_len ( self ) -> u64 {
179
- ( self . 0 as u64 ) . proto_len ( )
221
+ varint :: proto_len ( self . 0 as u64 )
180
222
}
181
223
182
224
#[ inline]
183
225
pub fn encode < W : Write > ( self , writer : & mut W ) -> io:: Result < ( ) > {
184
- ( self . 0 as u64 ) . encode ( writer)
226
+ varint:: encode ( self . 0 as u64 , writer)
227
+ }
228
+ }
229
+
230
+ unsafe impl < T : Value > Value for & ' _ [ T ] {
231
+ const WIRE_TYPE : WireType = WireType :: LengthDelimited ;
232
+
233
+ fn proto_len ( & self ) -> u64 {
234
+ self . iter ( ) . map ( Value :: proto_len) . sum ( )
235
+ }
236
+
237
+ fn encode < W : Write > ( & self , writer : & mut W ) -> io:: Result < ( ) > {
238
+ for value in self . iter ( ) {
239
+ value. encode ( writer) ?;
240
+ }
241
+ Ok ( ( ) )
185
242
}
186
243
}
0 commit comments