Skip to content

Commit e79ff4f

Browse files
committed
docs: create examples and document types
1 parent 9b8e8b3 commit e79ff4f

File tree

8 files changed

+311
-98
lines changed

8 files changed

+311
-98
lines changed

datadog-profiling-protobuf/src/function.rs

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,47 @@
44
use super::{StringOffset, Value, Varint, WireType};
55
use std::io::{self, Write};
66

7+
/// Represents a function in a profile. Omits the start line because it's not
8+
/// useful to libdatadog right now, so we save the bytes/ops.
79
#[repr(C)]
810
#[derive(Copy, Clone, Debug)]
911
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
1012
pub struct Function {
11-
pub id: u64, // 1
12-
pub name: StringOffset, // 2
13+
/// Unique nonzero id for the function.
14+
pub id: u64, // 1
15+
/// Name of the function, in human-readable form if available.
16+
pub name: StringOffset, // 2
17+
/// Name of the function, as identified by the system.
18+
/// For instance, it can be a C++ mangled name.
1319
pub system_name: StringOffset, // 3
14-
pub filename: StringOffset, // 4
20+
/// Source file containing the function.
21+
pub filename: StringOffset, // 4
1522
}
1623

1724
impl Value for Function {
1825
const WIRE_TYPE: WireType = WireType::LengthDelimited;
1926

2027
fn proto_len(&self) -> u64 {
2128
Varint(self.id).field(1).proto_len()
22-
+ Varint::from(self.name).field(2).proto_len_small()
23-
+ Varint::from(self.system_name).field(3).proto_len_small()
24-
+ Varint::from(self.filename).field(4).proto_len_small()
29+
+ Varint::from(self.name).field(2).zero_opt().proto_len()
30+
+ Varint::from(self.system_name)
31+
.field(3)
32+
.zero_opt()
33+
.proto_len()
34+
+ Varint::from(self.filename).field(4).zero_opt().proto_len()
2535
}
2636

2737
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
2838
Varint(self.id).field(1).encode(writer)?;
29-
Varint::from(self.name).field(2).encode_small(writer)?;
39+
Varint::from(self.name).field(2).zero_opt().encode(writer)?;
3040
Varint::from(self.system_name)
3141
.field(3)
32-
.encode_small(writer)?;
33-
Varint::from(self.filename).field(4).encode_small(writer)
42+
.zero_opt()
43+
.encode(writer)?;
44+
Varint::from(self.filename)
45+
.field(4)
46+
.zero_opt()
47+
.encode(writer)
3448
}
3549
}
3650

datadog-profiling-protobuf/src/label.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,25 @@
44
use super::{StringOffset, Value, Varint, WireType};
55
use std::io::{self, Write};
66

7-
// todo: if we don't use num_unit, then we can save 8 bytes--4 from num_unit
8-
// plus 4 from padding.
7+
/// Label includes additional context for this sample. It can include things
8+
/// like a thread id, allocation size, etc.
99
#[repr(C)]
1010
#[derive(Copy, Clone, Debug)]
1111
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
1212
pub struct Label {
13-
pub key: StringOffset, // 1
14-
pub str: StringOffset, // 2
15-
pub num: i64, // 3
13+
/// An annotation for a sample, e.g. "allocation_size".
14+
pub key: StringOffset, // 1
15+
/// At most one of `str` and `num` should be used.
16+
pub str: StringOffset, // 2
17+
/// At most one of `str` and `num` should be used.
18+
pub num: i64, // 3
19+
20+
// todo: if we don't use num_unit, then we can save 8 bytes--4 from
21+
// num_unit plus 4 from padding.
22+
/// Should only be present when num is present.
23+
/// Specifies the units of num.
24+
/// Use arbitrary string (for example, "requests") as a custom count unit.
25+
/// If no unit is specified, consumer may apply heuristic to deduce it.
1626
pub num_unit: StringOffset, // 4
1727
}
1828

@@ -21,16 +31,19 @@ impl Value for Label {
2131

2232
fn proto_len(&self) -> u64 {
2333
Varint::from(self.key).field(1).proto_len()
24-
+ Varint::from(self.str).field(2).proto_len_small()
25-
+ Varint::from(self.num).field(3).proto_len_small()
26-
+ Varint::from(self.num_unit).field(4).proto_len_small()
34+
+ Varint::from(self.str).field(2).zero_opt().proto_len()
35+
+ Varint::from(self.num).field(3).zero_opt().proto_len()
36+
+ Varint::from(self.num_unit).field(4).zero_opt().proto_len()
2737
}
2838

2939
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
3040
Varint::from(self.key).field(1).encode(writer)?;
31-
Varint::from(self.str).field(2).encode_small(writer)?;
32-
Varint::from(self.num).field(3).encode_small(writer)?;
33-
Varint::from(self.num_unit).field(4).encode_small(writer)
41+
Varint::from(self.str).field(2).zero_opt().encode(writer)?;
42+
Varint::from(self.num).field(3).zero_opt().encode(writer)?;
43+
Varint::from(self.num_unit)
44+
.field(4)
45+
.zero_opt()
46+
.encode(writer)
3447
}
3548
}
3649

datadog-profiling-protobuf/src/lib.rs

Lines changed: 154 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,27 @@
66
#![cfg_attr(not(test), deny(clippy::expect_used))]
77
#![cfg_attr(not(test), deny(clippy::unimplemented))]
88

9+
//! This crate implements Protobuf serializers for [`profiles`], including:
10+
//!
11+
//! - [Function]
12+
//! - [Label]
13+
//! - [Location] and [Line]
14+
//! - [Mapping]
15+
//! - [Sample]
16+
//! - [ValueType]
17+
//!
18+
//! There is no serializer for Profile. It would require borrowing a lot of
19+
//! data, which becomes unwieldy. It also isn't very compatible with writing
20+
//! a streaming serializer to lower peak memory usage.
21+
//!
22+
//! Indices into the string table are represented by [StringOffset], which uses
23+
//! a 32-bit number. ID fields are still 64-bit, so the user can control their
24+
//! values, potentially using a 64-bit address as the value.
25+
//!
26+
//! The types are generally `#[repr(C)]` so they can be used in FFI one day.
27+
//!
28+
//! [`profiles`]: https://github.com/google/pprof/blob/main/proto/profile.proto
29+
930
mod function;
1031
mod label;
1132
mod location;
@@ -29,19 +50,27 @@ pub use varint::*;
2950

3051
use std::io::{self, Write};
3152

32-
/// A tag is a combination of a wire_type, stored in the least significant
33-
/// three bits, and the field number that is defined in the .proto file.
34-
#[derive(Copy, Clone)]
35-
pub struct Tag(u32);
53+
/// Represents the wire type for the in-wire protobuf encoding. There are more
54+
/// types than are represented here; these are just the supported ones.
55+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
56+
#[repr(u8)]
57+
pub enum WireType {
58+
Varint = 0,
59+
LengthDelimited = 2,
60+
}
3661

3762
/// A value is stored differently depending on the wire_type.
3863
pub trait Value {
3964
const WIRE_TYPE: WireType;
4065

66+
/// The number of bytes it takes to encode this value.
4167
fn proto_len(&self) -> u64;
4268

69+
/// Encode the value to the in-wire protobuf format.
4370
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()>;
4471

72+
/// Create a Pair with the given field. The wire type will be added
73+
/// implicitly, and will be this type's [Self::WIRE_TYPE].
4574
#[inline]
4675
fn field(self, field: u32) -> Pair<Self>
4776
where
@@ -61,7 +90,7 @@ pub struct Pair<V: Value> {
6190
}
6291

6392
impl<V: Value> Pair<V> {
64-
/// Calculate the size of pair, without using the zero-size optimization.
93+
/// Calculate the size of the pair, without the zero-size optimization.
6594
pub fn proto_len(&self) -> u64 {
6695
let tag = Tag::new(self.field, V::WIRE_TYPE).proto_len();
6796
let value = self.value.proto_len();
@@ -73,17 +102,31 @@ impl<V: Value> Pair<V> {
73102
tag + len_prefix + value
74103
}
75104

76-
/// Calculate the size of pair, using the zero-size optimization.
77-
#[inline]
78-
pub fn proto_len_small(&self) -> u64 {
79-
if self.value.proto_len() != 0 {
80-
self.proto_len()
81-
} else {
82-
0
83-
}
84-
}
85-
86-
/// Encodes into protobuf, without using the zero-size optimization.
105+
/// Encodes the pair into protobuf, without the zero-size optimization.
106+
///
107+
/// # Examples
108+
///
109+
/// Given a message like:
110+
///
111+
/// ```protobuf
112+
/// message ValueType {
113+
/// int64 type = 1;
114+
/// int64 unit = 2;
115+
/// }
116+
/// ```
117+
///
118+
/// You can encode it like this:
119+
///
120+
/// ```
121+
/// # use datadog_profiling_protobuf::{Value, Varint};
122+
/// # struct ValueType { r#type: i64, unit: i64 }
123+
/// # fn main() -> std::io::Result<()> {
124+
/// let mut w = Vec::new();
125+
/// let value_type = ValueType { r#type: 4, unit: 5 };
126+
/// Varint::from(value_type.r#type).field(1).encode(&mut w)?;
127+
/// Varint::from(value_type.unit).field(2).encode(&mut w)?;
128+
/// # Ok(()) }
129+
/// ```
87130
pub fn encode(&self, writer: &mut impl Write) -> io::Result<()> {
88131
Tag::new(self.field, V::WIRE_TYPE).encode(writer)?;
89132
if V::WIRE_TYPE == WireType::LengthDelimited {
@@ -93,19 +136,90 @@ impl<V: Value> Pair<V> {
93136
self.value.encode(writer)
94137
}
95138

96-
/// Encodes into protobuf, using the zero-size optimization.
139+
/// Convert the pair into one that will apply the zero-size optimization.
140+
///
141+
/// Note that the zero-size optimization should be applied to the field
142+
/// consistently in its [Value::proto_len] and [Value::encode] methods.
143+
/// If it's done to one, it should be done to the other.
144+
#[inline]
145+
pub fn zero_opt(self) -> WithZeroOptimization<V> {
146+
WithZeroOptimization { pair: self }
147+
}
148+
}
149+
150+
pub struct WithZeroOptimization<V: Value> {
151+
pair: Pair<V>,
152+
}
153+
154+
impl<V: Value> WithZeroOptimization<V> {
155+
/// Calculate the size of the pair, using the zero-size optimization.
156+
#[inline]
157+
pub fn proto_len(&self) -> u64 {
158+
if self.pair.value.proto_len() != 0 {
159+
self.pair.proto_len()
160+
} else {
161+
0
162+
}
163+
}
164+
165+
/// Encodes into protobuf, using the zero-size optimization. Protobuf
166+
/// doesn't require fields with values of zero to be present, so to save
167+
/// space, they can be omitted altogether.
168+
///
169+
/// # Examples
170+
///
171+
/// Label is a great message to demonstrate how the optimization is useful
172+
/// because it has multiple optional values:
173+
///
174+
/// ```protobuf
175+
/// message Label {
176+
/// int64 key = 1;
177+
///
178+
/// // At most one of the following must be present
179+
/// int64 str = 2;
180+
/// int64 num = 3;
181+
///
182+
/// // Should only be present when num is present.
183+
/// int64 num_unit = 4;
184+
/// }
185+
/// ```
186+
///
187+
/// This can be taken advantage of by using `zero_opt`:
188+
///
189+
/// ```
190+
/// # use datadog_profiling_protobuf::{Value, Varint};
191+
/// # struct Label { key: i64, str: i64, num: i64, num_unit: i64 }
192+
/// # fn main() -> std::io::Result<()> {
193+
/// let mut w = Vec::new();
194+
///
195+
/// let label = Label {
196+
/// key: 1,
197+
/// str: 0,
198+
/// num: 4194303,
199+
/// num_unit: 0,
200+
/// };
201+
///
202+
/// Varint::from(label.key).field(1).zero_opt().encode(&mut w)?;
203+
/// Varint::from(label.str).field(2).zero_opt().encode(&mut w)?;
204+
/// Varint::from(label.num).field(3).zero_opt().encode(&mut w)?;
205+
/// Varint::from(label.num_unit)
206+
/// .field(4)
207+
/// .zero_opt()
208+
/// .encode(&mut w)?;
209+
/// # Ok(()) }
210+
/// ```
97211
#[inline]
98-
pub fn encode_small(&self, writer: &mut impl Write) -> io::Result<()> {
99-
let len = self.value.proto_len();
212+
pub fn encode(&self, writer: &mut impl Write) -> io::Result<()> {
213+
let len = self.pair.value.proto_len();
100214
if len == 0 {
101215
return Ok(());
102216
}
103217

104-
Tag::new(self.field, V::WIRE_TYPE).encode(writer)?;
218+
Tag::new(self.pair.field, V::WIRE_TYPE).encode(writer)?;
105219
if V::WIRE_TYPE == WireType::LengthDelimited {
106220
Varint(len).encode(writer)?;
107221
}
108-
self.value.encode(writer)
222+
self.pair.value.encode(writer)
109223
}
110224
}
111225

@@ -115,14 +229,10 @@ const MIN_FIELD: u32 = 1;
115229
/// The largest possible protobuf field number.
116230
const MAX_FIELD: u32 = (1 << 29) - 1;
117231

118-
/// Represents the wire type for in-wire protobuf. There are more types than
119-
/// are represented here; these are just the supported ones.
120-
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
121-
#[repr(u8)]
122-
pub enum WireType {
123-
Varint = 0,
124-
LengthDelimited = 2,
125-
}
232+
/// A tag is a combination of a wire_type, stored in the least significant
233+
/// three bits, and the field number that is defined in the .proto file.
234+
#[derive(Copy, Clone)]
235+
pub struct Tag(u32);
126236

127237
impl Tag {
128238
#[cfg_attr(debug_assertions, track_caller)]
@@ -143,6 +253,21 @@ impl Tag {
143253
}
144254
}
145255

256+
/// Represents a packed varint. There are other kinds of things which can be
257+
/// packed in protobuf, but profiles don't currently use them.
258+
///
259+
/// # Examples
260+
///
261+
/// Packed is generic over `Into<Varint>`, so packed values of i64 and u64 can
262+
/// both be used.
263+
///
264+
/// ```
265+
/// # use datadog_profiling_protobuf::Packed;
266+
/// // u64
267+
/// _ = Packed::new(&[42u64, 67u64]);
268+
/// // i64
269+
/// _ = Packed::new(&[42i64, 67i64]);
270+
/// ```
146271
pub struct Packed<'a, T: Into<Varint>> {
147272
values: &'a [T],
148273
}

0 commit comments

Comments
 (0)