Skip to content

Commit 531e4fb

Browse files
committed
Rename Field to Record, document more things, etc
1 parent 3634bc9 commit 531e4fb

File tree

13 files changed

+242
-199
lines changed

13 files changed

+242
-199
lines changed

datadog-profiling-protobuf/src/function.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use super::{Field, StringOffset, Value, WireType, NO_OPT_ZERO, OPT_ZERO};
4+
use super::{Record, StringOffset, Value, WireType, NO_OPT_ZERO, OPT_ZERO};
55
use std::io::{self, Write};
66

77
/// Represents a function in a profile. Omits the start line because it's not
8-
/// useful to libdatadog right now, so we save the bytes/ops.
8+
/// useful to Datadog right now, so we save the bytes/ops.
99
#[repr(C)]
1010
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
1111
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
1212
pub struct Function {
1313
/// Unique nonzero id for the function.
14-
pub id: Field<u64, 1, NO_OPT_ZERO>,
14+
pub id: Record<u64, 1, NO_OPT_ZERO>,
1515
/// Name of the function, in human-readable form if available.
16-
pub name: Field<StringOffset, 2, OPT_ZERO>,
16+
pub name: Record<StringOffset, 2, OPT_ZERO>,
1717
/// Name of the function, as identified by the system.
1818
/// For instance, it can be a C++ mangled name.
19-
pub system_name: Field<StringOffset, 3, OPT_ZERO>,
19+
pub system_name: Record<StringOffset, 3, OPT_ZERO>,
2020
/// Source file containing the function.
21-
pub filename: Field<StringOffset, 4, OPT_ZERO>,
21+
pub filename: Record<StringOffset, 4, OPT_ZERO>,
2222
}
2323

24-
impl Value for Function {
24+
unsafe impl Value for Function {
2525
const WIRE_TYPE: WireType = WireType::LengthDelimited;
2626

2727
fn proto_len(&self) -> u64 {

datadog-profiling-protobuf/src/label.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use super::{Field, StringOffset, Value, WireType, OPT_ZERO};
4+
use super::{Record, StringOffset, Value, WireType, OPT_ZERO};
55
use std::io::{self, Write};
66

77
/// Label includes additional context for this sample. It can include things
@@ -11,22 +11,22 @@ use std::io::{self, Write};
1111
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
1212
pub struct Label {
1313
/// An annotation for a sample, e.g. "allocation_size".
14-
pub key: Field<StringOffset, 1, OPT_ZERO>,
14+
pub key: Record<StringOffset, 1, OPT_ZERO>,
1515
/// At most, one of the str and num should be used.
16-
pub str: Field<StringOffset, 2, OPT_ZERO>,
16+
pub str: Record<StringOffset, 2, OPT_ZERO>,
1717
/// At most, one of the str and num should be used.
18-
pub num: Field<i64, 3, OPT_ZERO>,
18+
pub num: Record<i64, 3, OPT_ZERO>,
1919

2020
// todo: if we don't use num_unit, then we can save 8 bytes--4 from
2121
// num_unit plus 4 from padding.
2222
/// Should only be present when num is present.
2323
/// Specifies the units of num.
2424
/// Use arbitrary string (for example, "requests") as a custom count unit.
2525
/// If no unit is specified, consumer may apply heuristic to deduce it.
26-
pub num_unit: Field<StringOffset, 4, OPT_ZERO>,
26+
pub num_unit: Record<StringOffset, 4, OPT_ZERO>,
2727
}
2828

29-
impl Value for Label {
29+
unsafe impl Value for Label {
3030
const WIRE_TYPE: WireType = WireType::LengthDelimited;
3131

3232
fn proto_len(&self) -> u64 {

datadog-profiling-protobuf/src/lib.rs

Lines changed: 109 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
#![cfg_attr(not(test), deny(clippy::expect_used))]
77
#![cfg_attr(not(test), deny(clippy::unimplemented))]
88

9-
//! This crate implements Protobuf serializers for [`profiles`], including
10-
//! serializers for:
9+
//! This crate implements Protobuf encoders for [`profiles`] which write to a
10+
//! [`Write`]. It has encoders for:
1111
//!
1212
//! - [Function]
1313
//! - [Label]
@@ -16,19 +16,42 @@
1616
//! - [Sample]
1717
//! - [ValueType]
1818
//!
19-
//! Serialization often happens one byte at a time, so a buffered writer
20-
//! should probably be used.
19+
//! There is no encoder for a Profile message. It would require borrowing a
20+
//! lot of data, which becomes unwieldy. It also isn't very compatible with
21+
//! writing a streaming serializer to lower peak memory usage.
2122
//!
22-
//! There is no serializer for Profile. It would require borrowing a lot of
23-
//! data, which becomes unwieldy. It also isn't very compatible with writing
24-
//! a streaming serializer to lower peak memory usage.
23+
//! Encoding often happens one byte at a time, so a buffered writer should
24+
//! probably be used.
2525
//!
2626
//! Indices into the string table are represented by [StringOffset], which uses
2727
//! a 32-bit number. ID fields are still 64-bit, so the user can control their
2828
//! values, potentially using a 64-bit address for its value.
2929
//!
3030
//! The types are generally `#[repr(C)]` so they can be used in FFI one day.
3131
//!
32+
//! Here is a condensed reference for the parts of protobuf used by profiles:
33+
//!
34+
//! ```reference
35+
//! message := (tag value)*
36+
//! tag := (field << 3) bit-or wire_type;
37+
//! encoded as uint32 varint
38+
//! value := varint for wire_type == VARINT,
39+
//! len-prefix for wire_type == LEN,
40+
//! varint := int64 | uint64
41+
//! len-prefix := size (message | string | packed);
42+
//! size encoded as int32 varint
43+
//! string := valid UTF-8 string;
44+
//! max 2GB of bytes
45+
//! packed := varint*
46+
//! consecutive values of the type specified in `.proto`
47+
//! ```
48+
//!
49+
//! A [`Record`] represents a [`Tag`] and [`Value`] pair, where the
50+
//! [`WireType`] comes from [`Value::WIRE_TYPE`].
51+
//!
52+
//! Protos must be smaller than 2 GiB when encoded. Many proto implementations
53+
//! will refuse to encode or decode messages that exceed this limit.
54+
//!
3255
//! [`profiles`]: https://github.com/google/pprof/blob/main/proto/profile.proto
3356
3457
mod function;
@@ -48,61 +71,78 @@ pub use label::*;
4871
pub use location::*;
4972
pub use mapping::*;
5073
pub use sample::*;
51-
use std::fmt::{Debug, Formatter};
5274
pub use string::*;
5375
pub use value_type::*;
5476

77+
use std::fmt::{Debug, Formatter};
5578
use std::io::{self, Write};
5679

57-
/// Create a field of a given type, field number, and whether to perform the
58-
/// zero-size optimization or not.
80+
/// A record is responsible for encoding the field number, wire type and
81+
/// payload. The wire type tells the parser how big the payload after it is.
82+
/// For more details, refer to the [Condensed Reference Card].
83+
///
84+
/// The `P` is the payload, the `F` is the field number, and `O` is whether to
85+
/// apply the zero-sized optimization or not. Most of the time, it shouldn't
86+
/// matter if the optimization is applied. However, if something is part of
87+
/// a repeated field, then applying the optimization would change the number
88+
/// of elements in the array.
89+
///
90+
/// [Condensed Reference Card]: https://protobuf.dev/programming-guides/encoding/#cheat-sheet
5991
#[derive(Copy, Clone, Default, Eq, PartialEq)]
6092
#[repr(transparent)]
6193
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
62-
pub struct Field<T: Value, const N: u32, const O: bool> {
63-
pub value: T,
94+
pub struct Record<P: Value, const F: u32, const O: bool> {
95+
pub value: P,
6496
}
6597

6698
/// Represents the wire type for the in-wire protobuf encoding. There are more
67-
/// types than are represented here; these are just the supported ones.
99+
/// types than are represented here; these are just the ones used in profiles.
100+
/// See [Message Structure] for more documentation.
101+
///
102+
/// [Message Structure]: https://protobuf.dev/programming-guides/encoding/#structure
68103
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
69104
#[repr(u8)]
70105
pub enum WireType {
71106
Varint = 0,
72107
LengthDelimited = 2,
73108
}
74109

75-
/// A value is stored differently depending on the wire_type.
76-
pub trait Value: Default + Eq {
110+
/// A value (or payload) is stored differently depending on the wire_type. In
111+
/// profiles, there are two types of payloads: varints and len-prefixed types.
112+
///
113+
/// # Safety
114+
///
115+
/// The [`Default`] implementation _must_ provide all zero values.
116+
pub unsafe trait Value: Default + Eq {
77117
/// The wire type this value uses.
78118
const WIRE_TYPE: WireType;
79119

80-
/// The number of bytes it takes to encode this value.
120+
/// The number of bytes it takes to encode this value. Do not include the
121+
/// number of bytes it takes to encode the length-prefix as a varint. For
122+
/// example, using this snippet of the reference:
123+
///
124+
/// ```reference
125+
/// len-prefix := size (message | string | packed);
126+
/// size encoded as int32 varint
127+
/// ```
128+
///
129+
/// Calculate the number of bytes for `(message | string | packed)` only.
130+
///
131+
/// For a varint, returns the number of bytes used to encode it, which is
132+
/// between 1 and 10.
133+
///
134+
/// Returns u64 rather than u31 to avoid a lot of overflow checking.
81135
fn proto_len(&self) -> u64;
82136

83-
/// Encode the value to the in-wire protobuf format.
137+
/// Encode the value to the in-wire protobuf format. For length-delimited
138+
/// types, do not include the length-prefix; see [`Value::proto_len`] for
139+
/// more details.
84140
///
85-
/// Serialization often happens one byte at a time, so a buffered writer
86-
/// should probably be used.
141+
/// Encoding often happens one byte at a time, so a buffered writer should
142+
/// probably be used.
87143
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()>;
88144
}
89145

90-
/// You can use varint to store any of the listed data types:
91-
/// int32 | int64 | uint32 | uint64 | bool | enum | sint32 | sint64
92-
///
93-
/// # Safety
94-
///
95-
/// The [`Value::WIRE_TYPE`] must be [`WireType::Varint`]!
96-
pub unsafe trait Varint: Value + Sized {}
97-
98-
/// You can use LengthDelimited to store any of the listed data types:
99-
/// string, bytes, embedded messages, packed repeated fields
100-
///
101-
/// # Safety
102-
///
103-
/// The [`Value::WIRE_TYPE`] must be [`WireType::LengthDelimited`]!
104-
pub unsafe trait LengthDelimited: Value + Sized {}
105-
106146
/// Intended to be provided to a [`Record`] to mean that it _should_ optimize for a
107147
/// value of zero. See also [`NO_OPT_ZERO`].
108148
pub const OPT_ZERO: bool = true;
@@ -112,44 +152,46 @@ pub const OPT_ZERO: bool = true;
112152
/// Mapping.id.
113153
pub const NO_OPT_ZERO: bool = false;
114154

115-
impl<T: Value, const N: u32, const O: bool> From<T> for Field<T, N, O> {
116-
fn from(value: T) -> Self {
117-
Field { value }
155+
impl<P: Value, const F: u32, const O: bool> From<P> for Record<P, F, O> {
156+
fn from(value: P) -> Self {
157+
Record { value }
118158
}
119159
}
120160

121-
impl<T: Value, const N: u32, const O: bool> Field<T, N, O> {
122-
pub fn proto_len(&self) -> u64 {
123-
if O && self.value == T::default() {
161+
unsafe impl<P: Value, const F: u32, const O: bool> Value for Record<P, F, O> {
162+
const WIRE_TYPE: WireType = P::WIRE_TYPE;
163+
164+
fn proto_len(&self) -> u64 {
165+
if O && self.value == P::default() {
124166
return 0;
125167
}
126168
let proto_len = self.value.proto_len();
127-
let len = if T::WIRE_TYPE == WireType::LengthDelimited {
169+
let len = if P::WIRE_TYPE == WireType::LengthDelimited {
128170
proto_len.proto_len()
129171
} else {
130172
0
131173
};
132-
let tag = Tag::new(N, T::WIRE_TYPE).proto_len();
174+
let tag = Tag::new(F, P::WIRE_TYPE).proto_len();
133175
tag + len + proto_len
134176
}
135177

136-
pub fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
137-
if O && self.value == T::default() {
178+
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
179+
if O && self.value == P::default() {
138180
return Ok(());
139181
}
140-
Tag::new(N, T::WIRE_TYPE).encode(writer)?;
141-
if T::WIRE_TYPE == WireType::LengthDelimited {
142-
self.value.proto_len().encode(writer)?;
182+
Tag::new(F, P::WIRE_TYPE).encode(writer)?;
183+
if P::WIRE_TYPE == WireType::LengthDelimited {
184+
varint::encode(self.value.proto_len(), writer)?;
143185
}
144186
self.value.encode(writer)
145187
}
146188
}
147189

148-
impl<T: Debug + Value, const N: u32, const O: bool> Debug for Field<T, N, O> {
190+
impl<P: Debug + Value, const F: u32, const O: bool> Debug for Record<P, F, O> {
149191
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
150192
f.debug_struct("Field")
151193
.field("value", &self.value)
152-
.field("number", &N)
194+
.field("number", &F)
153195
.field("optimize_for_zero", &O)
154196
.finish()
155197
}
@@ -176,11 +218,26 @@ impl Tag {
176218

177219
#[inline]
178220
pub fn proto_len(self) -> u64 {
179-
(self.0 as u64).proto_len()
221+
varint::proto_len(self.0 as u64)
180222
}
181223

182224
#[inline]
183225
pub fn encode<W: Write>(self, writer: &mut W) -> io::Result<()> {
184-
(self.0 as u64).encode(writer)
226+
varint::encode(self.0 as u64, writer)
227+
}
228+
}
229+
230+
unsafe impl<T: Value> Value for &'_ [T] {
231+
const WIRE_TYPE: WireType = WireType::LengthDelimited;
232+
233+
fn proto_len(&self) -> u64 {
234+
self.iter().map(Value::proto_len).sum()
235+
}
236+
237+
fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
238+
for value in self.iter() {
239+
value.encode(writer)?;
240+
}
241+
Ok(())
185242
}
186243
}

0 commit comments

Comments
 (0)