Skip to content

Commit c93be3f

Browse files
committed
refactor: pass in compressor
1 parent b0d05c5 commit c93be3f

File tree

1 file changed

+38
-26
lines changed
  • datadog-profiling/src/internal/profile

1 file changed

+38
-26
lines changed

datadog-profiling/src/internal/profile/mod.rs

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use interning_api::Generation;
2020
use lz4_flex::frame::FrameEncoder;
2121
use std::borrow::Cow;
2222
use std::collections::HashMap;
23+
use std::io;
2324
use std::sync::atomic::AtomicU64;
2425
use std::sync::{Arc, Mutex};
2526
use std::time::{Duration, SystemTime};
@@ -324,6 +325,32 @@ impl Profile {
324325
end_time: Option<SystemTime>,
325326
duration: Option<Duration>,
326327
) -> anyhow::Result<EncodedProfile> {
328+
// On 2023-08-23, we analyzed the uploaded tarball size per language.
329+
// These tarballs include 1 or more profiles, but for most languages
330+
// using libdatadog (all?) there is only 1 profile, so this is a good
331+
// proxy for the compressed, final size of the profiles.
332+
// We found that for all languages using libdatadog, the average
333+
// tarball was at least 18 KiB. Since these archives are compressed,
334+
// and because profiles compress well, especially ones with timeline
335+
// enabled (over 9x for some analyzed timeline profiles), this initial
336+
// size of 32KiB should definitely outperform starting at zero for
337+
// time consumed, allocator pressure, and allocator fragmentation.
338+
const INITIAL_PPROF_BUFFER_SIZE: usize = 32 * 1024;
339+
let mut compressor = FrameEncoder::new(Vec::with_capacity(INITIAL_PPROF_BUFFER_SIZE));
340+
341+
let mut encoded_profile = self.encode(&mut compressor, end_time, duration)?;
342+
encoded_profile.buffer = compressor.finish()?;
343+
Ok(encoded_profile)
344+
}
345+
346+
/// Encodes the profile. Note that the buffer will be empty. The caller
347+
/// needs to flush/finish the writer, then fill/replace the buffer.
348+
fn encode<W: io::Write>(
349+
mut self,
350+
writer: &mut W,
351+
end_time: Option<SystemTime>,
352+
duration: Option<Duration>,
353+
) -> io::Result<EncodedProfile> {
327354
let end = end_time.unwrap_or_else(SystemTime::now);
328355
let start = self.start_time;
329356
let endpoints_stats = std::mem::take(&mut self.endpoints.stats);
@@ -338,19 +365,6 @@ impl Profile {
338365
.as_nanos()
339366
.min(i64::MAX as u128) as i64;
340367

341-
// On 2023-08-23, we analyzed the uploaded tarball size per language.
342-
// These tarballs include 1 or more profiles, but for most languages
343-
// using libdatadog (all?) there is only 1 profile, so this is a good
344-
// proxy for the compressed, final size of the profiles.
345-
// We found that for all languages using libdatadog, the average
346-
// tarball was at least 18 KiB. Since these archives are compressed,
347-
// and because profiles compress well, especially ones with timeline
348-
// enabled (over 9x for some analyzed timeline profiles), this initial
349-
// size of 32KiB should definitely out-perform starting at zero for
350-
// time consumed, allocator pressure, and allocator fragmentation.
351-
const INITIAL_PPROF_BUFFER_SIZE: usize = 32 * 1024;
352-
let mut compressor = FrameEncoder::new(Vec::with_capacity(INITIAL_PPROF_BUFFER_SIZE));
353-
354368
for (sample, timestamp, mut values) in std::mem::take(&mut self.observations).into_iter() {
355369
let labels = self.enrich_sample_labels(sample, timestamp)?;
356370
let location_ids: Vec<_> = self
@@ -369,7 +383,7 @@ impl Profile {
369383
labels: &labels,
370384
};
371385

372-
item.field(2).encode(&mut compressor)?;
386+
item.field(2).encode(writer)?;
373387
}
374388

375389
// `Sample`s must be emitted before `SampleTypes` since we consume
@@ -383,7 +397,7 @@ impl Profile {
383397
// In this case, we use `sample_types` during upscaling of `samples`,
384398
// so we must serialize `Sample` before `SampleType`.
385399
for sample_type in self.sample_types.iter() {
386-
sample_type.field(1).encode(&mut compressor)?;
400+
sample_type.field(1).encode(writer)?;
387401
}
388402

389403
for (offset, item) in self.mappings.into_iter().enumerate() {
@@ -395,7 +409,7 @@ impl Profile {
395409
filename: item.filename,
396410
build_id: item.build_id,
397411
};
398-
mapping.field(3).encode(&mut compressor)?;
412+
mapping.field(3).encode(writer)?;
399413
}
400414

401415
for (offset, item) in self.locations.into_iter().enumerate() {
@@ -408,7 +422,7 @@ impl Profile {
408422
lineno: item.line,
409423
},
410424
};
411-
location.field(4).encode(&mut compressor)?;
425+
location.field(4).encode(writer)?;
412426
}
413427

414428
for (offset, item) in self.functions.into_iter().enumerate() {
@@ -418,12 +432,12 @@ impl Profile {
418432
system_name: item.system_name,
419433
filename: item.filename,
420434
};
421-
function.field(5).encode(&mut compressor)?;
435+
function.field(5).encode(writer)?;
422436
}
423437

424438
let mut lender = self.strings.into_lending_iter();
425439
while let Some(item) = lender.next() {
426-
item.field(6).encode(&mut compressor)?;
440+
item.field(6).encode(writer)?;
427441
}
428442

429443
let time_nanos = self
@@ -433,20 +447,18 @@ impl Profile {
433447
duration.as_nanos().min(i64::MAX as u128) as i64
434448
});
435449

436-
Varint::from(time_nanos).field(9).encode(&mut compressor)?;
437-
Varint::from(duration_nanos)
438-
.field(10)
439-
.encode(&mut compressor)?;
450+
Varint::from(time_nanos).field(9).encode(writer)?;
451+
Varint::from(duration_nanos).field(10).encode(writer)?;
440452

441453
if let Some((period, period_type)) = self.period {
442-
period_type.field(11).encode(&mut compressor)?;
443-
Varint::from(period).field(12).encode(&mut compressor)?;
454+
period_type.field(11).encode(writer)?;
455+
Varint::from(period).field(12).encode(writer)?;
444456
};
445457

446458
Ok(EncodedProfile {
447459
start,
448460
end,
449-
buffer: compressor.finish()?,
461+
buffer: Vec::new(),
450462
endpoints_stats,
451463
})
452464
}

0 commit comments

Comments
 (0)