Skip to content

Commit 86e555e

Browse files
authored
Merge pull request #41 from michaelwoerister/file-header
Add a simple file header to binary files created by measureme.
2 parents e0d7945 + ddd0f9c commit 86e555e

File tree

9 files changed

+161
-21
lines changed

9 files changed

+161
-21
lines changed

measureme/src/file_header.rs

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
//! All binary files generated by measureme have a simple file header that
2+
//! consists of a 4 byte file magic string and a 4 byte little-endian version
3+
//! number.
4+
5+
use byteorder::{ByteOrder, LittleEndian};
6+
use crate::serialization::SerializationSink;
7+
use std::error::Error;
8+
9+
pub const CURRENT_FILE_FORMAT_VERSION: u32 = 0;
10+
pub const FILE_MAGIC_EVENT_STREAM: &[u8; 4] = b"MMES";
11+
pub const FILE_MAGIC_STRINGTABLE_DATA: &[u8; 4] = b"MMSD";
12+
pub const FILE_MAGIC_STRINGTABLE_INDEX: &[u8; 4] = b"MMSI";
13+
14+
/// The size of the file header in bytes. Note that functions in this module
15+
/// rely on this size to be `8`.
16+
pub const FILE_HEADER_SIZE: usize = 8;
17+
18+
/// Writes the measureme file header to the sink `s`.
///
/// The header is `FILE_HEADER_SIZE` bytes long: the four bytes of
/// `file_magic` followed by `CURRENT_FILE_FORMAT_VERSION` encoded as a
/// little-endian `u32`. It is emitted through a single `write_atomic` call.
///
/// # Panics
///
/// Panics if `FILE_HEADER_SIZE` is ever changed to something other than `8`.
pub fn write_file_header<S: SerializationSink>(s: &S, file_magic: &[u8; 4]) {
    // The byte layout below is hard-coded for an 8 byte header, so fail loudly
    // if the constant is changed without this function being updated as well.
    assert_eq!(FILE_HEADER_SIZE, 8);

    s.write_atomic(FILE_HEADER_SIZE, |header| {
        // First half: the file magic. Second half: the format version.
        let (magic, version) = header.split_at_mut(4);
        magic.copy_from_slice(file_magic);
        LittleEndian::write_u32(&mut version[.. 4], CURRENT_FILE_FORMAT_VERSION);
    });
}
28+
29+
pub fn read_file_header(
30+
bytes: &[u8],
31+
expected_magic: &[u8; 4]
32+
) -> Result<u32, Box<dyn Error>> {
33+
// The implementation here relies on FILE_HEADER_SIZE to have the value 8.
34+
// Let's make sure this assumption cannot be violated without being noticed.
35+
assert_eq!(FILE_HEADER_SIZE, 8);
36+
37+
let actual_magic = &bytes[0 .. 4];
38+
39+
if actual_magic != expected_magic {
40+
// FIXME: The error message should mention the file path in order to be
41+
// more useful.
42+
let msg = format!(
43+
"Unexpected file magic `{:?}`. Expected `{:?}`",
44+
actual_magic,
45+
expected_magic,
46+
);
47+
48+
return Err(From::from(msg));
49+
}
50+
51+
Ok(LittleEndian::read_u32(&bytes[4..8]))
52+
}
53+
54+
pub fn strip_file_header(data: &[u8]) -> &[u8] {
55+
&data[FILE_HEADER_SIZE ..]
56+
}
57+
58+
59+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::serialization::test::TestSink;

    /// Writes a header carrying `magic` into a fresh `TestSink` and returns
    /// the bytes that ended up in the sink.
    fn header_bytes(magic: &[u8; 4]) -> Vec<u8> {
        let sink = TestSink::new();
        write_file_header(&sink, magic);
        sink.into_bytes()
    }

    // A header that was just written must be readable again and report the
    // current format version.
    #[test]
    fn roundtrip() {
        let bytes = header_bytes(FILE_MAGIC_EVENT_STREAM);
        let version = read_file_header(&bytes, FILE_MAGIC_EVENT_STREAM).unwrap();

        assert_eq!(version, CURRENT_FILE_FORMAT_VERSION);
    }

    // Corrupting a single byte of the file magic must be detected.
    #[test]
    fn invalid_magic() {
        let mut bytes = header_bytes(FILE_MAGIC_STRINGTABLE_DATA);

        // Invalidate the file magic
        bytes[2] = 0;

        let result = read_file_header(&bytes, FILE_MAGIC_STRINGTABLE_DATA);
        assert!(result.is_err());
    }

    // `read_file_header` reports whatever version is stored in the header; it
    // does not reject unknown versions itself.
    #[test]
    fn other_version() {
        let mut bytes = header_bytes(FILE_MAGIC_STRINGTABLE_INDEX);

        // Overwrite the version field
        for byte in &mut bytes[4 .. 8] {
            *byte = 0xFF;
        }

        let version = read_file_header(&bytes, FILE_MAGIC_STRINGTABLE_INDEX).unwrap();
        assert_eq!(version, 0xFFFF_FFFF);
    }
}

measureme/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
mod event;
2+
mod file_header;
23
mod file_serialization_sink;
34
mod mmap_serialization_sink;
45
mod profiler;

measureme/src/profiler.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::file_header::{write_file_header, FILE_MAGIC_EVENT_STREAM};
12
use crate::raw_event::{RawEvent, Timestamp, TimestampKind};
23
use crate::serialization::SerializationSink;
34
use crate::stringtable::{SerializableString, StringId, StringTableBuilder};
@@ -32,6 +33,10 @@ impl<S: SerializationSink> Profiler<S> {
3233
pub fn new(path_stem: &Path) -> Result<Profiler<S>, Box<dyn Error>> {
3334
let paths = ProfilerFiles::new(path_stem);
3435
let event_sink = Arc::new(S::from_path(&paths.events_file)?);
36+
37+
// The first thing in every file we generate must be the file header.
38+
write_file_header(&*event_sink, FILE_MAGIC_EVENT_STREAM);
39+
3540
let string_table = StringTableBuilder::new(
3641
Arc::new(S::from_path(&paths.string_data_file)?),
3742
Arc::new(S::from_path(&paths.string_index_file)?),

measureme/src/profiling_data.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
use crate::file_header::FILE_HEADER_SIZE;
12
use crate::event::Event;
23
use crate::{ProfilerFiles, RawEvent, StringTable, TimestampKind};
4+
use std::error::Error;
35
use std::fs;
46
use std::mem;
57
use std::path::Path;
@@ -11,19 +13,19 @@ pub struct ProfilingData {
1113
}
1214

1315
impl ProfilingData {
14-
pub fn new(path_stem: &Path) -> ProfilingData {
16+
pub fn new(path_stem: &Path) -> Result<ProfilingData, Box<dyn Error>> {
1517
let paths = ProfilerFiles::new(path_stem);
1618

1719
let string_data = fs::read(paths.string_data_file).expect("couldn't read string_data file");
1820
let index_data = fs::read(paths.string_index_file).expect("couldn't read string_index file");
1921
let event_data = fs::read(paths.events_file).expect("couldn't read events file");
2022

21-
let string_table = StringTable::new(string_data, index_data);
23+
let string_table = StringTable::new(string_data, index_data)?;
2224

23-
ProfilingData {
25+
Ok(ProfilingData {
2426
string_table,
2527
event_data,
26-
}
28+
})
2729
}
2830

2931
pub fn iter(&self) -> impl Iterator<Item = Event<'_>> {
@@ -53,15 +55,16 @@ impl<'a> Iterator for ProfilerEventIterator<'a> {
5355
type Item = Event<'a>;
5456

5557
fn next(&mut self) -> Option<Event<'a>> {
56-
let raw_idx = self.curr_event_idx * mem::size_of::<RawEvent>();
57-
let raw_idx_end = raw_idx + mem::size_of::<RawEvent>();
58-
if raw_idx_end > self.data.event_data.len() {
58+
let event_start_addr = FILE_HEADER_SIZE +
59+
self.curr_event_idx * mem::size_of::<RawEvent>();
60+
let event_end_addr = event_start_addr + mem::size_of::<RawEvent>();
61+
if event_end_addr > self.data.event_data.len() {
5962
return None;
6063
}
6164

6265
self.curr_event_idx += 1;
6366

64-
let raw_event_bytes = &self.data.event_data[raw_idx..raw_idx_end];
67+
let raw_event_bytes = &self.data.event_data[event_start_addr..event_end_addr];
6568

6669
let mut raw_event = RawEvent::default();
6770
unsafe {

measureme/src/stringtable.rs

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
//! UTF-8 bytes. The content of a `TAG_STR_REF` is the contents of the entry
1313
//! it references.
1414
15+
use crate::file_header::{write_file_header, read_file_header, strip_file_header,
16+
FILE_MAGIC_STRINGTABLE_DATA, FILE_MAGIC_STRINGTABLE_INDEX};
1517
use crate::serialization::{Addr, SerializationSink};
1618
use byteorder::{ByteOrder, LittleEndian};
1719
use rustc_hash::FxHashMap;
1820
use std::borrow::Cow;
21+
use std::error::Error;
1922
use std::sync::atomic::{AtomicU32, Ordering};
2023
use std::sync::Arc;
2124

@@ -117,6 +120,11 @@ fn deserialize_index_entry(bytes: &[u8]) -> (StringId, Addr) {
117120

118121
impl<S: SerializationSink> StringTableBuilder<S> {
119122
pub fn new(data_sink: Arc<S>, index_sink: Arc<S>) -> StringTableBuilder<S> {
123+
124+
// The first thing in every file we generate must be the file header.
125+
write_file_header(&*data_sink, FILE_MAGIC_STRINGTABLE_DATA);
126+
write_file_header(&*index_sink, FILE_MAGIC_STRINGTABLE_INDEX);
127+
120128
StringTableBuilder {
121129
data_sink,
122130
index_sink,
@@ -230,12 +238,27 @@ pub struct StringTable {
230238
}
231239

232240
impl<'data> StringTable {
233-
pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> StringTable {
234-
assert!(index_data.len() % 8 == 0);
241+
pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> Result<StringTable, Box<dyn Error>> {
235242

236-
let index: FxHashMap<_, _> = index_data.chunks(8).map(deserialize_index_entry).collect();
243+
let string_data_format = read_file_header(&string_data, FILE_MAGIC_STRINGTABLE_DATA)?;
244+
let index_data_format = read_file_header(&index_data, FILE_MAGIC_STRINGTABLE_INDEX)?;
245+
246+
if string_data_format != index_data_format {
247+
Err("Mismatch between StringTable DATA and INDEX format version")?;
248+
}
249+
250+
if string_data_format != 0 {
251+
Err(format!("StringTable file format version '{}' is not supported
252+
by this version of `measureme`.", string_data_format))?;
253+
}
254+
255+
assert!(index_data.len() % 8 == 0);
256+
let index: FxHashMap<_, _> = strip_file_header(&index_data)
257+
.chunks(8)
258+
.map(deserialize_index_entry)
259+
.collect();
237260

238-
StringTable { string_data, index }
261+
Ok(StringTable { string_data, index })
239262
}
240263

241264
#[inline]
@@ -245,7 +268,7 @@ impl<'data> StringTable {
245268
}
246269

247270
#[cfg(test)]
248-
mod test {
271+
mod tests {
249272
use super::*;
250273

251274
#[test]
@@ -278,7 +301,7 @@ mod test {
278301
let data_bytes = Arc::try_unwrap(data_sink).unwrap().into_bytes();
279302
let index_bytes = Arc::try_unwrap(index_sink).unwrap().into_bytes();
280303

281-
let string_table = StringTable::new(data_bytes, index_bytes);
304+
let string_table = StringTable::new(data_bytes, index_bytes).unwrap();
282305

283306
for (&id, &expected_string) in string_ids.iter().zip(expected_strings.iter()) {
284307
let str_ref = string_table.get(id);

measureme/src/testing_common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ fn generate_profiling_data<S: SerializationSink>(filestem: &Path) -> Vec<Event>
8888
// Process some profiling data. This is the part that would run in a
8989
// post processing tool.
9090
fn process_profiling_data(filestem: &Path, expected_events: &[Event]) {
91-
let profiling_data = ProfilingData::new(filestem);
91+
let profiling_data = ProfilingData::new(filestem).unwrap();
9292

9393
let mut count = 0;
9494

mmview/src/main.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::error::Error;
12
use std::path::PathBuf;
23
use measureme::ProfilingData;
34

@@ -8,12 +9,14 @@ struct Opt {
89
file_prefix: PathBuf,
910
}
1011

11-
fn main() {
12+
fn main() -> Result<(), Box<dyn Error>> {
1213
let opt = Opt::from_args();
1314

14-
let data = ProfilingData::new(&opt.file_prefix);
15+
let data = ProfilingData::new(&opt.file_prefix)?;
1516

1617
for event in data.iter() {
1718
println!("{:?}", event);
1819
}
20+
21+
Ok(())
1922
}

stack_collapse/src/main.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::error::Error;
12
use std::fs::File;
23
use std::io::{BufWriter, Write};
34
use std::path::PathBuf;
@@ -20,10 +21,10 @@ struct Opt {
2021
interval: u64,
2122
}
2223

23-
fn main() -> Result<(), Box<std::error::Error>> {
24+
fn main() -> Result<(), Box<dyn Error>> {
2425
let opt = Opt::from_args();
2526

26-
let profiling_data = ProfilingData::new(&opt.file_prefix);
27+
let profiling_data = ProfilingData::new(&opt.file_prefix)?;
2728

2829
let first_event_time = {
2930
let current_time = profiling_data.iter().next().unwrap().timestamp;

summarize/src/main.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#[macro_use]
22
extern crate prettytable;
33

4+
use std::error::Error;
45
use std::fs::File;
56
use std::io::BufWriter;
67
use std::path::PathBuf;
@@ -24,10 +25,10 @@ struct Opt {
2425
percent_above: f64,
2526
}
2627

27-
fn main() -> Result<(), Box<std::error::Error>> {
28+
fn main() -> Result<(), Box<dyn Error>> {
2829
let opt = Opt::from_args();
2930

30-
let data = ProfilingData::new(&opt.file_prefix);
31+
let data = ProfilingData::new(&opt.file_prefix)?;
3132

3233
let mut results = analysis::perform_analysis(data);
3334

0 commit comments

Comments
 (0)