Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6ae9093
feat(cbor_plus_zstd): added reference to zeekstd version 0.5.0 (the l…
nickysn Jul 18, 2025
331992e
feat(cbor_plus_zstd): upgrade to Rust 2024 edition, because of zeekst…
nickysn Jul 18, 2025
23c2324
feat(cbor_plus_zstd): added reference to cbor4ii version 1.0.0 (the l…
nickysn Jul 22, 2025
07bc2ff
feat(cbor_plus_zstd): enable the serde1 feature of cbor4ii
nickysn Jul 22, 2025
6bbc2c7
feat(cbor_plus_zstd): started implementing a StreamingTraceWriter. No…
nickysn Jul 22, 2025
84b053b
feat(cbor_plus_zstd): introduced TraceEventsFileFormat::BinaryV0. Use…
nickysn Jul 22, 2025
c43e711
feat(cbor_plus_zstd): implemented the actual writing of the CBOR comp…
nickysn Jul 22, 2025
4a74c09
feat(cbor_plus_zstd): removed commented out code
nickysn Jul 22, 2025
e5f65a2
feat(cbor_plus_zstd): updated test to test both versions of the binar…
nickysn Jul 22, 2025
794b58c
feat(cbor_plus_zstd): removed the load_trace_events function from Non…
nickysn Jul 22, 2025
42667be
feat(cbor_plus_zstd): initial attempt at implementing reading of CBOR…
nickysn Jul 23, 2025
07f5796
feat(cbor_plus_zstd): CBOR + zstd reading fixed. Also, forgot to add …
nickysn Jul 23, 2025
8c8842c
feat(cbor_plus_zstd): fix tests
nickysn Jul 23, 2025
96ec64b
feat(cbor_plus_zstd): removed unused use
nickysn Jul 23, 2025
b02cd9a
feat(cbor_plus_zstd): removed unused test code
nickysn Jul 23, 2025
5970c19
feat(cbor_plus_zstd): refactored trace writer code, to avoid copy dup…
nickysn Jul 24, 2025
a12bd92
refactor(cbor_plus_zstd): abstracttracewriter.rs renamed abstract_tra…
nickysn Jul 24, 2025
e5960f7
refactor(cbor_plus_zstd): cborzstdreader.rs renamed cbor_zstd_reader.rs
nickysn Jul 24, 2025
21c77ec
refactor(cbor_plus_zstd): cborzstdwriter.rs renamed cbor_zstd_writer.rs
nickysn Jul 24, 2025
b088823
refactor(cbor_plus_zstd): moved some code to trace_readers.rs
nickysn Jul 24, 2025
747b17f
refactor(cbor_plus_zstd): moved the TraceWriter trait to a new file t…
nickysn Jul 24, 2025
5cc8b39
refactor(cbor_plus_zstd): moved the NonStreamingTraceWriter to a new …
nickysn Jul 24, 2025
de4cb0b
refactor(cbor_plus_zstd): factored out the repeated code for begin_wr…
nickysn Jul 24, 2025
295a012
refactor(cbor_plus_zstd): factored out the repeated initialization co…
nickysn Jul 24, 2025
2d52028
refactor(cbor_plus_zstd): StreamingTraceWriter renamed CborZstdTraceW…
nickysn Jul 24, 2025
6084732
chore: cargo fmt
nickysn Jul 24, 2025
23f65bb
chore: use write_all() instead of write() in CborZstdTraceWriter::add…
nickysn Jul 24, 2025
923ca9b
chore: magic Rust lifetime elision woo-woo
nickysn Jul 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion runtime_tracing/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "runtime_tracing"
version = "0.13.0"
edition = "2021"
edition = "2024"
authors = ["Metacraft Labs Ltd"]
description = "A library for the schema and tracing helpers for the CodeTracer db trace format"
readme = "README.md"
Expand All @@ -20,6 +20,9 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_repr = "0.1"
capnp = "0.21.1"
zeekstd = "0.5.0"
cbor4ii = { version = "1.0.0", features = ["serde1", "use_std"] }
fscommon = "0.1.1"

[build-dependencies]
capnpc = "0.21.0"
Expand Down
311 changes: 311 additions & 0 deletions runtime_tracing/src/abstract_trace_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
use std::{
collections::HashMap,
env,
error::Error,
fs,
path::{Path, PathBuf},
};

use crate::{
AssignCellRecord, AssignCompoundItemRecord, AssignmentRecord, CallRecord, CellValueRecord, CompoundValueRecord, FullValueRecord, FunctionId,
FunctionRecord, Line, NONE_TYPE_ID, PathId, RValue, RecordEvent, ReturnRecord, StepRecord, TraceLowLevelEvent, TraceMetadata, TypeId, TypeKind,
TypeRecord, TypeSpecificInfo, VariableCellRecord, VariableId, tracer::TOP_LEVEL_FUNCTION_ID,
};

/// Shared bookkeeping state used by the default methods of `AbstractTraceWriter`.
///
/// Implementors of the trait own one of these and expose it through
/// `get_data()` / `get_mut_data()`.
pub struct AbstractTraceWriterData {
// trace metadata:
/// Working directory captured when the data is created; written out as part of
/// the trace metadata.
pub workdir: PathBuf,
/// Program name/path, written out as part of the trace metadata.
pub program: String,
/// Program arguments, written out as part of the trace metadata.
pub args: Vec<String>,
// internal tracer state:
/// Every registered path, in registration order; serialized by
/// `finish_writing_trace_paths()`.
pub path_list: Vec<PathBuf>,
/// `(name, path id, line)` for every registered function, in registration order;
/// indexed by `FunctionId` when a call is registered.
pub function_list: Vec<(String, PathId, Line)>,

/// Path -> id lookup; a new id is the map's size at insertion time.
pub paths: HashMap<PathBuf, PathId>,
/// Function name -> id lookup (keyed by name only); a new id is the map's size
/// at insertion time.
pub functions: HashMap<String, FunctionId>,
/// Variable name -> id lookup; a new id is the map's size at insertion time.
pub variables: HashMap<String, VariableId>,
/// Language type name -> id lookup; a new id is the map's size at insertion time.
pub types: HashMap<String, TypeId>,

/// Destination for the metadata JSON; set by `begin_writing_trace_metadata()`.
pub trace_metadata_path: Option<PathBuf>,
/// Destination for the path-list JSON; set by `begin_writing_trace_paths()`.
pub trace_paths_path: Option<PathBuf>,
}

impl AbstractTraceWriterData {
pub fn new(program: &str, args: &[String]) -> Self {
AbstractTraceWriterData {
workdir: env::current_dir().expect("can access the current dir"),
program: program.to_string(),
args: args.to_vec(),

path_list: vec![],
function_list: vec![],
paths: HashMap::new(),
functions: HashMap::new(),
variables: HashMap::new(),
types: HashMap::new(),

trace_metadata_path: None,
trace_paths_path: None,
}
}
}

pub trait AbstractTraceWriter {
fn get_data(&self) -> &AbstractTraceWriterData;
fn get_mut_data(&mut self) -> &mut AbstractTraceWriterData;

fn add_event(&mut self, event: TraceLowLevelEvent);
fn append_events(&mut self, events: &mut Vec<TraceLowLevelEvent>);

fn begin_writing_trace_metadata(&mut self, path: &Path) -> Result<(), Box<dyn Error>> {
self.get_mut_data().trace_metadata_path = Some(path.to_path_buf());
Ok(())
}

fn begin_writing_trace_paths(&mut self, path: &Path) -> Result<(), Box<dyn Error>> {
self.get_mut_data().trace_paths_path = Some(path.to_path_buf());
Ok(())
}

fn start(&mut self, path: &std::path::Path, line: Line) {
let function_id = self.ensure_function_id("<toplevel>", path, line);
self.register_call(function_id, vec![]);
assert!(function_id == TOP_LEVEL_FUNCTION_ID);

// probably we let the user choose, as different languages have
// different base types/names
// assert!(EXAMPLE_INT_TYPE_ID == self.load_type_id(TypeKind::Int, "Int"));
// assert!(EXAMPLE_FLOAT_TYPE_ID == self.load_type_id(TypeKind::Float, "Float"));
// assert!(EXAMPLE_BOOL_TYPE_ID == self.load_type_id(TypeKind::Bool, "Bool"));
// assert!(EXAMPLE_STRING_TYPE_ID == self.load_type_id(TypeKind::Bool, "String"));
assert!(NONE_TYPE_ID == self.ensure_type_id(TypeKind::None, "None"));
}

fn ensure_path_id(&mut self, path: &std::path::Path) -> PathId {
if !self.get_data().paths.contains_key(path) {
let mut_data = self.get_mut_data();
mut_data.paths.insert(path.to_path_buf(), PathId(mut_data.paths.len()));
self.register_path(path);
}
*self.get_data().paths.get(path).unwrap()
}

fn ensure_function_id(&mut self, function_name: &str, path: &std::path::Path, line: Line) -> FunctionId {
if !self.get_data().functions.contains_key(function_name) {
// same function names for different path line? TODO
let mut_data = self.get_mut_data();
mut_data.functions.insert(function_name.to_string(), FunctionId(mut_data.functions.len()));
self.register_function(function_name, path, line);
}
*self.get_data().functions.get(function_name).unwrap()
}

fn ensure_type_id(&mut self, kind: crate::TypeKind, lang_type: &str) -> TypeId {
let typ = self.to_raw_type(kind, lang_type);
self.ensure_raw_type_id(typ)
}

fn ensure_raw_type_id(&mut self, typ: crate::TypeRecord) -> TypeId {
if !self.get_data().types.contains_key(&typ.lang_type) {
let mut_data = self.get_mut_data();
mut_data.types.insert(typ.lang_type.clone(), TypeId(mut_data.types.len()));
self.register_raw_type(typ.clone());
}
*self.get_data().types.get(&typ.lang_type).unwrap()
}

fn ensure_variable_id(&mut self, variable_name: &str) -> VariableId {
if !self.get_data().variables.contains_key(variable_name) {
let mut_data = self.get_mut_data();
mut_data.variables.insert(variable_name.to_string(), VariableId(mut_data.variables.len()));
self.register_variable_name(variable_name);
}
*self.get_data().variables.get(variable_name).unwrap()
}

fn register_path(&mut self, path: &std::path::Path) {
self.get_mut_data().path_list.push(path.to_path_buf());
self.add_event(TraceLowLevelEvent::Path(path.to_path_buf()));
}

fn register_function(&mut self, name: &str, path: &std::path::Path, line: Line) {
let path_id = self.ensure_path_id(path);
self.get_mut_data().function_list.push((name.to_string(), path_id, line));
self.add_event(TraceLowLevelEvent::Function(FunctionRecord {
name: name.to_string(),
path_id,
line,
}));
}

fn register_step(&mut self, path: &std::path::Path, line: Line) {
let path_id = self.ensure_path_id(path);
self.add_event(TraceLowLevelEvent::Step(StepRecord { path_id, line }));
}

fn register_call(&mut self, function_id: FunctionId, args: Vec<crate::FullValueRecord>) {
// register a step for each call, the backend expects this for
// non-toplevel calls, so
// we ensure it directly from register_call
if function_id != TOP_LEVEL_FUNCTION_ID {
for arg in &args {
self.register_full_value(arg.variable_id, arg.value.clone());
}
let function = &self.get_data().function_list[function_id.0];
self.add_event(TraceLowLevelEvent::Step(StepRecord {
path_id: function.1,
line: function.2,
}));
}
// the actual call event:
self.add_event(TraceLowLevelEvent::Call(CallRecord { function_id, args }));
}

fn arg(&mut self, name: &str, value: crate::ValueRecord) -> FullValueRecord {
let variable_id = self.ensure_variable_id(name);
FullValueRecord { variable_id, value }
}

fn register_return(&mut self, return_value: crate::ValueRecord) {
self.add_event(TraceLowLevelEvent::Return(ReturnRecord { return_value }));
}

fn register_special_event(&mut self, kind: crate::EventLogKind, content: &str) {
self.add_event(TraceLowLevelEvent::Event(RecordEvent {
kind,
metadata: "".to_string(),
content: content.to_string(),
}));
}

fn to_raw_type(&self, kind: crate::TypeKind, lang_type: &str) -> crate::TypeRecord {
TypeRecord {
kind,
lang_type: lang_type.to_string(),
specific_info: TypeSpecificInfo::None,
}
}

fn register_type(&mut self, kind: crate::TypeKind, lang_type: &str) {
let typ = self.to_raw_type(kind, lang_type);
self.add_event(TraceLowLevelEvent::Type(typ));
}

fn register_raw_type(&mut self, typ: crate::TypeRecord) {
self.add_event(TraceLowLevelEvent::Type(typ));
}

fn register_asm(&mut self, instructions: &[String]) {
self.add_event(TraceLowLevelEvent::Asm(instructions.to_vec()));
}

fn register_variable_with_full_value(&mut self, name: &str, value: crate::ValueRecord) {
let variable_id = self.ensure_variable_id(name);
self.register_full_value(variable_id, value);
}

fn register_variable_name(&mut self, variable_name: &str) {
self.add_event(TraceLowLevelEvent::VariableName(variable_name.to_string()));
}

fn register_full_value(&mut self, variable_id: VariableId, value: crate::ValueRecord) {
self.add_event(TraceLowLevelEvent::Value(FullValueRecord { variable_id, value }));
}

fn register_compound_value(&mut self, place: crate::Place, value: crate::ValueRecord) {
self.add_event(TraceLowLevelEvent::CompoundValue(CompoundValueRecord { place, value }));
}

fn register_cell_value(&mut self, place: crate::Place, value: crate::ValueRecord) {
self.add_event(TraceLowLevelEvent::CellValue(CellValueRecord { place, value }));
}

fn assign_compound_item(&mut self, place: crate::Place, index: usize, item_place: crate::Place) {
self.add_event(TraceLowLevelEvent::AssignCompoundItem(AssignCompoundItemRecord {
place,
index,
item_place,
}));
}

fn assign_cell(&mut self, place: crate::Place, new_value: crate::ValueRecord) {
self.add_event(TraceLowLevelEvent::AssignCell(AssignCellRecord { place, new_value }));
}

fn register_variable(&mut self, variable_name: &str, place: crate::Place) {
let variable_id = self.ensure_variable_id(variable_name);
self.add_event(TraceLowLevelEvent::VariableCell(VariableCellRecord { variable_id, place }));
}

fn drop_variable(&mut self, variable_name: &str) {
let variable_id = self.ensure_variable_id(variable_name);
self.add_event(TraceLowLevelEvent::DropVariable(variable_id));
}

// history event helpers
fn assign(&mut self, variable_name: &str, rvalue: crate::RValue, pass_by: crate::PassBy) {
let variable_id = self.ensure_variable_id(variable_name);
self.add_event(TraceLowLevelEvent::Assignment(AssignmentRecord {
to: variable_id,
from: rvalue,
pass_by,
}));
}

fn bind_variable(&mut self, variable_name: &str, place: crate::Place) {
let variable_id = self.ensure_variable_id(variable_name);
self.add_event(TraceLowLevelEvent::BindVariable(crate::BindVariableRecord { variable_id, place }));
}

fn drop_variables(&mut self, variable_names: &[String]) {
let variable_ids: Vec<VariableId> = variable_names
.to_vec()
.iter()
.map(|variable_name| self.ensure_variable_id(variable_name))
.collect();
self.add_event(TraceLowLevelEvent::DropVariables(variable_ids))
}

fn simple_rvalue(&mut self, variable_name: &str) -> crate::RValue {
let variable_id = self.ensure_variable_id(variable_name);
RValue::Simple(variable_id)
}

fn compound_rvalue(&mut self, variable_dependencies: &[String]) -> crate::RValue {
let variable_ids: Vec<VariableId> = variable_dependencies
.to_vec()
.iter()
.map(|variable_dependency| self.ensure_variable_id(variable_dependency))
.collect();
RValue::Compound(variable_ids)
}

fn drop_last_step(&mut self) {
self.add_event(TraceLowLevelEvent::DropLastStep);
}

fn finish_writing_trace_metadata(&mut self) -> Result<(), Box<dyn Error>> {
if let Some(path) = &self.get_data().trace_metadata_path {
let trace_metadata = TraceMetadata {
program: self.get_data().program.clone(),
args: self.get_data().args.clone(),
workdir: self.get_data().workdir.clone(),
};
let json = serde_json::to_string(&trace_metadata)?;
fs::write(path, json)?;
Ok(())
} else {
panic!("finish_writing_trace_metadata() called without previous call to begin_writing_trace_metadata()");
}
}

fn finish_writing_trace_paths(&mut self) -> Result<(), Box<dyn Error>> {
if let Some(path) = &self.get_data().trace_paths_path {
let json = serde_json::to_string(&self.get_data().path_list)?;
fs::write(path, json)?;
Ok(())
} else {
panic!("finish_writing_trace_paths() called without previous call to begin_writing_trace_paths()");
}
}
}
2 changes: 1 addition & 1 deletion runtime_tracing/src/capnptrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::str::FromStr;
/// The next 3 bytes are reserved/version info. In the initial version, they are zero. Non-zero values might
/// indicate incompatible future versions.
/// The header is 8 bytes in size, ensuring 64-bit alignment for the rest of the file.
const HEADER: &[u8] = &[0xC0, 0xDE, 0x72, 0xAC, 0xE2, 0x00, 0x00, 0x00];
pub const HEADER: &[u8] = &[0xC0, 0xDE, 0x72, 0xAC, 0xE2, 0x00, 0x00, 0x00];

impl From<crate::TypeKind> for trace::TypeKind {
fn from(item: crate::TypeKind) -> Self {
Expand Down
38 changes: 38 additions & 0 deletions runtime_tracing/src/cbor_zstd_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::io::{self, BufRead, BufReader, Read, Seek, Write};

use fscommon::StreamSlice;
use zeekstd::Decoder;

use crate::{TraceLowLevelEvent, cbor_zstd_writer::HEADERV1};

/// Reports whether `reader` has no more data to yield.
///
/// Asks the reader to fill its internal buffer (without consuming anything) and
/// treats an empty buffer as end of input. I/O errors from filling are propagated.
fn is_at_eof<R: BufRead>(reader: &mut R) -> io::Result<bool> {
    reader.fill_buf().map(|pending| pending.is_empty())
}

/// Reads every trace event from a CBOR + zstd encoded trace stream.
///
/// The stream must begin with the 8-byte `HEADERV1` magic. Everything after the
/// header is passed through a `zeekstd` decoder and decoded as a sequence of
/// CBOR-encoded [`TraceLowLevelEvent`] values until the decompressed data is
/// exhausted.
///
/// # Errors
///
/// Returns an error if seeking or reading fails, if the stream is shorter than the
/// header, if the header does not match `HEADERV1` (reported as
/// `io::ErrorKind::InvalidData`), or if decompression or CBOR decoding fails.
pub fn read_trace(input: &mut (impl Read + Write + Seek)) -> Result<Vec<TraceLowLevelEvent>, Box<dyn std::error::Error>> {
    let end_pos = input.seek(io::SeekFrom::End(0))?;
    input.seek(io::SeekFrom::Start(0))?;

    // Only 8 bytes are needed here, so read them straight from the input.
    let mut header_buf = [0u8; 8];
    input.read_exact(&mut header_buf)?;
    if header_buf != HEADERV1 {
        // The file contents are external input: report a bad header to the caller
        // instead of aborting the process.
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Invalid file header (wrong file format or incompatible version)",
        )
        .into());
    }

    input.seek(io::SeekFrom::Start(0))?;
    // Expose only the compressed payload (everything after the header) to the decoder.
    let payload = StreamSlice::new(&mut *input, 8, end_pos)?;

    let decoder = Decoder::new(payload)?;
    let mut buf_reader = BufReader::new(decoder);

    let mut result: Vec<TraceLowLevelEvent> = vec![];

    while !is_at_eof(&mut buf_reader)? {
        let obj = cbor4ii::serde::from_reader::<TraceLowLevelEvent, _>(&mut buf_reader)?;
        result.push(obj);
    }

    Ok(result)
}
Loading