91 changes: 88 additions & 3 deletions crates/audit/src/archiver.rs
@@ -1,6 +1,11 @@
use crate::metrics::Metrics;
use crate::reader::{Event, EventReader};
use crate::storage::EventWriter;
use crate::metrics::{
EventType, Metrics, decrement_in_flight_archive_tasks, increment_events_processed,
increment_failed_archive_tasks, increment_in_flight_archive_tasks,
record_archive_event_duration, record_event_age, record_kafka_commit_duration,
record_kafka_read_duration,
};
use crate::reader::{Event, EventReader, UserOpEventReader};
use crate::storage::{EventWriter, UserOpEventWriter};
use anyhow::Result;
use std::fmt;
use std::marker::PhantomData;
@@ -161,3 +166,83 @@ where
}
}
}

/// Archives UserOp audit events from Kafka to S3 storage.
pub struct KafkaUserOpAuditArchiver<R, W>
where
R: UserOpEventReader,
W: UserOpEventWriter + Clone + Send + 'static,
{
reader: R,
writer: W,
}

impl<R, W> fmt::Debug for KafkaUserOpAuditArchiver<R, W>
where
R: UserOpEventReader,
W: UserOpEventWriter + Clone + Send + 'static,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("KafkaUserOpAuditArchiver")
.finish_non_exhaustive()
}
}

impl<R, W> KafkaUserOpAuditArchiver<R, W>
where
R: UserOpEventReader,
W: UserOpEventWriter + Clone + Send + 'static,
{
/// Creates a new UserOp archiver with the given reader and writer.
pub const fn new(reader: R, writer: W) -> Self {
Self { reader, writer }
}

/// Runs the archiver loop, reading UserOp events and writing them to storage.
pub async fn run(&mut self) -> Result<()> {
info!("Starting Kafka UserOp archiver");

loop {
let read_start = Instant::now();
match self.reader.read_event().await {
Ok(event) => {
record_kafka_read_duration(read_start.elapsed(), EventType::UserOp);

let now_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
let event_age_ms = now_ms.saturating_sub(event.timestamp);
record_event_age(event_age_ms as f64, EventType::UserOp);

let writer = self.writer.clone();
increment_in_flight_archive_tasks(EventType::UserOp);
tokio::spawn(async move {
let archive_start = Instant::now();
if let Err(e) = writer.archive_userop_event(event).await {
error!(error = %e, "Failed to write UserOp event");
increment_failed_archive_tasks(EventType::UserOp);
} else {
record_archive_event_duration(
archive_start.elapsed(),
EventType::UserOp,
);
increment_events_processed(EventType::UserOp);
}
decrement_in_flight_archive_tasks(EventType::UserOp);
});

let commit_start = Instant::now();
if let Err(e) = self.reader.commit().await {
error!(error = %e, "Failed to commit message");
}
record_kafka_commit_duration(commit_start.elapsed(), EventType::UserOp);
}
Err(e) => {
error!(error = %e, "Error reading UserOp events");
sleep(Duration::from_secs(1)).await;
}
}
}
}
}
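
For context, here is a minimal sketch of how this archiver might be wired up. `KafkaUserOpReader` and `S3UserOpWriter` are hypothetical placeholder types standing in for concrete `UserOpEventReader`/`UserOpEventWriter` implementations, and the `tips_audit` crate name is assumed; none of these appear in this diff.

```rust
use anyhow::Result;
use tips_audit::KafkaUserOpAuditArchiver;

#[tokio::main]
async fn main() -> Result<()> {
    // Hypothetical constructors; this PR only defines the archiver itself.
    let reader = KafkaUserOpReader::connect("kafka:9092", "userop-audit-group").await?;
    let writer = S3UserOpWriter::new("audit-bucket").await?;

    // run() loops indefinitely: read errors are logged and retried after
    // a 1s sleep rather than propagated.
    let mut archiver = KafkaUserOpAuditArchiver::new(reader, writer);
    archiver.run().await
}
```

Worth noting from the loop above: the Kafka offset is committed as soon as the archive task is spawned, not when it completes, so a failed S3 write increments `tips_audit_failed_archive_tasks` but the event is not redelivered.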
2 changes: 1 addition & 1 deletion crates/audit/src/lib.rs
@@ -8,7 +8,7 @@
#![cfg_attr(not(test), warn(unused_crate_dependencies))]

mod archiver;
pub use archiver::KafkaAuditArchiver;
pub use archiver::{KafkaAuditArchiver, KafkaUserOpAuditArchiver};

mod metrics;
pub use metrics::Metrics;
90 changes: 71 additions & 19 deletions crates/audit/src/metrics.rs
@@ -1,26 +1,90 @@
use metrics::{Counter, Gauge, Histogram};
use metrics_derive::Metrics;
use std::time::Duration;

/// Event type tag for metrics differentiation.
#[derive(Clone, Copy)]
#[allow(dead_code)] // Bundle variant kept for future use when Bundle archiver uses tagged metrics
pub(crate) enum EventType {
Bundle,
UserOp,
}

impl EventType {
const fn as_str(&self) -> &'static str {
match self {
Self::Bundle => "bundle",
Self::UserOp => "userop",
}
}
}

pub(crate) fn record_archive_event_duration(duration: Duration, event_type: EventType) {
metrics::histogram!("tips_audit_archive_event_duration", "type" => event_type.as_str())
.record(duration.as_secs_f64());
}

pub(crate) fn record_event_age(age_ms: f64, event_type: EventType) {
metrics::histogram!("tips_audit_event_age", "type" => event_type.as_str()).record(age_ms);
}

pub(crate) fn record_kafka_read_duration(duration: Duration, event_type: EventType) {
metrics::histogram!("tips_audit_kafka_read_duration", "type" => event_type.as_str())
.record(duration.as_secs_f64());
}

pub(crate) fn record_kafka_commit_duration(duration: Duration, event_type: EventType) {
metrics::histogram!("tips_audit_kafka_commit_duration", "type" => event_type.as_str())
.record(duration.as_secs_f64());
}

pub(crate) fn increment_events_processed(event_type: EventType) {
metrics::counter!("tips_audit_events_processed", "type" => event_type.as_str()).increment(1);
}

pub(crate) fn increment_in_flight_archive_tasks(event_type: EventType) {
metrics::gauge!("tips_audit_in_flight_archive_tasks", "type" => event_type.as_str())
.increment(1.0);
}

pub(crate) fn decrement_in_flight_archive_tasks(event_type: EventType) {
metrics::gauge!("tips_audit_in_flight_archive_tasks", "type" => event_type.as_str())
.decrement(1.0);
}

pub(crate) fn increment_failed_archive_tasks(event_type: EventType) {
metrics::counter!("tips_audit_failed_archive_tasks", "type" => event_type.as_str())
.increment(1);
}
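
These helpers give both archivers one shared set of metric names, differentiated by the `type` label (with two `EventType` variants, at most two series per metric). A minimal in-crate sketch of the intended call pattern; since the helpers are `pub(crate)` this would live inside the audit crate, and it assumes a metrics recorder has been installed (the `metrics` macros are silent no-ops otherwise):

```rust
use std::time::Instant;

fn process_one_userop_event() {
    let read_start = Instant::now();
    // ... read an event from Kafka ...
    record_kafka_read_duration(read_start.elapsed(), EventType::UserOp);

    increment_in_flight_archive_tasks(EventType::UserOp);
    let archive_start = Instant::now();
    // ... write the event to S3 ...
    record_archive_event_duration(archive_start.elapsed(), EventType::UserOp);
    increment_events_processed(EventType::UserOp);
    decrement_in_flight_archive_tasks(EventType::UserOp);
    // Emits series such as tips_audit_events_processed{type="userop"}.
}
```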

/// Metrics for audit operations including Kafka reads, S3 writes, and event processing.
#[derive(Metrics, Clone)]
#[metrics(scope = "tips_audit")]
pub struct Metrics {
/// Duration of archive_event operations.
#[metric(describe = "Duration of archive_event")]
/// Duration of archiving a single event.
#[metric(describe = "Duration of archiving a single event")]
pub archive_event_duration: Histogram,

/// Age of event when processed (now - event timestamp).
#[metric(describe = "Age of event when processed (now - event timestamp)")]
/// Age of events when processed (ms since event creation).
#[metric(describe = "Age of events when processed")]
pub event_age: Histogram,

/// Duration of Kafka read_event operations.
#[metric(describe = "Duration of Kafka read_event")]
/// Duration of Kafka read operations.
#[metric(describe = "Duration of Kafka read operations")]
pub kafka_read_duration: Histogram,

/// Duration of Kafka commit operations.
#[metric(describe = "Duration of Kafka commit")]
#[metric(describe = "Duration of Kafka commit operations")]
pub kafka_commit_duration: Histogram,

/// Total events processed.
#[metric(describe = "Total events processed")]
pub events_processed: Counter,

/// Number of in-flight archive tasks.
#[metric(describe = "Number of in-flight archive tasks")]
pub in_flight_archive_tasks: Gauge,

/// Duration of update_bundle_history operations.
#[metric(describe = "Duration of update_bundle_history")]
pub update_bundle_history_duration: Histogram,
@@ -37,19 +101,7 @@ pub struct Metrics {
#[metric(describe = "Duration of S3 put_object")]
pub s3_put_duration: Histogram,

/// Total events processed.
#[metric(describe = "Total events processed")]
pub events_processed: Counter,

/// Total S3 writes skipped due to deduplication.
#[metric(describe = "Total S3 writes skipped due to dedup")]
pub s3_writes_skipped: Counter,

/// Number of in-flight archive tasks.
#[metric(describe = "Number of in-flight archive tasks")]
pub in_flight_archive_tasks: Gauge,

/// Number of failed archive tasks.
#[metric(describe = "Number of failed archive tasks")]
pub failed_archive_tasks: Counter,
}
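
For comparison with the tagged helpers above, a rough sketch of how the derived struct is used, assuming `#[derive(Metrics)]` from `metrics_derive` generates a `Default` impl that registers each field as `<scope>_<field_name>` (e.g. `tips_audit_s3_put_duration`); the exact generated API is not shown in this diff:

```rust
fn record_bundle_write(metrics: &Metrics) {
    // Field handles record directly; no `type` label is attached.
    metrics.s3_put_duration.record(0.012);
    metrics.events_processed.increment(1);
    metrics.in_flight_archive_tasks.increment(1.0);
    metrics.in_flight_archive_tasks.decrement(1.0);
}
```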