Commit 16a4cfa

Wire stats aggregation, fix batch doc counting, extract publish_to_sinks (#118)
1 parent 4601465 commit 16a4cfa

5 files changed: +74 −46 lines

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [package]
 name = "mstream"
-version = "0.44.0"
+version = "0.45.0"
 edition = "2024"
 authors = ["Simon Makarski"]


src/api/handler/system.rs

Lines changed: 4 additions & 4 deletions
@@ -46,13 +46,13 @@ pub async fn stats(State(state): State<AppState>) -> Result<impl IntoResponse, A

     let uptime = state.start_time.elapsed().as_secs();

-    // TODO: wire total_docs_processed / total_bytes_transferred once
-    // in-engine metrics collection is implemented
+    let (total_events, total_bytes, _) = jm.aggregate_metrics();
+
     Ok((
         StatusCode::OK,
         Json(SystemStats {
-            total_docs_processed: 0,
-            total_bytes_transferred: 0,
+            total_events_processed: total_events,
+            total_bytes_processed: total_bytes,
             uptime_seconds: uptime,
             running_jobs: counts.running,
             stopped_jobs: counts.stopped,

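With the aggregation wired in, the stats endpoint reports real totals instead of hard-coded zeros. Per the updated serialization test in src/api/types.rs below, the response body now has this shape (the values are the test's illustrative ones):

```json
{
  "total_events_processed": 1000,
  "total_bytes_processed": 50000,
  "uptime_seconds": 3600,
  "running_jobs": 3,
  "stopped_jobs": 1,
  "error_jobs": 2
}
```

Note the renames from total_docs_processed / total_bytes_transferred: this is a wire-format change, so consumers of the old field names need updating.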
src/api/types.rs

Lines changed: 5 additions & 5 deletions
@@ -310,8 +310,8 @@ pub struct HealthStatus {

 #[derive(Serialize)]
 pub struct SystemStats {
-    pub total_docs_processed: u64,
-    pub total_bytes_transferred: u64,
+    pub total_events_processed: u64,
+    pub total_bytes_processed: u64,
     pub uptime_seconds: u64,
     pub running_jobs: usize,
     pub stopped_jobs: usize,
@@ -688,15 +688,15 @@ mod tests {
     fn system_stats_serializes() {
         assert_serializes_to(
             &SystemStats {
-                total_docs_processed: 1000,
-                total_bytes_transferred: 50000,
+                total_events_processed: 1000,
+                total_bytes_processed: 50000,
                 uptime_seconds: 3600,
                 running_jobs: 3,
                 stopped_jobs: 1,
                 error_jobs: 2,
             },
             json!({
-                "total_docs_processed": 1000, "total_bytes_transferred": 50000,
+                "total_events_processed": 1000, "total_bytes_processed": 50000,
                 "uptime_seconds": 3600, "running_jobs": 3, "stopped_jobs": 1, "error_jobs": 2,
             }),
         );

src/job_manager/mod.rs

Lines changed: 28 additions & 6 deletions
@@ -66,8 +66,9 @@ impl JobMetricsCounter {
         }
     }

-    pub fn record_success(&self, bytes: u64) {
-        self.events_processed.fetch_add(1, Ordering::Relaxed);
+    pub fn record_success(&self, doc_count: u64, bytes: u64) {
+        self.events_processed
+            .fetch_add(doc_count, Ordering::Relaxed);
         self.bytes_processed.fetch_add(bytes, Ordering::Relaxed);
         self.last_processed_at
             .store(chrono::Utc::now().timestamp_millis(), Ordering::Relaxed);
@@ -259,6 +260,18 @@ impl JobManager {
         self.running_jobs.get(name).map(|jc| &jc.metrics)
     }

+    /// Sums metrics across all running jobs.
+    pub fn aggregate_metrics(&self) -> (u64, u64, u64) {
+        self.running_jobs.values().fold((0, 0, 0), |acc, jc| {
+            let m = &jc.metrics;
+            (
+                acc.0 + m.events_processed.load(Ordering::Relaxed),
+                acc.1 + m.bytes_processed.load(Ordering::Relaxed),
+                acc.2 + m.errors.load(Ordering::Relaxed),
+            )
+        })
+    }
+
     /// Returns job counts grouped by state.
     pub async fn job_state_counts(&self) -> anyhow::Result<JobStateCounts> {
         let jobs = self.job_store.list_all().await?;
@@ -686,8 +699,8 @@ mod tests {
     #[test]
     fn metrics_counter_record_success() {
         let m = JobMetricsCounter::new();
-        m.record_success(256);
-        m.record_success(512);
+        m.record_success(1, 256);
+        m.record_success(1, 512);

         assert_eq!(m.events_processed.load(Ordering::Relaxed), 2);
         assert_eq!(m.bytes_processed.load(Ordering::Relaxed), 768);
@@ -709,13 +722,22 @@ mod tests {
     #[test]
     fn metrics_counter_mixed_success_and_errors() {
         let m = JobMetricsCounter::new();
-        m.record_success(100);
+        m.record_success(1, 100);
         m.record_error();
-        m.record_success(200);
+        m.record_success(1, 200);
         m.record_error();

         assert_eq!(m.events_processed.load(Ordering::Relaxed), 2);
         assert_eq!(m.bytes_processed.load(Ordering::Relaxed), 300);
         assert_eq!(m.errors.load(Ordering::Relaxed), 2);
     }
+
+    #[test]
+    fn metrics_counter_batch_counts_all_docs() {
+        let m = JobMetricsCounter::new();
+        m.record_success(100, 5000);
+
+        assert_eq!(m.events_processed.load(Ordering::Relaxed), 100);
+        assert_eq!(m.bytes_processed.load(Ordering::Relaxed), 5000);
+    }
 }

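Taken together, the counting fix and the new fold can be exercised outside the engine. A minimal, self-contained sketch of the same pattern (simplified: the last_processed_at timestamp is dropped, and a plain Vec stands in for the running-jobs map):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

#[derive(Default)]
struct JobMetricsCounter {
    events_processed: AtomicU64,
    bytes_processed: AtomicU64,
    errors: AtomicU64,
}

impl JobMetricsCounter {
    // The fix: a batch of N documents bumps the event count by N, not by 1.
    fn record_success(&self, doc_count: u64, bytes: u64) {
        self.events_processed.fetch_add(doc_count, Ordering::Relaxed);
        self.bytes_processed.fetch_add(bytes, Ordering::Relaxed);
    }

    fn record_error(&self) {
        self.errors.fetch_add(1, Ordering::Relaxed);
    }
}

// Same fold shape as JobManager::aggregate_metrics: sum the three counters
// across every job. Relaxed loads suffice because each counter is independent
// and only ever increases; the result is a snapshot, not a transaction.
fn aggregate(jobs: &[JobMetricsCounter]) -> (u64, u64, u64) {
    jobs.iter().fold((0, 0, 0), |acc, m| {
        (
            acc.0 + m.events_processed.load(Ordering::Relaxed),
            acc.1 + m.bytes_processed.load(Ordering::Relaxed),
            acc.2 + m.errors.load(Ordering::Relaxed),
        )
    })
}

fn main() {
    let jobs = vec![JobMetricsCounter::default(), JobMetricsCounter::default()];
    jobs[0].record_success(100, 5000); // one batch of 100 docs
    jobs[1].record_success(1, 256); // the single-event path passes doc_count = 1
    jobs[1].record_error();

    // The stats handler destructures the same tuple, discarding the error
    // count: let (total_events, total_bytes, _) = jm.aggregate_metrics();
    assert_eq!(aggregate(&jobs), (101, 5256, 1));
}
```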
src/provision/pipeline/processor.rs

Lines changed: 36 additions & 30 deletions
@@ -66,7 +66,7 @@ impl EventHandler {
                 .map_err(|err| anyhow!("failed to apply source schema: {}", err))
         })?;

-        if let Err(err) = self.process_event(event).await {
+        if let Err(err) = self.process_event(event, 1).await {
             error!(job_name = %self.pipeline.name, "failed to process event: {}", err)
         }
     }
@@ -127,6 +127,7 @@ impl EventHandler {
         let source_encoding = batch[0].encoding.clone();
         let attributes = batch[0].attributes.clone();
         let last_cursor = batch.last().and_then(|event| event.cursor.clone());
+        let doc_count = batch.len() as u64;
         let payloads: Vec<Vec<u8>> = batch.drain(..).map(|event| event.raw_bytes).collect();

         let batch_event = block_in_place(|| {
@@ -150,37 +151,55 @@ impl EventHandler {
             batch_event.raw_bytes.len()
         );

-        if let Err(err) = self.process_event(batch_event).await {
+        if let Err(err) = self.process_event(batch_event, doc_count).await {
             error!(job_name = %self.pipeline.name, "failed to process event: {}", err)
         }

         Ok(())
     }

-    async fn process_event(&mut self, source_event: SourceEvent) -> anyhow::Result<()> {
+    async fn process_event(
+        &mut self,
+        source_event: SourceEvent,
+        doc_count: u64,
+    ) -> anyhow::Result<()> {
         let event_bytes = source_event.raw_bytes.len() as u64;
-        let event_holder = self.apply_middlewares(source_event).await?;
-        let mut all_sinks_succeeded = true;

-        // Extract cursor before the loop for checkpointing
+        let event_holder = match self.apply_middlewares(source_event).await {
+            Ok(holder) => holder,
+            Err(err) => {
+                self.record_event_outcome(false, doc_count, event_bytes);
+                return Err(err);
+            }
+        };
+
         let cursor = event_holder.cursor.clone();
+        let all_sinks_succeeded = self.publish_to_sinks(event_holder).await;
+
+        self.record_event_outcome(all_sinks_succeeded, doc_count, event_bytes);
+
+        if self.pipeline.with_checkpoints && all_sinks_succeeded {
+            self.save_checkpoint(cursor).await?;
+        }

+        Ok(())
+    }
+
+    async fn publish_to_sinks(&mut self, event_holder: SourceEvent) -> bool {
+        let mut all_succeeded = true;
         let mut event_option = Some(event_holder);
         let sinks_len = self.pipeline.sinks.len();

         for (i, sink_def) in self.pipeline.sinks.iter_mut().enumerate() {
             let sink_event_result = block_in_place(|| {
                 let event = if i == sinks_len - 1 {
-                    // Move the event on the last iteration to avoid clone
                     event_option.take().expect("event should be present")
                 } else {
-                    // Clone for intermediate sinks
                     event_option
                         .as_ref()
                         .expect("event should be present")
                         .clone()
                 };
-                // apply sink schema
                 event.apply_schema(Some(&sink_def.config.output_encoding), &sink_def.schema)
             });

@@ -193,16 +212,13 @@ impl EventHandler {
                     error!(
                         job_name = %self.pipeline.name,
                         "failed to encode for sink {}/{}: {}",
-                        sink_def.config.service_name,
-                        sink_def.config.resource,
-                        err
+                        sink_def.config.service_name, sink_def.config.resource, err
                     );
-                    all_sinks_succeeded = false;
+                    all_succeeded = false;
                     continue;
                 }
             };

-            // maybe we need a config in publisher, ie explode batch
             match sink_def
                 .sink
                 .publish(sink_event, sink_def.config.resource.clone(), None)
@@ -212,38 +228,28 @@ impl EventHandler {
                     info!(
                         job_name = %self.pipeline.name,
                         "published to {}/{}: {}",
-                        sink_def.config.service_name,
-                        sink_def.config.resource,
-                        message
+                        sink_def.config.service_name, sink_def.config.resource, message
                     );
                 }
                 Err(err) => {
                     error!(
                         job_name = %self.pipeline.name,
                         "failed to publish to {}/{}: {:#}",
-                        sink_def.config.service_name,
-                        sink_def.config.resource,
-                        err
+                        sink_def.config.service_name, sink_def.config.resource, err
                     );
-                    all_sinks_succeeded = false;
+                    all_succeeded = false;
                     continue;
                 }
             };
         }

-        self.record_event_outcome(all_sinks_succeeded, event_bytes);
-
-        if self.pipeline.with_checkpoints && all_sinks_succeeded {
-            self.save_checkpoint(cursor).await?;
-        }
-
-        Ok(())
+        all_succeeded
     }

-    fn record_event_outcome(&self, success: bool, bytes: u64) {
+    fn record_event_outcome(&self, success: bool, doc_count: u64, bytes: u64) {
         if let Some(ref m) = self.metrics {
             if success {
-                m.record_success(bytes);
+                m.record_success(doc_count, bytes);
             } else {
                 m.record_error();
             }

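The extracted publish_to_sinks keeps the existing clone-avoidance trick: every intermediate sink gets a clone of the event, and the final sink takes ownership via Option::take. A standalone sketch of that loop shape (a hypothetical Event type and (name, fails) pairs stand in for the real sink definitions; encode and publish are simulated):

```rust
#[derive(Clone)]
struct Event {
    raw_bytes: Vec<u8>,
}

// Returns true only if every sink accepted the event, mirroring the flag
// process_event uses to decide whether to save a checkpoint.
fn publish_to_sinks(event: Event, sinks: &[(&str, bool)]) -> bool {
    let mut all_succeeded = true;
    let mut event_option = Some(event);
    let sinks_len = sinks.len();

    for (i, (name, fails)) in sinks.iter().enumerate() {
        // Move the event on the last iteration to avoid a clone;
        // intermediate sinks each receive their own clone.
        let event = if i == sinks_len - 1 {
            event_option.take().expect("event should be present")
        } else {
            event_option
                .as_ref()
                .expect("event should be present")
                .clone()
        };

        // Stand-in for encode + publish. On failure the real loop logs,
        // flips the flag, and continues so the remaining sinks still run.
        if *fails {
            eprintln!("failed to publish to {name}");
            all_succeeded = false;
            continue;
        }
        println!("published {} bytes to {name}", event.raw_bytes.len());
    }

    all_succeeded
}

fn main() {
    let event = Event { raw_bytes: b"hello".to_vec() };
    // First sink fails, second still runs; no checkpoint would be saved.
    assert!(!publish_to_sinks(event, &[("pubsub", true), ("s3", false)]));
}
```

Splitting the loop out also lets process_event record an outcome on the middleware error path before returning, which the previous single-function version skipped by bailing out with `?`.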