Skip to content

Commit 106c59a

Browse files
gyuheon0h and danielsn authored
[crashtracking] send initial crash_ping message for crash reporting (#1209)
Co-authored-by: danielsn <[email protected]>
1 parent 316dd3e commit 106c59a

File tree

4 files changed

+421
-17
lines changed

4 files changed

+421
-17
lines changed

bin_tests/tests/crashtracker_bin_test.rs

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ fn test_crash_tracking_bin_prechain_sigabrt() {
129129
test_crash_tracking_bin(BuildProfile::Release, "prechain_abort", "null_deref");
130130
}
131131

132+
#[test]
133+
#[cfg_attr(miri, ignore)]
134+
fn test_crash_ping_timing_and_content() {
135+
test_crash_tracking_bin(BuildProfile::Release, "donothing", "null_deref");
136+
}
137+
132138
// This test is disabled for now on x86_64 musl and macos
133139
// It seems that on aarch64 musl, libc has CFI which allows
134140
// unwinding past the signal frame.
@@ -283,7 +289,7 @@ fn test_crash_tracking_bin(
283289
assert_eq!(Ok(""), String::from_utf8(stdout).as_deref());
284290

285291
// Check the crash data
286-
let crash_profile = fs::read(fixtures.crash_profile_path)
292+
let crash_profile = fs::read(&fixtures.crash_profile_path)
287293
.context("reading crashtracker profiling payload")
288294
.unwrap();
289295
let crash_payload = serde_json::from_slice::<serde_json::Value>(&crash_profile)
@@ -304,7 +310,7 @@ fn test_crash_tracking_bin(
304310
let error = &crash_payload["error"];
305311
assert_error_message(&error["message"], sig_info);
306312

307-
let crash_telemetry = fs::read(fixtures.crash_telemetry_path)
313+
let crash_telemetry = fs::read(&fixtures.crash_telemetry_path)
308314
.context("reading crashtracker telemetry payload")
309315
.unwrap();
310316
assert_telemetry_message(&crash_telemetry, crash_typ);
@@ -538,8 +544,41 @@ fn crash_tracking_empty_endpoint() {
538544
.spawn()
539545
.unwrap();
540546

541-
let (mut stream, _) = listener.accept().unwrap();
547+
// With parallel crash ping, we might receive requests in either order
548+
let (mut stream1, _) = listener.accept().unwrap();
549+
let body1 = read_http_request_body(&mut stream1);
550+
551+
// Send 200 OK response to keep connection open
552+
stream1
553+
.write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n")
554+
.unwrap();
555+
556+
let (mut stream2, _) = listener.accept().unwrap();
557+
let body2 = read_http_request_body(&mut stream2);
558+
559+
// Send 404 response to close connection
560+
stream2
561+
.write_all(b"HTTP/1.1 404\r\nContent-Length: 0\r\n\r\n")
562+
.unwrap();
563+
564+
// Determine which is crash ping vs crash report based on content
565+
let is_body1_crash_ping = body1.contains("is_crash_ping:true");
566+
let is_body2_crash_ping = body2.contains("is_crash_ping:true");
567+
568+
if is_body1_crash_ping && !is_body2_crash_ping {
569+
// body1 = crash ping, body2 = crash report
570+
validate_crash_ping_telemetry(&body1);
571+
assert_telemetry_message(body2.as_bytes(), "null_deref");
572+
} else if is_body2_crash_ping && !is_body1_crash_ping {
573+
// body1 = crash report, body2 = crash ping
574+
assert_telemetry_message(body1.as_bytes(), "null_deref");
575+
validate_crash_ping_telemetry(&body2);
576+
} else {
577+
panic!("Expected one crash ping and one crash report, but got: body1_crash_ping={is_body1_crash_ping}, body2_crash_ping={is_body2_crash_ping}");
578+
}
579+
}
542580

581+
fn read_http_request_body(stream: &mut impl Read) -> String {
543582
// The read call is not guaranteed to collect all available data. On OSX it appears to grab
544583
// data in 8192 byte chunks. This was not an issue when the size of a crashreport was below
545584
// that, but is a problem when the size is greater.
@@ -554,7 +593,7 @@ fn crash_tracking_empty_endpoint() {
554593
// available and deadlock.
555594
// 2: The read call returns some but not all of the available bytes. We exit
556595
// early with a malformed string.
557-
// Since this is a test, the risk of those are low, but if this test spuriously fails, that
596+
// Since this is for testing, the risk of those is low, but if tests spuriously fail, that
558597
// is a good place to look.
559598
let mut out = vec![0; 65536];
560599
let blocksize = 8192;
@@ -569,17 +608,55 @@ fn crash_tracking_empty_endpoint() {
569608
left += blocksize;
570609
right += blocksize;
571610
}
572-
// We write a 404 back to the client to finish the handshake and have them end their
573-
// transmission. Its not clear to me that we should unwrap here: if the client timed out, it
574-
// won't receive the message, but is that an error in the test, or should the test still
575-
// continue and succeed if the message itself was received by the agent?
576-
stream
577-
.write_all(b"HTTP/1.1 404\r\nContent-Length: 0\r\n\r\n")
578-
.unwrap();
579611
let resp = String::from_utf8_lossy(&out[..total_read]);
580612
let pos = resp.find("\r\n\r\n").unwrap();
581-
let body = &resp[pos + 4..];
582-
assert_telemetry_message(body.as_bytes(), "null_deref");
613+
resp[pos + 4..].to_string()
614+
}
615+
616+
fn validate_crash_ping_telemetry(body: &str) {
617+
let telemetry_payload: serde_json::Value =
618+
serde_json::from_str(body).expect("Crash ping should be valid JSON");
619+
620+
assert_eq!(telemetry_payload["request_type"], "logs");
621+
assert_eq!(telemetry_payload["payload"].as_array().unwrap().len(), 1);
622+
623+
let log_entry = &telemetry_payload["payload"][0];
624+
625+
let tags = log_entry["tags"].as_str().unwrap();
626+
assert!(
627+
tags.contains("is_crash_ping:true"),
628+
"Expected crash ping telemetry with is_crash_ping:true, but got tags: {tags}"
629+
);
630+
631+
// Check for specific signal information in tags (for null_deref crash type)
632+
assert!(
633+
tags.contains("si_signo:11"),
634+
"Expected si_signo:11 (SIGSEGV) in tags, but got tags: {tags}"
635+
);
636+
assert!(
637+
tags.contains("si_signo_human_readable:SIGSEGV"),
638+
"Expected si_signo_human_readable:SIGSEGV in tags, but got tags: {tags}"
639+
);
640+
assert!(
641+
tags.contains("si_code_human_readable:SEGV_ACCERR")
642+
|| tags.contains("si_code_human_readable:SEGV_MAPERR"),
643+
"Expected si_code_human_readable:SEGV_ACCERR or SEGV_MAPERR in tags, but got tags: {tags}"
644+
);
645+
646+
let message_str = log_entry["message"]
647+
.as_str()
648+
.expect("Message field should exist as a string");
649+
let message_json: serde_json::Value =
650+
serde_json::from_str(message_str).expect("Message should be valid JSON");
651+
652+
let crash_uuid = message_json["crash_uuid"]
653+
.as_str()
654+
.expect("crash_uuid should be present and be a string");
655+
assert!(!crash_uuid.is_empty(), "crash_uuid should be non-empty");
656+
657+
assert_eq!(message_json["version"].as_str(), Some("1.0"));
658+
659+
assert_eq!(message_json["kind"].as_str(), Some("Crash ping"));
583660
}
584661

585662
struct TestFixtures<'a> {

0 commit comments

Comments
 (0)