Skip to content

Commit f8096cb

Browse files
authored
chore(crashtracking): improve crashtracking tests (#14678)
## Description This PR updates the name of the test helper `wait_for_crash_messages` to `get_all_crash_messages`. The reason for this change is because `wait_for_crash_messages` actually returns all previous crash messages (telemetry requests originating from `crashtracking`) that the test client has received up to one second after the helper is called. This naming issue caused confusion when reasoning about a testing bug fixed in #14654, that was introduced with the addition of the `crash_ping` mechanism from the Crashtracker, where now, we receive more than one message from the Crashtracker per crash. This PR also adds a check for the crash ping for each crashtracking test. ## Testing Existing updated unit tests ## Risks <!-- Note any risks associated with this change, or "None" if no risks --> ## Additional Notes <!-- Any other information that would be helpful for reviewers -->
1 parent 5b46dec commit f8096cb

File tree

2 files changed

+76
-12
lines changed

2 files changed

+76
-12
lines changed

tests/internal/crashtracker/test_crashtracker.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ def test_crashtracker_simple():
129129
# 2. Listens on that port for new connections
130130
# 3. Starts the crashtracker with the URL set to the port
131131
# 4. Crashes the process
132-
# 5. Verifies that the crashtracker sends a crash report to the server
132+
# 5. Verifies that the crashtracker sends a crash ping to the server
133+
# 6. Verifies that the crashtracker sends a crash report to the server
133134
import ctypes
134135
import os
135136

@@ -147,8 +148,10 @@ def test_crashtracker_simple():
147148
ctypes.string_at(0)
148149
sys.exit(-1)
149150

150-
# Part 5
151-
# Check to see if the listening socket was triggered, if so accept the connection
151+
# Part 5, Check for the crash ping
152+
_ping = utils.get_crash_ping(client)
153+
154+
# Part 6, Check to see if the listening socket was triggered, if so accept the connection
152155
# then check to see if the resulting connection is readable
153156
report = utils.get_crash_report(client)
154157
# The crash came from string_at. Since the over-the-wire format is multipart, chunked HTTP,
@@ -181,7 +184,10 @@ def test_crashtracker_simple_fork():
181184
ctypes.string_at(0)
182185
sys.exit(-1) # just in case
183186

184-
# Part 5, check
187+
# Part 5, check for crash ping
188+
_ping = utils.get_crash_ping(client)
189+
190+
# Part 6, check for crash report
185191
report = utils.get_crash_report(client)
186192
assert b"string_at" in report["body"]
187193

@@ -235,7 +241,10 @@ def test_crashtracker_simple_sigbus():
235241
arr[4095] = b"x" # sigbus
236242
sys.exit(-1) # just in case
237243

238-
# Part 5, check
244+
# Part 5, check for crash ping
245+
_ping = utils.get_crash_ping(client)
246+
247+
# Part 6, check for crash report
239248
report = utils.get_crash_report(client)
240249
assert report["body"]
241250

@@ -261,7 +270,10 @@ def test_crashtracker_raise_sigsegv():
261270
os.kill(os.getpid(), signal.SIGSEGV.value)
262271
sys.exit(-1)
263272

264-
# Part 5, check
273+
# Part 5, check for crash ping
274+
_ping = utils.get_crash_ping(client)
275+
276+
# Part 6, check for crash report
265277
report = utils.get_crash_report(client)
266278
assert b"os_kill" in report["body"]
267279

@@ -287,7 +299,10 @@ def test_crashtracker_raise_sigbus():
287299
os.kill(os.getpid(), signal.SIGBUS.value)
288300
sys.exit(-1)
289301

290-
# Part 5, check
302+
# Part 5, check for crash ping
303+
_ping = utils.get_crash_ping(client)
304+
305+
# Part 6, check for crash report
291306
report = utils.get_crash_report(client)
292307
assert b"os_kill" in report["body"]
293308

@@ -311,7 +326,10 @@ def test_crashtracker_preload_default(ddtrace_run_python_code_in_subprocess):
311326
assert not stderr
312327
assert exitcode == -11 # exit code for SIGSEGV
313328

314-
# Wait for the connection
329+
# Part 5, check for crash ping
330+
_ping = utils.get_crash_ping(client)
331+
332+
# Part 6, check for crash report
315333
report = utils.get_crash_report(client)
316334
assert b"string_at" in report["body"]
317335

@@ -352,7 +370,10 @@ def test_crashtracker_auto_default(run_python_code_in_subprocess):
352370
assert not stderr
353371
assert exitcode == -11
354372

355-
# Wait for the connection
373+
# Part 5, check for crash ping
374+
_ping = utils.get_crash_ping(client)
375+
376+
# Part 6, check for crash report
356377
report = utils.get_crash_report(client)
357378
assert b"string_at" in report["body"]
358379

@@ -370,6 +391,9 @@ def test_crashtracker_auto_nostack(run_python_code_in_subprocess):
370391
assert not stderr
371392
assert exitcode == -11
372393

394+
# Check for crash ping
395+
_ping = utils.get_crash_ping(client)
396+
373397
# Wait for the connection
374398
report = utils.get_crash_report(client)
375399
assert b"string_at" not in report["body"]
@@ -413,6 +437,10 @@ def test_crashtracker_tags_required():
413437
ctypes.string_at(0)
414438
sys.exit(-1)
415439

440+
# Check for crash ping
441+
_ping = utils.get_crash_ping(client)
442+
443+
# Check for crash report
416444
report = utils.get_crash_report(client)
417445
assert b"string_at" in report["body"]
418446

@@ -446,7 +474,10 @@ def test_crashtracker_user_tags_envvar(run_python_code_in_subprocess):
446474
assert not stderr
447475
assert exitcode == -11
448476

449-
# Wait for the connection
477+
# Check for crash ping
478+
_ping = utils.get_crash_ping(client)
479+
480+
# Check for crash report
450481
report = utils.get_crash_report(client)
451482

452483
# Now check for the tags
@@ -466,6 +497,10 @@ def test_crashtracker_set_tag_profiler_config(snapshot_context, run_python_code_
466497
assert not stderr
467498
assert exitcode == -11
468499

500+
# Check for crash ping
501+
_ping = utils.get_crash_ping(client)
502+
503+
# Check for crash report
469504
report = utils.get_crash_report(client)
470505
# Now check for the profiler_config tag
471506
assert b"profiler_config" in report["body"]
@@ -501,6 +536,10 @@ def test_crashtracker_user_tags_profiling():
501536
ctypes.string_at(0)
502537
sys.exit(-1)
503538

539+
# Check for crash ping
540+
_ping = utils.get_crash_ping(client)
541+
542+
# Check for crash report
504543
report = utils.get_crash_report(client)
505544
assert b"string_at" in report["body"]
506545

@@ -539,6 +578,10 @@ def test_crashtracker_user_tags_core():
539578
ctypes.string_at(0)
540579
sys.exit(-1)
541580

581+
# Check for crash ping
582+
_ping = utils.get_crash_ping(client)
583+
584+
# Check for crash report
542585
report = utils.get_crash_report(client)
543586
assert b"string_at" in report["body"]
544587

tests/internal/crashtracker/utils.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,12 @@ def logs(self):
112112
return read_files([self.stdout, self.stderr])
113113

114114

115-
def wait_for_crash_messages(test_agent_client: TestAgentClient) -> List[TestAgentRequest]:
115+
def get_all_crash_messages(test_agent_client: TestAgentClient) -> List[TestAgentRequest]:
116+
"""
117+
A test helper to get *all* crash messages is necessary, because crash pings and crash reports
118+
are sent through async network requests, so we don't have a guarantee of the order they are received.
119+
We differentiate between crash pings and crash reports downstream
120+
"""
116121
seen_report_ids = set()
117122
crash_messages = []
118123
# 5 iterations * 0.2 second = 1 second total should be enough to get ping + report
@@ -137,7 +142,7 @@ def wait_for_crash_messages(test_agent_client: TestAgentClient) -> List[TestAgen
137142

138143
def get_crash_report(test_agent_client: TestAgentClient) -> TestAgentRequest:
139144
"""Wait for a crash report from the crashtracker listener socket."""
140-
crash_messages = wait_for_crash_messages(test_agent_client)
145+
crash_messages = get_all_crash_messages(test_agent_client)
141146
# We want at least the crash report
142147
assert len(crash_messages) == 2, f"Expected at least 2 messages; got {len(crash_messages)}"
143148

@@ -152,6 +157,22 @@ def get_crash_report(test_agent_client: TestAgentClient) -> TestAgentRequest:
152157
return crash_report
153158

154159

160+
def get_crash_ping(test_agent_client: TestAgentClient) -> TestAgentRequest:
161+
"""Wait for a crash report from the crashtracker listener socket."""
162+
crash_messages = get_all_crash_messages(test_agent_client)
163+
assert len(crash_messages) == 2, f"Expected at least 2 messages; got {len(crash_messages)}"
164+
165+
# Find the crash ping (the one with "is_crash_ping":"true")
166+
crash_ping = None
167+
for message in crash_messages:
168+
if b"is_crash_ping:true" in message["body"]:
169+
crash_ping = message
170+
break
171+
172+
assert crash_ping is not None, "Could not find crash ping with 'is_crash_ping:true' tag"
173+
return crash_ping
174+
175+
155176
@contextmanager
156177
def with_test_agent() -> Generator[TestAgentClient, None, None]:
157178
base_url = ddtrace.tracer.agent_trace_url or "http://localhost:9126" # default to local test agent

0 commit comments

Comments
 (0)