Skip to content

Commit 4412e2b

Browse files
xinhaoyuan authored and copybara-github committed
Add more coordination between the watchdog thread and the test thread.
This resolves the race between watchdog failure reporting and test iterations. PiperOrigin-RevId: 853389106
1 parent 3684535 commit 4412e2b

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

centipede/runner.cc

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include <vector>
4949

5050
#include "absl/base/nullability.h"
51+
#include "absl/base/optimization.h"
5152
#include "./centipede/byte_array_mutator.h"
5253
#include "./centipede/dispatcher_flag_helper.h"
5354
#include "./centipede/execution_metadata.h"
@@ -99,6 +100,26 @@ static uint64_t TimeInUsec() {
99100
return tv.tv_sec * kUsecInSec + tv.tv_usec;
100101
}
101102

103+
// Atomic flags to make sure that (a) watchdog failure is reported only for
104+
// the current input, and (b) only one thread is handling watchdog failures.
105+
106+
// True if the watchdog thread is detecting failures, false otherwise.
107+
static std::atomic<bool> watchdog_thread_busy = false;
108+
// True if a watchdog failure is found, false otherwise.
109+
static std::atomic<bool> watchdog_failure_found = false;
110+
111+
// Blocks the calling thread until `watchdog_thread_busy` reads false.
//
// Each iteration re-reads both atomic flags:
//   - If `watchdog_failure_found` is set, sleeps one second per iteration;
//     per the inline comment, the process is expected to terminate while we
//     wait here.
//   - Otherwise calls `sleep(0)` and polls again.
//
// Returns immediately when `watchdog_thread_busy` is already false, which the
// `ABSL_PREDICT_FALSE` hints mark as the expected case.
static void WaitWatchdogThreadIdle() {
112+
while (ABSL_PREDICT_FALSE(watchdog_thread_busy.load())) {
113+
if (ABSL_PREDICT_FALSE(watchdog_failure_found.load())) {
114+
// A failure is found - wait for the process to terminate.
115+
sleep(1); // NOLINT
116+
} else {
117+
// Busy-wait for the detection.
118+
sleep(0); // NOLINT
119+
}
120+
}
121+
}
122+
102123
static void CheckWatchdogLimits() {
103124
const uint64_t curr_time = time(nullptr);
104125
struct Resource {
@@ -142,11 +163,7 @@ static void CheckWatchdogLimits() {
142163
};
143164
for (const auto &resource : resources) {
144165
if (resource.limit != 0 && resource.value > resource.limit) {
145-
// Allow only one invocation to handle a failure: needed because we call
146-
// this function periodically in `WatchdogThread()`, but also call it in
147-
// `RunOneInput()` after all the work is done.
148-
static std::atomic<bool> already_handling_failure = false;
149-
if (!already_handling_failure.exchange(true)) {
166+
if (!watchdog_failure_found.exchange(true)) {
150167
if (resource.ignore_report) {
151168
fprintf(stderr,
152169
"========= %s exceeded: %" PRIu64 " > %" PRIu64
@@ -192,7 +209,9 @@ static void CheckWatchdogLimits() {
192209
// No calls to ResetInputTimer() yet: input execution hasn't started.
193210
if (state->input_start_time == 0) continue;
194211

212+
watchdog_thread_busy = true;
195213
CheckWatchdogLimits();
214+
watchdog_thread_busy = false;
196215
}
197216
}
198217

@@ -376,6 +395,7 @@ static void RunOneInput(const uint8_t *data, size_t size,
376395
if (fuzztest::internal::state->input_start_time.exchange(0) != 0) {
377396
PostProcessSancov(target_return_value == -1);
378397
}
398+
WaitWatchdogThreadIdle();
379399
state->stats.post_time_usec = UsecSinceLast();
380400
state->stats.peak_rss_mb = GetPeakRSSMb();
381401
}

0 commit comments

Comments
 (0)