Skip to content

Commit 377d49e

Browse files
committed
Add tracing spans for input gate hold and wait
1 parent 03ac4ad commit 377d49e

File tree

6 files changed

+50
-27
lines changed

6 files changed

+50
-27
lines changed

src/workerd/io/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ wd_cc_library(
271271
hdrs = ["io-gate.h"],
272272
visibility = ["//visibility:public"],
273273
deps = [
274+
":trace",
274275
"@capnp-cpp//src/kj",
275276
"@capnp-cpp//src/kj:kj-async",
276277
],

src/workerd/io/io-context.c++

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,8 @@ IoContext::IncomingRequest::~IoContext_IncomingRequest() noexcept(false) {
338338
}
339339

340340
InputGate::Lock IoContext::getInputLock() {
341-
return KJ_ASSERT_NONNULL(currentInputLock, "no input lock available in this context").addRef();
341+
return KJ_ASSERT_NONNULL(currentInputLock, "no input lock available in this context")
342+
.addRef(getCurrentTraceSpan());
342343
}
343344

344345
kj::Maybe<kj::Own<InputGate::CriticalSection>> IoContext::getCriticalSection() {

src/workerd/io/io-gate-test.c++

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ KJ_TEST("InputGate basics") {
2929
KJ_EXPECT(!promise2.poll(ws));
3030
KJ_EXPECT(!promise3.poll(ws));
3131

32-
auto lock2 = lock.addRef();
32+
auto lock2 = lock.addRef(nullptr);
3333
{ auto drop = kj::mv(lock); }
3434

3535
KJ_EXPECT(!promise2.poll(ws));
@@ -299,7 +299,7 @@ KJ_TEST("InputGate critical section lock outlives critical section") {
299299
KJ_ASSERT(lock.isFor(gate));
300300

301301
// Adding a ref and dropping it shouldn't cause trouble.
302-
lock.addRef();
302+
lock.addRef(nullptr);
303303

304304
// The gate should still be locked
305305
auto waiter = gate.wait();

src/workerd/io/io-gate.c++

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,15 @@ InputGate::~InputGate() noexcept {
2424
KJ_ASSERT(waiters.empty());
2525
}
2626

27-
InputGate::Waiter::Waiter(
28-
kj::PromiseFulfiller<Lock>& fulfiller, InputGate& gate, bool isChildWaiter)
27+
InputGate::Waiter::Waiter(kj::PromiseFulfiller<Lock>& fulfiller,
28+
InputGate& gate,
29+
bool isChildWaiter,
30+
SpanParent parentSpan)
2931
: fulfiller(fulfiller),
3032
gate(&gate),
31-
isChildWaiter(isChildWaiter) {
33+
isChildWaiter(isChildWaiter),
34+
waitSpan(parentSpan.newChild("input_gate_lock_wait"_kjc)),
35+
lockSpanParent(kj::mv(parentSpan)) {
3236
gate.hooks.inputGateWaiterAdded();
3337
if (isChildWaiter) {
3438
gate.waitingChildren.add(*this);
@@ -47,13 +51,13 @@ InputGate::Waiter::~Waiter() noexcept(false) {
4751
}
4852
}
4953

50-
kj::Promise<InputGate::Lock> InputGate::wait() {
54+
kj::Promise<InputGate::Lock> InputGate::wait(SpanParent parentSpan) {
5155
KJ_IF_SOME(e, brokenState.tryGet<kj::Exception>()) {
5256
return kj::cp(e);
5357
} else if (lockCount == 0) {
54-
return Lock(*this);
58+
return Lock(*this, kj::mv(parentSpan));
5559
} else {
56-
return kj::newAdaptedPromise<Lock, Waiter>(*this, false);
60+
return kj::newAdaptedPromise<Lock, Waiter>(*this, false, kj::mv(parentSpan));
5761
}
5862
}
5963

@@ -65,10 +69,11 @@ kj::Promise<void> InputGate::onBroken() {
6569
}
6670
}
6771

68-
InputGate::Lock::Lock(InputGate& gate)
72+
InputGate::Lock::Lock(InputGate& gate, SpanParent parentSpan)
6973
: gate(&gate),
7074
cs(gate.isCriticalSection ? kj::Maybe(kj::addRef(static_cast<CriticalSection&>(gate)))
71-
: kj::none) {
75+
: kj::none),
76+
lockSpan(parentSpan.newChild("input_gate_lock_hold"_kjc)) {
7277
InputGate* gateToLock = &gate;
7378

7479
KJ_IF_SOME(c, cs) {
@@ -107,11 +112,11 @@ void InputGate::releaseLock() {
107112
if (!waitingChildren.empty()) {
108113
auto& waiter = waitingChildren.front();
109114
waitingChildren.remove(waiter);
110-
waiter.fulfiller.fulfill(Lock(*this));
115+
waiter.fulfiller.fulfill(Lock(*this, kj::mv(waiter.lockSpanParent)));
111116
} else if (!waiters.empty()) {
112117
auto& waiter = waiters.front();
113118
waiters.remove(waiter);
114-
waiter.fulfiller.fulfill(Lock(*this));
119+
waiter.fulfiller.fulfill(Lock(*this, kj::mv(waiter.lockSpanParent)));
115120
}
116121
}
117122
}
@@ -167,7 +172,7 @@ InputGate::CriticalSection::~CriticalSection() noexcept(false) {
167172
}
168173
}
169174

170-
kj::Promise<InputGate::Lock> InputGate::CriticalSection::wait() {
175+
kj::Promise<InputGate::Lock> InputGate::CriticalSection::wait(SpanParent parentSpan) {
171176
for (;;) {
172177
switch (state) {
173178
case NOT_STARTED: {
@@ -183,11 +188,12 @@ kj::Promise<InputGate::Lock> InputGate::CriticalSection::wait() {
183188
// Add ourselves to this parent's child waiter list.
184189
if (target.lockCount == 0) {
185190
state = RUNNING;
186-
parentLock = Lock(target);
191+
parentLock = Lock(target, parentSpan.addRef());
187192
continue;
188193
} else {
189194
try {
190-
auto lock = co_await kj::newAdaptedPromise<Lock, Waiter>(target, true);
195+
auto lock =
196+
co_await kj::newAdaptedPromise<Lock, Waiter>(target, true, parentSpan.addRef());
191197
state = RUNNING;
192198
parentLock = kj::mv(lock);
193199
continue;
@@ -206,18 +212,18 @@ kj::Promise<InputGate::Lock> InputGate::CriticalSection::wait() {
206212
KJ_FAIL_REQUIRE("CriticalSection::wait() should be called once initially");
207213
case RUNNING:
208214
// CriticalSection is active, so defer to InputGate implementation.
209-
co_return co_await InputGate::wait();
215+
co_return co_await InputGate::wait(kj::mv(parentSpan));
210216
case REPARENTED:
211217
// Once the CriticalSection has declared itself done, then any straggler tasks it initiated
212218
// are adopted by the parent.
213219
// WARNING: Don't use parentAsInputGate() here as that'll bypass the override of wait() if
214220
// the parent is a CriticalSection itself.
215221
KJ_SWITCH_ONEOF(parent) {
216222
KJ_CASE_ONEOF(p, InputGate*) {
217-
co_return co_await p->wait();
223+
co_return co_await p->wait(kj::mv(parentSpan));
218224
}
219225
KJ_CASE_ONEOF(c, kj::Own<CriticalSection>) {
220-
co_return co_await c->wait();
226+
co_return co_await c->wait(kj::mv(parentSpan));
221227
}
222228
}
223229
KJ_UNREACHABLE;

src/workerd/io/io-gate.h

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
// to the application are still being flushed to disk. If the flush fails, these messages will
2121
// never be sent, so that the rest of the world cannot observe a prematurely-confirmed write.
2222

23+
#include <workerd/io/trace.h>
24+
2325
#include <kj/async.h>
2426
#include <kj/list.h>
2527
#include <kj/one-of.h>
@@ -62,7 +64,7 @@ class InputGate {
6264
class Lock {
6365
public:
6466
KJ_DISALLOW_COPY(Lock);
65-
Lock(Lock&& other): gate(other.gate), cs(kj::mv(other.cs)) {
67+
Lock(Lock&& other): gate(other.gate), cs(kj::mv(other.cs)), lockSpan(kj::mv(other.lockSpan)) {
6668
other.gate = nullptr;
6769
}
6870
~Lock() noexcept(false) {
@@ -71,8 +73,8 @@ class InputGate {
7173

7274
// Increments the lock's refcount, returning a duplicate `Lock`. All `Lock`s must be dropped
7375
// before the gate is unlocked.
74-
Lock addRef() {
75-
return Lock(*gate);
76+
Lock addRef(SpanParent parentSpan) {
77+
return Lock(*gate, kj::mv(parentSpan));
7678
}
7779

7880
// Start a new critical section from this lock. After `wait()` has been called on the returned
@@ -99,12 +101,17 @@ class InputGate {
99101

100102
kj::Maybe<kj::Own<CriticalSection>> cs;
101103

102-
Lock(InputGate& gate);
104+
SpanBuilder lockSpan;
105+
106+
Lock(InputGate& gate, SpanParent parentSpan);
103107
friend class InputGate;
104108
};
105109

106110
// Wait until there are no `Lock`s, then create a new one and return it.
107-
kj::Promise<Lock> wait();
111+
// If parentSpan is provided, child spans will be created to track:
112+
// - Time spent waiting for the lock (if waiting is required)
113+
// - Time spent holding the lock
114+
kj::Promise<Lock> wait(SpanParent parentSpan = nullptr);
108115

109116
// Rejects if and when calls to `wait()` become broken due to a failed critical section. The
110117
// actor should be shut down in this case. This promise never resolves, only rejects.
@@ -122,13 +129,21 @@ class InputGate {
122129
bool isCriticalSection = false;
123130

124131
struct Waiter {
125-
Waiter(kj::PromiseFulfiller<Lock>& fulfiller, InputGate& gate, bool isChildWaiter);
132+
Waiter(kj::PromiseFulfiller<Lock>& fulfiller,
133+
InputGate& gate,
134+
bool isChildWaiter,
135+
SpanParent parentSpan);
126136
~Waiter() noexcept(false);
127137

128138
kj::PromiseFulfiller<Lock>& fulfiller;
129139
InputGate* gate;
130140
bool isChildWaiter;
131141
kj::ListLink<Waiter> link;
142+
143+
// Span tracking how long we wait for the lock. Ends when Waiter is destroyed.
144+
SpanBuilder waitSpan;
145+
// Parent span to pass to Lock when it's created.
146+
SpanParent lockSpanParent;
132147
};
133148

134149
kj::List<Waiter, &Waiter::link> waiters;
@@ -173,7 +188,7 @@ class InputGate::CriticalSection: private InputGate, public kj::Refcounted {
173188
// The first call to wait() begins the CriticalSection. After that wait completes, until the
174189
// CriticalSection is done and dropped, no other locks will be allowed on this InputGate, except
175190
// locks requested by calling wait() on this CriticalSection -- or one of its children.
176-
kj::Promise<Lock> wait();
191+
kj::Promise<Lock> wait(SpanParent parentSpan = nullptr);
177192

178193
// Call when the critical section has completed successfully. If this is not called before the
179194
// CriticalSection is dropped, then failed() is called implicitly.

src/workerd/io/worker.c++

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3793,7 +3793,7 @@ kj::Promise<void> Worker::Actor::ensureConstructedImpl(IoContext& context, Actor
37933793
handler.missingSuperclass = info.missingSuperclass;
37943794

37953795
impl->classInstance = kj::mv(handler);
3796-
}, inputLock.addRef());
3796+
}, inputLock.addRef(context.getCurrentTraceSpan()));
37973797
// We addRef() the inputLock above rather than kj::mv() it so that the lock remains held
37983798
// through the catch block below, if an exception is thrown. This is important since we
37993799
// MUST update `impl->classInstance` to something other than `Initializing` before we

0 commit comments

Comments
 (0)