Skip to content

Commit 40ff9af

Browse files
jasondellalucepoiana
authored andcommitted
fix(symbols/extract): solve deadlock scenario when releasing async worker
Signed-off-by: Jason Dellaluce <[email protected]>
1 parent 98fd1a5 commit 40ff9af

File tree

1 file changed

+19
-3
lines changed

1 file changed

+19
-3
lines changed

pkg/sdk/symbols/extract/async.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ const (
9292
// asyncBatchSize is the physical size of batches allocated
9393
// in C memory, namely the total number of locks available
9494
asyncBatchSize = cgo.MaxHandle + 1
95+
//
96+
// max number of seconds we're willing to wait for a worker to exit
97+
// once released before triggering a panic
98+
workerReleaseTimeoutInSeconds = 10
9599
)
96100

97101
var (
@@ -186,7 +190,9 @@ func (a *asyncContext) acquireWorker(workerIdx int32) {
186190
for _, i := range batchIdxs {
187191
// reduce sync overhead by skipping unused batch slots
188192
if i > a.maxBatchIdx {
189-
continue
193+
// from this point on we'll only encountered unused slots
194+
// so we mind as well just start over
195+
break
190196
}
191197

192198
// check for incoming request, if any, otherwise busy waits
@@ -244,13 +250,23 @@ func (a *asyncContext) releaseWorker(workerIdx int32) {
244250
// side, we use the first visible slot and set an exit request. The worker
245251
// will eventually synchronize with the used lock and stop.
246252
idx := a.workerIdxToBatchIdxs(workerIdx)[0]
253+
waitStartTime := time.Now()
247254
for !atomic.CompareAndSwapInt32((*int32)(&a.batch[idx].lock), state_unused, state_exit_req) {
248-
// spin
255+
// spinning, but let's yield first
256+
runtime.Gosched()
257+
if time.Since(waitStartTime).Seconds() > workerReleaseTimeoutInSeconds {
258+
panic("plugin-sdk-go/sdk/symbols/extract: async worker release timeout expired (1)")
259+
}
249260
}
250261

251262
// wait for worker exiting
263+
waitStartTime = time.Now()
252264
for atomic.LoadInt32((*int32)(&a.batch[idx].lock)) != state_exit_ack {
253-
// spin
265+
// spinning, but let's yield first
266+
runtime.Gosched()
267+
if time.Since(waitStartTime).Seconds() > workerReleaseTimeoutInSeconds {
268+
panic("plugin-sdk-go/sdk/symbols/extract: async worker release timeout expired (2)")
269+
}
254270
}
255271

256272
// restore first worker slot

0 commit comments

Comments
 (0)