@@ -30,11 +30,12 @@ package runtime
30
30
// non-select operations) so that the select operation knows which case did
31
31
// proceed.
32
32
// The value is at the same time also a way that goroutines can be the first
33
- // (and only) goroutine to 'take' a channel operation to change it from
34
- // 'waiting' to any other value. This is important for the select statement
35
- // because multiple goroutines could try to let different channels in the
36
- // select statement proceed at the same time. By using Task.Data, only a
37
- // single channel operation in the select statement can proceed.
33
+ // (and only) goroutine to 'take' a channel operation using an atomic CAS
34
+ // operation to change it from 'waiting' to any other value. This is important
35
+ // for the select statement because multiple goroutines could try to let
36
+ // different channels in the select statement proceed at the same time. By
37
+ // using Task.Data, only a single channel operation in the select statement
38
+ // can proceed.
38
39
// - It is possible for the channel queues to contain already-processed senders
39
40
// or receivers. This can happen when the select statement managed to proceed
40
41
// but the goroutine doing the select has not yet cleaned up the stale queue
@@ -49,15 +50,17 @@ import (
49
50
50
51
// The runtime implementation of the Go 'chan' type.
51
52
type channel struct {
52
- closed bool
53
- elementSize uintptr
54
- bufCap uintptr // 'cap'
55
- bufLen uintptr // 'len'
56
- bufHead uintptr
57
- bufTail uintptr
58
- senders chanQueue
59
- receivers chanQueue
60
- buf unsafe.Pointer
53
+ closed bool
54
+ selectLocked bool
55
+ elementSize uintptr
56
+ bufCap uintptr // 'cap'
57
+ bufLen uintptr // 'len'
58
+ bufHead uintptr
59
+ bufTail uintptr
60
+ senders chanQueue
61
+ receivers chanQueue
62
+ lock task.PMutex
63
+ buf unsafe.Pointer
61
64
}
62
65
63
66
const (
@@ -73,7 +76,8 @@ type chanQueue struct {
73
76
74
77
// Push the next channel operation to the queue. All appropriate fields must have
75
78
// been initialized already.
76
- // This function must be called with interrupts disabled.
79
+ // This function must be called with interrupts disabled and the channel lock
80
+ // held.
77
81
func (q * chanQueue ) push (node * channelOp ) {
78
82
node .next = q .first
79
83
q .first = node
@@ -99,16 +103,17 @@ func (q *chanQueue) pop(chanOp uint32) *channelOp {
99
103
newDataValue := chanOp | popped .index << 2
100
104
101
105
// Try to be the first to proceed with this goroutine.
102
- if popped .task .DataUint32 () == chanOperationWaiting {
103
- popped . task . SetDataUint32 ( newDataValue )
106
+ swapped := popped .task .DataAtomicUint32 (). CompareAndSwap ( 0 , newDataValue )
107
+ if swapped {
104
108
return popped
105
109
}
106
110
}
107
111
}
108
112
109
113
// Remove the given to-be-removed node from the queue if it is part of the
110
114
// queue. If there are multiple, only one will be removed.
111
- // This function must be called with interrupts disabled.
115
+ // This function must be called with interrupts disabled and the channel lock
116
+ // held.
112
117
func (q * chanQueue ) remove (remove * channelOp ) {
113
118
n := & q .first
114
119
for * n != nil {
@@ -159,8 +164,8 @@ func chanCap(c *channel) int {
159
164
}
160
165
161
166
// Push the value to the channel buffer array, for a send operation.
162
- // This function may only be called when interrupts are disabled and it is known
163
- // there is space available in the buffer.
167
+ // This function may only be called when interrupts are disabled, the channel is
168
+ // locked and it is known there is space available in the buffer.
164
169
func (ch * channel ) bufferPush (value unsafe.Pointer ) {
165
170
elemAddr := unsafe .Add (ch .buf , ch .bufHead * ch .elementSize )
166
171
ch .bufLen ++
@@ -174,8 +179,8 @@ func (ch *channel) bufferPush(value unsafe.Pointer) {
174
179
175
180
// Pop a value from the channel buffer and store it in the 'value' pointer, for
176
181
// a receive operation.
177
- // This function may only be called when interrupts are disabled and it is known
178
- // there is at least one value available in the buffer.
182
+ // This function may only be called when interrupts are disabled, the channel is
183
+ // locked and it is known there is at least one value available in the buffer.
179
184
func (ch * channel ) bufferPop (value unsafe.Pointer ) {
180
185
elemAddr := unsafe .Add (ch .buf , ch .bufTail * ch .elementSize )
181
186
ch .bufLen --
@@ -191,7 +196,8 @@ func (ch *channel) bufferPop(value unsafe.Pointer) {
191
196
}
192
197
193
198
// Try to proceed with this send operation without blocking, and return whether
194
- // the send succeeded. Interrupts must be disabled when calling this function.
199
+ // the send succeeded. Interrupts must be disabled and the lock must be held
200
+ // when calling this function.
195
201
func (ch * channel ) trySend (value unsafe.Pointer ) bool {
196
202
// To make sure we send values in the correct order, we can only send
197
203
// directly to a receiver when there are no values in the buffer.
@@ -230,9 +236,11 @@ func chanSend(ch *channel, value unsafe.Pointer, op *channelOp) {
230
236
}
231
237
232
238
mask := interrupt .Disable ()
239
+ ch .lock .Lock ()
233
240
234
241
// See whether we can proceed immediately, and if so, return early.
235
242
if ch .trySend (value ) {
243
+ ch .lock .Unlock ()
236
244
interrupt .Restore (mask )
237
245
return
238
246
}
@@ -244,9 +252,12 @@ func chanSend(ch *channel, value unsafe.Pointer, op *channelOp) {
244
252
op .index = 0
245
253
op .value = value
246
254
ch .senders .push (op )
255
+ ch .lock .Unlock ()
247
256
interrupt .Restore (mask )
248
257
249
258
// Wait until this goroutine is resumed.
259
+ // It might be resumed after Unlock() and before Pause(). In that case,
260
+ // because we use semaphores, the Pause() will continue immediately.
250
261
task .Pause ()
251
262
252
263
// Check whether the send happened normally (not because the channel was
@@ -258,8 +269,8 @@ func chanSend(ch *channel, value unsafe.Pointer, op *channelOp) {
258
269
}
259
270
260
271
// Try to proceed with this receive operation without blocking, and return
261
- // whether the receive operation succeeded. Interrupts must be disabled when
262
- // calling this function.
272
+ // whether the receive operation succeeded. Interrupts must be disabled and the
273
+ // lock must be held when calling this function.
263
274
func (ch * channel ) tryRecv (value unsafe.Pointer ) (received , ok bool ) {
264
275
// To make sure we keep the values in the channel in the correct order, we
265
276
// first have to read values from the buffer before we can look at the
@@ -303,8 +314,10 @@ func chanRecv(ch *channel, value unsafe.Pointer, op *channelOp) bool {
303
314
}
304
315
305
316
mask := interrupt .Disable ()
317
+ ch .lock .Lock ()
306
318
307
319
if received , ok := ch .tryRecv (value ); received {
320
+ ch .lock .Unlock ()
308
321
interrupt .Restore (mask )
309
322
return ok
310
323
}
@@ -317,6 +330,7 @@ func chanRecv(ch *channel, value unsafe.Pointer, op *channelOp) bool {
317
330
op .task = t
318
331
op .index = 0
319
332
ch .receivers .push (op )
333
+ ch .lock .Unlock ()
320
334
interrupt .Restore (mask )
321
335
322
336
// Wait until the goroutine is resumed.
@@ -335,9 +349,11 @@ func chanClose(ch *channel) {
335
349
}
336
350
337
351
mask := interrupt .Disable ()
352
+ ch .lock .Lock ()
338
353
339
354
if ch .closed {
340
355
// Not allowed by the language spec.
356
+ ch .lock .Unlock ()
341
357
interrupt .Restore (mask )
342
358
runtimePanic ("close of closed channel" )
343
359
}
@@ -370,14 +386,56 @@ func chanClose(ch *channel) {
370
386
371
387
ch .closed = true
372
388
389
+ ch .lock .Unlock ()
373
390
interrupt .Restore (mask )
374
391
}
375
392
393
+ // We currently use a global select lock to avoid deadlocks while locking each
394
+ // individual channel in the select. Without this global lock, two select
395
+ // operations that have a different order of the same channels could end up in a
396
+ // deadlock. This global lock is inefficient if there are many select operations
397
+ // happening in parallel, but gets the job done.
398
+ //
399
+ // If this becomes a performance issue, we can see how the Go runtime does this.
400
+ // I think it does this by sorting all states by channel address and then
401
+ // locking them in that order to avoid this deadlock.
402
+ var chanSelectLock task.PMutex
403
+
404
+ // Lock all channels (taking care to skip duplicate channels).
405
+ func lockAllStates (states []chanSelectState ) {
406
+ if ! hasParallelism {
407
+ return
408
+ }
409
+ for _ , state := range states {
410
+ if state .ch != nil && ! state .ch .selectLocked {
411
+ state .ch .lock .Lock ()
412
+ state .ch .selectLocked = true
413
+ }
414
+ }
415
+ }
416
+
417
+ // Unlock all channels (taking care to skip duplicate channels).
418
+ func unlockAllStates (states []chanSelectState ) {
419
+ if ! hasParallelism {
420
+ return
421
+ }
422
+ for _ , state := range states {
423
+ if state .ch != nil && state .ch .selectLocked {
424
+ state .ch .lock .Unlock ()
425
+ state .ch .selectLocked = false
426
+ }
427
+ }
428
+ }
429
+
376
430
// chanSelect implements blocking or non-blocking select operations.
377
431
// The 'ops' slice must be set if (and only if) this is a blocking select.
378
432
func chanSelect (recvbuf unsafe.Pointer , states []chanSelectState , ops []channelOp ) (uint32 , bool ) {
379
433
mask := interrupt .Disable ()
380
434
435
+ // Lock everything.
436
+ chanSelectLock .Lock ()
437
+ lockAllStates (states )
438
+
381
439
const selectNoIndex = ^ uint32 (0 )
382
440
selectIndex := selectNoIndex
383
441
selectOk := true
@@ -409,6 +467,8 @@ func chanSelect(recvbuf unsafe.Pointer, states []chanSelectState, ops []channelO
409
467
// return early.
410
468
blocking := len (ops ) != 0
411
469
if selectIndex != selectNoIndex || ! blocking {
470
+ unlockAllStates (states )
471
+ chanSelectLock .Unlock ()
412
472
interrupt .Restore (mask )
413
473
return selectIndex , selectOk
414
474
}
@@ -417,8 +477,8 @@ func chanSelect(recvbuf unsafe.Pointer, states []chanSelectState, ops []channelO
417
477
// become more complicated.
418
478
// We add ourselves as a sender/receiver to every channel, and wait for the
419
479
// first one to complete. Only one will successfully complete, because
420
- // senders and receivers will check t.Data for the state so that only one
421
- // will be able to "take" this select operation.
480
+ // senders and receivers use a compare-and-exchange atomic operation on
481
+ // t.Data so that only one will be able to "take" this select operation.
422
482
t := task .Current ()
423
483
t .Ptr = recvbuf
424
484
t .SetDataUint32 (chanOperationWaiting )
@@ -438,13 +498,17 @@ func chanSelect(recvbuf unsafe.Pointer, states []chanSelectState, ops []channelO
438
498
}
439
499
440
500
// Now we wait until one of the send/receive operations can proceed.
501
+ unlockAllStates (states )
502
+ chanSelectLock .Unlock ()
441
503
interrupt .Restore (mask )
442
504
task .Pause ()
443
505
444
506
// Resumed, so one channel operation must have progressed.
445
507
446
508
// Make sure all channel ops are removed from the senders/receivers
447
509
// queue before we return and the memory of them becomes invalid.
510
+ chanSelectLock .Lock ()
511
+ lockAllStates (states )
448
512
for i , state := range states {
449
513
if state .ch == nil {
450
514
continue
@@ -458,6 +522,8 @@ func chanSelect(recvbuf unsafe.Pointer, states []chanSelectState, ops []channelO
458
522
}
459
523
interrupt .Restore (mask )
460
524
}
525
+ unlockAllStates (states )
526
+ chanSelectLock .Unlock ()
461
527
462
528
// Pull the return values out of t.Data (which contains two bitfields).
463
529
selectIndex = t .DataUint32 () >> 2
0 commit comments