Skip to content

Commit e7eb08e

Browse files
committed
Add support for multiple subscribers to CRI container events
Signed-off-by: Derek McGowan <[email protected]>
1 parent 0817c97 commit e7eb08e

File tree

5 files changed

+334
-16
lines changed

5 files changed

+334
-16
lines changed

internal/eventq/eventq.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package eventq
18+
19+
import (
20+
"io"
21+
"time"
22+
)
23+
24+
type EventQueue[T any] struct {
25+
events chan<- T
26+
subscriberC chan<- eventSubscription[T]
27+
shutdownC chan struct{}
28+
}
29+
30+
type eventSubscription[T any] struct {
31+
c chan<- T
32+
closeC chan struct{}
33+
}
34+
35+
func (sub eventSubscription[T]) publish(event T) bool {
36+
select {
37+
case <-sub.closeC:
38+
return false
39+
case sub.c <- event:
40+
return true
41+
}
42+
}
43+
44+
func (sub eventSubscription[T]) Close() error {
45+
select {
46+
case <-sub.closeC:
47+
default:
48+
close(sub.closeC)
49+
}
50+
return nil
51+
}
52+
53+
// New provides a queue for sending messages to one or more
54+
// subscribers. Messages are held for the given discardAfter duration
55+
// if there are no subscribers.
56+
func New[T any](discardAfter time.Duration, discardFn func(T)) EventQueue[T] {
57+
events := make(chan T)
58+
subscriberC := make(chan eventSubscription[T])
59+
shutdownC := make(chan struct{})
60+
61+
go func() {
62+
type queuedEvent struct {
63+
event T
64+
discardAt time.Time
65+
}
66+
67+
var discardQueue []queuedEvent
68+
var discardTime <-chan time.Time
69+
var subscribers []eventSubscription[T]
70+
for {
71+
select {
72+
case <-shutdownC:
73+
for _, event := range discardQueue {
74+
discardFn(event.event)
75+
}
76+
for _, sub := range subscribers {
77+
close(sub.c)
78+
}
79+
return
80+
case event := <-events:
81+
if len(subscribers) > 0 {
82+
active := subscribers[:0]
83+
for _, sub := range subscribers {
84+
if sub.publish(event) {
85+
active = append(active, sub)
86+
}
87+
}
88+
subscribers = active
89+
}
90+
if len(subscribers) == 0 {
91+
discardQueue = append(discardQueue, queuedEvent{
92+
event: event,
93+
discardAt: time.Now().Add(discardAfter),
94+
})
95+
if discardTime == nil {
96+
discardTime = time.After(time.Until(discardQueue[0].discardAt).Abs())
97+
}
98+
}
99+
case s := <-subscriberC:
100+
var closed bool
101+
for i, event := range discardQueue {
102+
if !s.publish(event.event) {
103+
discardQueue = discardQueue[i:]
104+
closed = true
105+
break
106+
}
107+
}
108+
if !closed {
109+
discardQueue = nil
110+
discardTime = nil
111+
subscribers = append(subscribers, s)
112+
}
113+
case t := <-discardTime:
114+
toDiscard := discardQueue
115+
discardQueue = nil
116+
for i, event := range toDiscard {
117+
if t.After(event.discardAt) {
118+
discardFn(event.event)
119+
} else {
120+
discardQueue = toDiscard[i:]
121+
break
122+
}
123+
}
124+
if len(discardQueue) == 0 {
125+
discardTime = nil
126+
} else {
127+
// Wait until next item in discard queue plus a small buffer to collect a burst of events
128+
discardTime = time.After(time.Until(discardQueue[0].discardAt).Abs() + 10*time.Millisecond)
129+
}
130+
}
131+
132+
}
133+
}()
134+
135+
return EventQueue[T]{
136+
events: events,
137+
subscriberC: subscriberC,
138+
shutdownC: shutdownC,
139+
}
140+
}
141+
142+
func (eq *EventQueue[T]) Shutdown() {
143+
defer close(eq.shutdownC)
144+
eq.shutdownC <- struct{}{}
145+
}
146+
147+
func (eq *EventQueue[T]) Send(event T) {
148+
select {
149+
case <-eq.shutdownC:
150+
case eq.events <- event:
151+
}
152+
}
153+
154+
func (eq *EventQueue[T]) Subscribe() (<-chan T, io.Closer) {
155+
c := make(chan T, 100)
156+
subscription := eventSubscription[T]{
157+
c: c,
158+
closeC: make(chan struct{}),
159+
}
160+
eq.subscriberC <- subscription
161+
162+
return c, subscription
163+
}

internal/eventq/eventq_test.go

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package eventq
18+
19+
import (
20+
"testing"
21+
"time"
22+
23+
"github.com/stretchr/testify/assert"
24+
)
25+
26+
func TestSingleSubscriber(t *testing.T) {
27+
eq := New[int](time.Second, func(int) {})
28+
c := newCollector(eq)
29+
expected := []int{1, 2, 3, 4, 5}
30+
for _, i := range expected {
31+
eq.Send(i)
32+
}
33+
eq.Shutdown()
34+
assert.Equal(t, expected, c.Collected())
35+
}
36+
37+
func TestMultiSubscriber(t *testing.T) {
38+
eq := New[int](time.Second, func(int) {})
39+
cs := make([]*collector, 10)
40+
for i := range cs {
41+
cs[i] = newCollector(eq)
42+
}
43+
expected := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
44+
for _, i := range expected {
45+
eq.Send(i)
46+
}
47+
eq.Shutdown()
48+
for i := range cs {
49+
assert.Equal(t, expected, cs[i].Collected())
50+
}
51+
}
52+
53+
func TestMissedEvents(t *testing.T) {
54+
eq := New[int](3600*time.Second, func(int) {})
55+
expected := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
56+
for _, i := range expected[:2] {
57+
eq.Send(i)
58+
}
59+
c := newCollector(eq)
60+
for _, i := range expected[2:] {
61+
eq.Send(i)
62+
}
63+
64+
eq.Shutdown()
65+
assert.Equal(t, expected, c.Collected())
66+
}
67+
68+
func TestSubscribersDifferentTime(t *testing.T) {
69+
eq := New[int](time.Second, func(int) {})
70+
expected := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
71+
c1 := newCollector(eq)
72+
for _, i := range expected[:2] {
73+
eq.Send(i)
74+
}
75+
c2 := newCollector(eq)
76+
for _, i := range expected[2:] {
77+
eq.Send(i)
78+
}
79+
80+
eq.Shutdown()
81+
assert.Equal(t, expected, c1.Collected())
82+
assert.Equal(t, expected[2:], c2.Collected())
83+
}
84+
85+
func TestDiscardedAfterTime(t *testing.T) {
86+
discarded := []int{}
87+
eq := New[int](time.Microsecond, func(i int) {
88+
discarded = append(discarded, i)
89+
})
90+
expected := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
91+
for _, i := range expected[:2] {
92+
eq.Send(i)
93+
}
94+
time.Sleep(50 * time.Millisecond)
95+
c := newCollector(eq)
96+
for _, i := range expected[2:] {
97+
eq.Send(i)
98+
}
99+
100+
eq.Shutdown()
101+
assert.Equal(t, expected[:2], discarded)
102+
assert.Equal(t, expected[2:], c.Collected())
103+
}
104+
105+
func TestDiscardedAtShutdown(t *testing.T) {
106+
discarded := []int{}
107+
expected := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
108+
done := make(chan struct{})
109+
eq := New[int](3600*time.Second, func(i int) {
110+
discarded = append(discarded, i)
111+
if len(discarded) == len(expected) {
112+
close(done)
113+
}
114+
})
115+
for _, i := range expected {
116+
eq.Send(i)
117+
}
118+
eq.Shutdown()
119+
select {
120+
case <-done:
121+
case <-time.After(time.Second):
122+
}
123+
assert.Equal(t, expected, discarded)
124+
}
125+
126+
type collector struct {
127+
collected []int
128+
c <-chan int
129+
done chan struct{}
130+
}
131+
132+
func newCollector(eq EventQueue[int]) *collector {
133+
eventC, closer := eq.Subscribe()
134+
c := &collector{
135+
c: eventC,
136+
done: make(chan struct{}),
137+
}
138+
go func() {
139+
defer close(c.done)
140+
defer closer.Close()
141+
for i := range c.c {
142+
c.collected = append(c.collected, i)
143+
}
144+
}()
145+
146+
return c
147+
}
148+
149+
func (c *collector) Collected() []int {
150+
<-c.done
151+
return c.collected
152+
}

pkg/cri/server/container_events.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ import (
2121
)
2222

2323
func (c *criService) GetContainerEvents(r *runtime.GetEventsRequest, s runtime.RuntimeService_GetContainerEventsServer) error {
24-
// TODO (https://github.com/containerd/containerd/issues/7318):
25-
// replace with a real implementation that broadcasts containerEventsChan
26-
// to all subscribers.
27-
for event := range c.containerEventsChan {
24+
eventC, closer := c.containerEventsQ.Subscribe()
25+
defer closer.Close()
26+
27+
for event := range eventC {
2828
if err := s.Send(&event); err != nil {
2929
return err
3030
}

pkg/cri/server/helpers.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -395,13 +395,7 @@ func (c *criService) generateAndSendContainerEvent(ctx context.Context, containe
395395
ContainersStatuses: containerStatuses,
396396
}
397397

398-
// TODO(ruiwen-zhao): write events to a cache, storage, or increase the size of the channel
399-
select {
400-
case c.containerEventsChan <- event:
401-
default:
402-
containerEventsDroppedCount.Inc()
403-
log.G(ctx).Debugf("containerEventsChan is full, discarding event %+v", event)
404-
}
398+
c.containerEventsQ.Send(event)
405399
}
406400

407401
func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) {

pkg/cri/server/service.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"net/http"
2424
"sync"
2525
"sync/atomic"
26+
"time"
2627

2728
"github.com/containerd/go-cni"
2829
"github.com/containerd/log"
@@ -32,6 +33,7 @@ import (
3233

3334
containerd "github.com/containerd/containerd/v2/client"
3435
"github.com/containerd/containerd/v2/core/sandbox"
36+
"github.com/containerd/containerd/v2/internal/eventq"
3537
"github.com/containerd/containerd/v2/internal/registrar"
3638
criconfig "github.com/containerd/containerd/v2/pkg/cri/config"
3739
"github.com/containerd/containerd/v2/pkg/cri/nri"
@@ -118,9 +120,9 @@ type criService struct {
118120
// allCaps is the list of the capabilities.
119121
// When nil, parsed from CapEff of /proc/self/status.
120122
allCaps []string //nolint:nolintlint,unused // Ignore on non-Linux
121-
// containerEventsChan is used to capture container events and send them
122-
// to the caller of GetContainerEvents.
123-
containerEventsChan chan runtime.ContainerEventResponse
123+
// containerEventsQ is used to capture container events and send them
124+
// to the callers of GetContainerEvents.
125+
containerEventsQ eventq.EventQueue[runtime.ContainerEventResponse]
124126
// nri is used to hook NRI into CRI request processing.
125127
nri *nri.API
126128
// sandboxService is the sandbox related service for CRI
@@ -165,8 +167,15 @@ func NewCRIService(config criconfig.Config, options *CRIServiceOptions) (CRIServ
165167
sandboxService: newCriSandboxService(&config, options.Client),
166168
}
167169

168-
// TODO: figure out a proper channel size.
169-
c.containerEventsChan = make(chan runtime.ContainerEventResponse, 1000)
170+
// TODO: Make discard time configurable
171+
c.containerEventsQ = eventq.New[runtime.ContainerEventResponse](5*time.Minute, func(event runtime.ContainerEventResponse) {
172+
containerEventsDroppedCount.Inc()
173+
log.L.WithFields(
174+
log.Fields{
175+
"container": event.ContainerId,
176+
"type": event.ContainerEventType,
177+
}).Warn("container event discarded")
178+
})
170179

171180
if err := c.initPlatform(); err != nil {
172181
return nil, nil, fmt.Errorf("initialize platform: %w", err)

0 commit comments

Comments
 (0)