1+ // Package nri integrates the daemon with the NRI (Node Resource Interface) framework.
2+ //
3+ // NRI allows external plugins to observe and adjust container resources and settings
4+ // at creation time, and to observe container lifecycle events. These plugins run with
5+ // the same level of trust as the daemon itself, because they can make arbitrary
6+ // modifications to container settings.
7+ //
8+ // The NRI framework is implemented by https://github.com/containerd/nri - see that
9+ // package for more details about NRI and the framework.
10+ //
11+ // Plugins are long-running processed (not instantiated per-request like runtime shims,
12+ // so they can maintain state across container events). They can either be started by
13+ // the NRI framework itself, it is configured with directories to search for plugins
14+ // and config for those plugins. Or, plugins can independently, and connect to the
15+ // daemon via a listening socket. By default, the listening socket is disabled in this
16+ // implementation.
117package nri
218
319import (
420 "context"
521 "errors"
622 "fmt"
723 "path/filepath"
24+ "strings"
825 "sync"
926
1027 "github.com/containerd/log"
@@ -43,6 +60,7 @@ type Config struct {
4360 ContainerLister ContainerLister
4461}
4562
63+ // NewNRI creates and starts a new NRI instance based on the provided configuration.
4664func NewNRI (ctx context.Context , cfg Config ) (* NRI , error ) {
4765 n := & NRI {cfg : cfg }
4866 if ! n .cfg .DaemonConfig .Enable {
@@ -71,6 +89,7 @@ func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
7189 return n , nil
7290}
7391
92+ // Shutdown stops the NRI instance and releases its resources.
7493func (n * NRI ) Shutdown (ctx context.Context ) {
7594 n .mu .Lock ()
7695 defer n .mu .Unlock ()
@@ -82,10 +101,76 @@ func (n *NRI) Shutdown(ctx context.Context) {
82101 n .nri = nil
83102}
84103
104+ // CreateContainer notifies plugins of a "creation" NRI-lifecycle event for a container,
105+ // allowing the plugin to adjust settings before the container is created.
106+ func (n * NRI ) CreateContainer (ctx context.Context , ctr * container.Container ) error {
107+ n .mu .RLock ()
108+ defer n .mu .RUnlock ()
109+ if n .nri == nil {
110+ return nil
111+ }
112+ // ctr.State can safely be locked here, but there's no need because it's expected
113+ // to be newly created and not yet accessible in any other thread.
114+
115+ nriPod , nriCtr , err := containerToNRI (ctr )
116+ if err != nil {
117+ return err
118+ }
119+
120+ // TODO(robmry): call RunPodSandbox?
121+
122+ resp , err := n .nri .CreateContainer (ctx , & adaptation.CreateContainerRequest {
123+ Pod : nriPod ,
124+ Container : nriCtr ,
125+ })
126+ if err != nil {
127+ return err
128+ }
129+
130+ if resp .GetUpdate () != nil {
131+ return errors .New ("container update is not supported" )
132+ }
133+ if err := applyAdjustments (ctx , ctr , resp .GetAdjust ()); err != nil {
134+ return err
135+ }
136+ return nil
137+ }
138+
139+ // syncFn is called when a plugin registers, allowing the plugin to learn the
140+ // current state of all containers.
85141func (n * NRI ) syncFn (ctx context.Context , syncCB adaptation.SyncCB ) error {
142+ // Claim a write lock so containers can't be created/removed until sync is done.
143+ // The plugin will get create/remove events after the sync, so won't miss anything.
144+ //
145+ // If a container's state changes during the sync, the plugin may see already-modified
146+ // state, then get a change notification with no changes.
147+ n .mu .Lock ()
148+ defer n .mu .Unlock ()
149+
150+ containers := n .cfg .ContainerLister .List ()
151+ nriPods := make ([]* adaptation.PodSandbox , 0 , len (containers ))
152+ nriCtrs := make ([]* adaptation.Container , 0 , len (containers ))
153+ for _ , ctr := range containers {
154+ ctr .State .Lock ()
155+ nriPod , nriCtr , err := containerToNRI (ctr )
156+ ctr .State .Unlock ()
157+ if err != nil {
158+ return fmt .Errorf ("converting container %s to NRI: %w" , ctr .ID , err )
159+ }
160+ nriPods = append (nriPods , nriPod )
161+ nriCtrs = append (nriCtrs , nriCtr )
162+ }
163+ updates , err := syncCB (ctx , nriPods , nriCtrs )
164+ if err != nil {
165+ return fmt .Errorf ("synchronizing NRI state: %w" , err )
166+ }
167+ if len (updates ) > 0 {
168+ return errors .New ("container updates during sync are not implemented" )
169+ }
86170 return nil
87171}
88172
173+ // updateFn may be called asynchronously by plugins.
89174func (n * NRI ) updateFn (context.Context , []* adaptation.ContainerUpdate ) ([]* adaptation.ContainerUpdate , error ) {
90175 return nil , errors .New ("not implemented" )
91176}
@@ -128,3 +213,95 @@ func nriOptions(cfg opts.NRIOpts) []adaptation.Option {
128213 }
129214 return res
130215}
216+
217+ func containerToNRI (ctr * container.Container ) (* adaptation.PodSandbox , * adaptation.Container , error ) {
218+ // TODO(robmry) - this implementation is incomplete, most fields are not populated.
219+ //
220+ // Many of these fields have straightforward mappings from Docker container fields,
221+ // but each will need consideration and tests for both outgoing settings and
222+ // adjutments from plugins.
223+ //
224+ // Docker doesn't have pods - but PodSandbox is how plugins will learn the container's
225+ // network namespace. So, the intent is to represent each container as having its own
226+ // PodSandbox, with the same ID and lifecycle as the container. We can probably represent
227+ // container-networking as containers sharing a pod.
228+ nriPod := & adaptation.PodSandbox {
229+ Id : ctr .ID ,
230+ Name : ctr .Name ,
231+ Uid : "" ,
232+ Namespace : "" ,
233+ Labels : nil ,
234+ Annotations : nil ,
235+ RuntimeHandler : "" ,
236+ Linux : nil ,
237+ Pid : 0 ,
238+ Ips : nil ,
239+ }
240+
241+ nriCtr := & adaptation.Container {
242+ Id : ctr .ID ,
243+ PodSandboxId : ctr .ID ,
244+ Name : ctr .Name ,
245+ State : adaptation .ContainerState_CONTAINER_UNKNOWN ,
246+ Labels : ctr .Config .Labels ,
247+ Annotations : ctr .HostConfig .Annotations ,
248+ Args : ctr .Config .Cmd ,
249+ Env : ctr .Config .Env ,
250+ Hooks : nil ,
251+ Linux : & adaptation.LinuxContainer {
252+ Namespaces : nil ,
253+ Devices : nil ,
254+ Resources : nil ,
255+ OomScoreAdj : nil ,
256+ CgroupsPath : "" ,
257+ IoPriority : nil ,
258+ SeccompProfile : nil ,
259+ SeccompPolicy : nil ,
260+ },
261+ Mounts : nil ,
262+ Pid : uint32 (ctr .Pid ),
263+ Rlimits : nil ,
264+ CreatedAt : 0 ,
265+ StartedAt : 0 ,
266+ FinishedAt : 0 ,
267+ ExitCode : 0 ,
268+ StatusReason : "" ,
269+ StatusMessage : "" ,
270+ CDIDevices : nil ,
271+ }
272+ return nriPod , nriCtr , nil
273+ }
274+
275+ func applyAdjustments (ctx context.Context , ctr * container.Container , adj * adaptation.ContainerAdjustment ) error {
276+ if adj == nil {
277+ return nil
278+ }
279+ if err := applyEnvVars (ctx , ctr , adj .Env ); err != nil {
280+ return fmt .Errorf ("applying environment variable adjustments: %w" , err )
281+ }
282+ return nil
283+ }
284+
285+ func applyEnvVars (ctx context.Context , ctr * container.Container , envVars []* adaptation.KeyValue ) error {
286+ if len (envVars ) == 0 {
287+ return nil
288+ }
289+ existing := make (map [string ]int , len (ctr .Config .Env ))
290+ for i , e := range ctr .Config .Env {
291+ k , _ , _ := strings .Cut (e , "=" )
292+ existing [k ] = i
293+ }
294+ for _ , kv := range envVars {
295+ if kv .Key == "" {
296+ return errors .New ("empty environment variable key" )
297+ }
298+ val := kv .Key + "=" + kv .Value
299+ log .G (ctx ).Debugf ("Applying NRI env var adjustment to %s" , kv .Key )
300+ if i , found := existing [kv .Key ]; found {
301+ ctr .Config .Env [i ] = val
302+ } else {
303+ ctr .Config .Env = append (ctr .Config .Env , val )
304+ }
305+ }
306+ return nil
307+ }
0 commit comments