@@ -68,8 +68,6 @@ type ManagerImpl struct {
68
68
69
69
// NewManagerImpl creates a new manager.
70
70
func NewManagerImpl (kubeClient clientset.Interface , stateFileDirectory string , nodeName types.NodeName ) (* ManagerImpl , error ) {
71
- klog .V (2 ).InfoS ("Creating DRA manager" )
72
-
73
71
claimInfoCache , err := newClaimInfoCache (stateFileDirectory , draManagerStateFileName )
74
72
if err != nil {
75
73
return nil , fmt .Errorf ("failed to create claimInfo cache: %+v" , err )
@@ -91,15 +89,16 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, n
91
89
}
92
90
93
91
// Start starts the reconcile loop of the manager.
94
- func (m * ManagerImpl ) Start (activePods ActivePodsFunc , sourcesReady config.SourcesReady ) error {
92
+ func (m * ManagerImpl ) Start (ctx context. Context , activePods ActivePodsFunc , sourcesReady config.SourcesReady ) error {
95
93
m .activePods = activePods
96
94
m .sourcesReady = sourcesReady
97
- go wait .Until ( func () { m .reconcileLoop () }, m .reconcilePeriod , wait . NeverStop )
95
+ go wait .UntilWithContext ( ctx , func (ctx context. Context ) { m .reconcileLoop (ctx ) }, m .reconcilePeriod )
98
96
return nil
99
97
}
100
98
101
99
// reconcileLoop ensures that any stale state in the manager's claimInfoCache gets periodically reconciled.
102
- func (m * ManagerImpl ) reconcileLoop () {
100
+ func (m * ManagerImpl ) reconcileLoop (ctx context.Context ) {
101
+ logger := klog .FromContext (ctx )
103
102
// Only once all sources are ready do we attempt to reconcile.
104
103
// This ensures that the call to m.activePods() below will succeed with
105
104
// the actual active pods list.
@@ -140,8 +139,8 @@ func (m *ManagerImpl) reconcileLoop() {
140
139
141
140
// Loop through all inactive pods and call UnprepareResources on them.
142
141
for _ , podClaims := range inactivePodClaims {
143
- if err := m .unprepareResources (podClaims .uid , podClaims .namespace , podClaims .claimNames ); err != nil {
144
- klog . ErrorS ( err , "Unpreparing pod resources in reconcile loop" , "podUID" , podClaims .uid )
142
+ if err := m .unprepareResources (ctx , podClaims .uid , podClaims .namespace , podClaims .claimNames ); err != nil {
143
+ logger . Info ( "Unpreparing pod resources in reconcile loop failed, will retry " , "podUID" , podClaims .uid , "err" , err )
145
144
}
146
145
}
147
146
}
@@ -150,25 +149,26 @@ func (m *ManagerImpl) reconcileLoop() {
150
149
// for the input container, issue NodePrepareResources rpc requests
151
150
// for each new resource requirement, process their responses and update the cached
152
151
// containerResources on success.
153
- func (m * ManagerImpl ) PrepareResources (pod * v1.Pod ) error {
152
+ func (m * ManagerImpl ) PrepareResources (ctx context.Context , pod * v1.Pod ) error {
153
+ logger := klog .FromContext (ctx )
154
154
batches := make (map [string ][]* drapb.Claim )
155
155
resourceClaims := make (map [types.UID ]* resourceapi.ResourceClaim )
156
156
for i := range pod .Spec .ResourceClaims {
157
157
podClaim := & pod .Spec .ResourceClaims [i ]
158
- klog .V (3 ).InfoS ("Processing resource" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name )
158
+ logger .V (3 ).Info ("Processing resource" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name )
159
159
claimName , mustCheckOwner , err := resourceclaim .Name (pod , podClaim )
160
160
if err != nil {
161
161
return fmt .Errorf ("prepare resource claim: %v" , err )
162
162
}
163
163
164
164
if claimName == nil {
165
165
// Nothing to do.
166
- klog .V (5 ).InfoS ("No need to prepare resources, no claim generated" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name )
166
+ logger .V (5 ).Info ("No need to prepare resources, no claim generated" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name )
167
167
continue
168
168
}
169
169
// Query claim object from the API server
170
170
resourceClaim , err := m .kubeClient .ResourceV1alpha3 ().ResourceClaims (pod .Namespace ).Get (
171
- context . TODO () ,
171
+ ctx ,
172
172
* claimName ,
173
173
metav1.GetOptions {})
174
174
if err != nil {
@@ -198,9 +198,9 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
198
198
return fmt .Errorf ("claim %s: %w" , klog .KObj (resourceClaim ), err )
199
199
}
200
200
claimInfo = m .cache .add (ci )
201
- klog .V (6 ).InfoS ("Created new claim info cache entry" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ), "claimInfoEntry" , claimInfo )
201
+ logger .V (6 ).Info ("Created new claim info cache entry" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ), "claimInfoEntry" , claimInfo )
202
202
} else {
203
- klog .V (6 ).InfoS ("Found existing claim info cache entry" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ), "claimInfoEntry" , claimInfo )
203
+ logger .V (6 ).Info ("Found existing claim info cache entry" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ), "claimInfoEntry" , claimInfo )
204
204
}
205
205
206
206
// Add a reference to the current pod in the claim info.
@@ -216,7 +216,7 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
216
216
217
217
// If this claim is already prepared, there is no need to prepare it again.
218
218
if claimInfo .isPrepared () {
219
- klog .V (5 ).InfoS ("Resources already prepared" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ))
219
+ logger .V (5 ).Info ("Resources already prepared" , "pod" , klog .KObj (pod ), "podClaim" , podClaim .Name , "claim" , klog .KObj (resourceClaim ))
220
220
return nil
221
221
}
222
222
@@ -250,7 +250,7 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
250
250
if err != nil {
251
251
return fmt .Errorf ("failed to get gRPC client for driver %s: %w" , driverName , err )
252
252
}
253
- response , err := client .NodePrepareResources (context . Background () , & drapb.NodePrepareResourcesRequest {Claims : claims })
253
+ response , err := client .NodePrepareResources (ctx , & drapb.NodePrepareResourcesRequest {Claims : claims })
254
254
if err != nil {
255
255
// General error unrelated to any particular claim.
256
256
return fmt .Errorf ("NodePrepareResources failed: %w" , err )
@@ -338,7 +338,6 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
338
338
// was generated for the referenced claim. There are valid use
339
339
// cases when this might happen, so we simply skip it.
340
340
if claimName == nil {
341
- klog .V (5 ).InfoS ("No CDI devices, no claim generated" , "pod" , klog .KObj (pod ), "podClaimName" , podClaim .Name )
342
341
continue
343
342
}
344
343
for _ , claim := range container .Resources .Claims {
@@ -362,16 +361,14 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
362
361
}
363
362
}
364
363
}
365
-
366
- klog .V (5 ).InfoS ("Determined CDI devices for pod" , "pod" , klog .KObj (pod ), "cdiDevices" , cdiDevices )
367
364
return & ContainerInfo {CDIDevices : cdiDevices }, nil
368
365
}
369
366
370
367
// UnprepareResources calls a driver's NodeUnprepareResource API for each resource claim owned by a pod.
371
368
// This function is idempotent and may be called multiple times against the same pod.
372
369
// As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
373
370
// already been successfully unprepared.
374
- func (m * ManagerImpl ) UnprepareResources (pod * v1.Pod ) error {
371
+ func (m * ManagerImpl ) UnprepareResources (ctx context. Context , pod * v1.Pod ) error {
375
372
var claimNames []string
376
373
for i := range pod .Spec .ResourceClaims {
377
374
claimName , _ , err := resourceclaim .Name (pod , & pod .Spec .ResourceClaims [i ])
@@ -386,10 +383,11 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
386
383
}
387
384
claimNames = append (claimNames , * claimName )
388
385
}
389
- return m .unprepareResources (pod .UID , pod .Namespace , claimNames )
386
+ return m .unprepareResources (ctx , pod .UID , pod .Namespace , claimNames )
390
387
}
391
388
392
- func (m * ManagerImpl ) unprepareResources (podUID types.UID , namespace string , claimNames []string ) error {
389
+ func (m * ManagerImpl ) unprepareResources (ctx context.Context , podUID types.UID , namespace string , claimNames []string ) error {
390
+ logger := klog .FromContext (ctx )
393
391
batches := make (map [string ][]* drapb.Claim )
394
392
claimNamesMap := make (map [types.UID ]string )
395
393
for _ , claimName := range claimNames {
@@ -445,7 +443,7 @@ func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, cla
445
443
if err != nil {
446
444
return fmt .Errorf ("get gRPC client for DRA driver %s: %w" , driverName , err )
447
445
}
448
- response , err := client .NodeUnprepareResources (context . Background () , & drapb.NodeUnprepareResourcesRequest {Claims : claims })
446
+ response , err := client .NodeUnprepareResources (ctx , & drapb.NodeUnprepareResourcesRequest {Claims : claims })
449
447
if err != nil {
450
448
// General error unrelated to any particular claim.
451
449
return fmt .Errorf ("NodeUnprepareResources failed: %w" , err )
@@ -473,7 +471,7 @@ func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, cla
473
471
for _ , claimName := range claimNamesMap {
474
472
claimInfo , _ := m .cache .get (claimName , namespace )
475
473
m .cache .delete (claimName , namespace )
476
- klog .V (6 ).InfoS ("Deleted claim info cache entry" , "claim" , klog .KRef (namespace , claimName ), "claimInfoEntry" , claimInfo )
474
+ logger .V (6 ).Info ("Deleted claim info cache entry" , "claim" , klog .KRef (namespace , claimName ), "claimInfoEntry" , claimInfo )
477
475
}
478
476
479
477
// Atomically sync the cache back to the checkpoint.
0 commit comments