@@ -30,6 +30,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/types"
 	podlisterv1 "k8s.io/client-go/listers/core/v1"
+	"k8s.io/klog/v2"
 
 	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -64,12 +65,11 @@ func NewOverReserve(ctx context.Context, lh logr.Logger, cfg *apiconfig.NodeReso
 	resyncMethod := getCacheResyncMethod(lh, cfg)
 
 	nrtObjs := &topologyv1alpha2.NodeResourceTopologyList{}
-	// TODO: we should pass-in a context in the future
 	if err := client.List(ctx, nrtObjs); err != nil {
 		return nil, err
 	}
 
-	lh.V(3).Info("initializing", "noderesourcetopologies", len(nrtObjs.Items), "method", resyncMethod)
+	lh.V(2).Info("initializing", "noderesourcetopologies", len(nrtObjs.Items), "method", resyncMethod)
 	obj := &OverReserve{
 		lh:     lh,
 		client: client,
@@ -100,11 +100,11 @@ func (ov *OverReserve) GetCachedNRTCopy(ctx context.Context, nodeName string, po
 		return nrt, true
 	}
 
-	logID := logging.PodLogID(pod)
-	lh := ov.lh.WithValues("logID", logID, "podUID", pod.GetUID(), "node", nodeName)
+	logID := klog.KObj(pod)
+	lh := ov.lh.WithValues(logging.KeyPod, logID, logging.KeyPodUID, logging.PodUID(pod), logging.KeyNode, nodeName)
 
 	lh.V(6).Info("NRT", "fromcache", stringify.NodeResourceTopologyResources(nrt))
-	nodeAssumedResources.UpdateNRT(logID, nrt)
+	nodeAssumedResources.UpdateNRT(nrt, logging.KeyPod, logID)
 
 	lh.V(5).Info("NRT", "withassumed", stringify.NodeResourceTopologyResources(nrt))
 	return nrt, true
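
The hunk above replaces the free-form `logID` string with `klog.KObj(pod)`, so the pod reference is emitted as a structured namespace/name value next to the UID, under the shared `logging.Key*` constants. A minimal sketch of how `klog.KObj` renders, assuming a hypothetical pod and using plain string keys in place of the `logging.KeyPod`/`logging.KeyPodUID` constants (illustration only, not part of this PR):

```go
package main

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	// Hypothetical pod, only used to show the log rendering.
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test-pod", UID: "abc-123"},
	}
	// klog.KObj yields an ObjectRef that formats as "namespace/name",
	// so the key/value pair stays stable and machine-parsable.
	klog.InfoS("NRT", "pod", klog.KObj(pod), "podUID", pod.GetUID())
}
```

With the default text format this prints something like `"NRT" pod="default/test-pod" podUID="abc-123"`, which is easier to filter on than a hand-built log ID string.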
@@ -114,23 +114,23 @@ func (ov *OverReserve) NodeMaybeOverReserved(nodeName string, pod *corev1.Pod) {
 	ov.lock.Lock()
 	defer ov.lock.Unlock()
 	val := ov.nodesMaybeOverreserved.Incr(nodeName)
-	ov.lh.V(4).Info("mark discarded", "node", nodeName, "count", val)
+	ov.lh.V(4).Info("mark discarded", logging.KeyNode, nodeName, "count", val)
 }
 
 func (ov *OverReserve) NodeHasForeignPods(nodeName string, pod *corev1.Pod) {
-	lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName)
+	lh := ov.lh.WithValues(logging.KeyPod, klog.KObj(pod), logging.KeyPodUID, logging.PodUID(pod), logging.KeyNode, nodeName)
 	ov.lock.Lock()
 	defer ov.lock.Unlock()
 	if !ov.nrts.Contains(nodeName) {
 		lh.V(5).Info("ignoring foreign pods", "nrtinfo", "missing")
 		return
 	}
 	val := ov.nodesWithForeignPods.Incr(nodeName)
-	lh.V(4).Info("marked with foreign pods", "count", val)
+	lh.V(2).Info("marked with foreign pods", logging.KeyNode, nodeName, "count", val)
 }
 
 func (ov *OverReserve) ReserveNodeResources(nodeName string, pod *corev1.Pod) {
-	lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName)
+	lh := ov.lh.WithValues(logging.KeyPod, klog.KObj(pod), logging.KeyPodUID, logging.PodUID(pod), logging.KeyNode, nodeName)
 	ov.lock.Lock()
 	defer ov.lock.Unlock()
 	nodeAssumedResources, ok := ov.assumedResources[nodeName]
@@ -140,26 +140,26 @@ func (ov *OverReserve) ReserveNodeResources(nodeName string, pod *corev1.Pod) {
 	}
 
 	nodeAssumedResources.AddPod(pod)
-	lh.V(5).Info("post reserve", "assumedResources", nodeAssumedResources.String())
+	lh.V(2).Info("post reserve", logging.KeyNode, nodeName, "assumedResources", nodeAssumedResources.String())
 
 	ov.nodesMaybeOverreserved.Delete(nodeName)
-	lh.V(6).Info("reset discard counter")
+	lh.V(6).Info("reset discard counter", logging.KeyNode, nodeName)
 }
 
 func (ov *OverReserve) UnreserveNodeResources(nodeName string, pod *corev1.Pod) {
-	lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName)
+	lh := ov.lh.WithValues(logging.KeyPod, klog.KObj(pod), logging.KeyPodUID, logging.PodUID(pod), logging.KeyNode, nodeName)
 	ov.lock.Lock()
 	defer ov.lock.Unlock()
 	nodeAssumedResources, ok := ov.assumedResources[nodeName]
 	if !ok {
 		// this should not happen, so we're vocal about it
 		// we don't return error because not much to do to recover anyway
-		lh.V(3).Info("no resources tracked")
+		lh.V(2).Info("no resources tracked", logging.KeyNode, nodeName)
 		return
 	}
 
 	nodeAssumedResources.DeletePod(pod)
-	lh.V(5).Info("post release", "assumedResources", nodeAssumedResources.String())
+	lh.V(2).Info("post unreserve", logging.KeyNode, nodeName, "assumedResources", nodeAssumedResources.String())
 }
 
 // NodesMaybeOverReserved returns a slice of all the node names which have been discarded previously,
@@ -201,81 +201,78 @@ func (ov *OverReserve) NodesMaybeOverReserved(lh logr.Logger) []string {
 // too aggressive resync attempts, so to more, likely unnecessary, computation work on the scheduler side.
 func (ov *OverReserve) Resync() {
 	// we are not working with a specific pod, so we need a unique key to track this flow
-	lh := ov.lh.WithValues("logID", logging.TimeLogID(), "flow", logging.FlowCacheSync)
-	lh.V(4).Info(logging.FlowBegin)
-	defer lh.V(4).Info(logging.FlowEnd)
+	lh_ := ov.lh.WithName(logging.FlowCacheSync).WithValues(logging.KeyLogID, logging.TimeLogID())
+	lh_.V(4).Info(logging.FlowBegin)
+	defer lh_.V(4).Info(logging.FlowEnd)
 
-	nodeNames := ov.NodesMaybeOverReserved(lh)
+	nodeNames := ov.NodesMaybeOverReserved(lh_)
 	// avoid as much as we can unnecessary work and logs.
 	if len(nodeNames) == 0 {
-		lh.V(6).Info("no dirty nodes detected")
+		lh_.V(5).Info("no dirty nodes detected")
 		return
 	}
 
 	// node -> pod identifier (namespace, name)
-	nodeToObjsMap, err := makeNodeToPodDataMap(lh, ov.podLister, ov.isPodRelevant)
+	nodeToObjsMap, err := makeNodeToPodDataMap(lh_, ov.podLister, ov.isPodRelevant)
 	if err != nil {
-		lh.Error(err, "cannot find the mapping between running pods and nodes")
+		lh_.Error(err, "cannot find the mapping between running pods and nodes")
 		return
 	}
 
-	lh.V(6).Info("resync NodeTopology cache starting")
-	defer lh.V(6).Info("resync NodeTopology cache complete")
-
 	var nrtUpdates []*topologyv1alpha2.NodeResourceTopology
 	for _, nodeName := range nodeNames {
-		lh = lh.WithValues("node", nodeName)
+		lh := lh_.WithValues(logging.KeyNode, nodeName)
 
 		nrtCandidate := &topologyv1alpha2.NodeResourceTopology{}
 		if err := ov.client.Get(context.Background(), types.NamespacedName{Name: nodeName}, nrtCandidate); err != nil {
-			lh.V(3).Info("failed to get NodeTopology", "error", err)
+			lh.V(2).Info("failed to get NodeTopology", "error", err)
 			continue
 		}
 		if nrtCandidate == nil {
-			lh.V(3).Info("missing NodeTopology")
+			lh.V(2).Info("missing NodeTopology")
 			continue
 		}
 
 		objs, ok := nodeToObjsMap[nodeName]
 		if !ok {
 			// this really should never happen
-			lh.V(3).Info("cannot find any pod for node")
+			lh.Info("cannot find any pod for node")
 			continue
 		}
 
 		pfpExpected, onlyExclRes := podFingerprintForNodeTopology(nrtCandidate, ov.resyncMethod)
 		if pfpExpected == "" {
-			lh.V(3).Info("missing NodeTopology podset fingerprint data")
+			lh.V(2).Info("missing NodeTopology podset fingerprint data")
 			continue
 		}
 
-		lh.V(6).Info("trying to sync NodeTopology", "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes)
+		lh.V(4).Info("trying to sync NodeTopology", "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes)
 
 		err = checkPodFingerprintForNode(lh, objs, nodeName, pfpExpected, onlyExclRes)
 		if errors.Is(err, podfingerprint.ErrSignatureMismatch) {
			// can happen, not critical
-			lh.V(5).Info("NodeTopology podset fingerprint mismatch")
+			lh.V(4).Info("NodeTopology podset fingerprint mismatch")
 			continue
 		}
 		if err != nil {
 			// should never happen, let's be vocal
-			lh.V(3).Error(err, "checking NodeTopology podset fingerprint")
+			lh.Error(err, "checking NodeTopology podset fingerprint")
 			continue
 		}
 
 		lh.V(4).Info("overriding cached info")
 		nrtUpdates = append(nrtUpdates, nrtCandidate)
 	}
 
-	ov.FlushNodes(lh, nrtUpdates...)
+	ov.FlushNodes(lh_, nrtUpdates...)
 }
 
 // FlushNodes drops all the cached information about a given node, resetting its state clean.
 func (ov *OverReserve) FlushNodes(lh logr.Logger, nrts ...*topologyv1alpha2.NodeResourceTopology) {
 	ov.lock.Lock()
 	defer ov.lock.Unlock()
 	for _, nrt := range nrts {
-		lh.V(4).Info("flushing", "node", nrt.Name)
+		lh.V(2).Info("flushing", logging.KeyNode, nrt.Name)
 		ov.nrts.Update(nrt)
 		delete(ov.assumedResources, nrt.Name)
 		ov.nodesMaybeOverreserved.Delete(nrt.Name)
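
Beyond the verbosity retuning, the loop change in Resync is worth calling out: the old code reassigned the flow logger (`lh = lh.WithValues("node", nodeName)`), so each iteration stacked another `node` key/value pair onto the same logger, while the new code derives a fresh per-iteration `lh` from the flow-level `lh_`, so each message carries exactly one `node` value. A small self-contained sketch of the difference, using `funcr` only to obtain a printable `logr.Logger` (illustration only, not part of this PR):

```go
package main

import (
	"fmt"

	"github.com/go-logr/logr/funcr"
)

func main() {
	base := funcr.New(func(prefix, args string) {
		fmt.Println(args)
	}, funcr.Options{})

	nodes := []string{"node-a", "node-b"}

	// Anti-pattern: reassigning the outer logger accumulates one "node"
	// value per iteration ("node"="node-a" "node"="node-b" ...).
	lh := base
	for _, n := range nodes {
		lh = lh.WithValues("node", n)
		lh.Info("accumulating")
	}

	// Pattern adopted by the change: each iteration derives its own logger
	// from the flow-level one, so only the current node is attached.
	for _, n := range nodes {
		iterLh := base.WithValues("node", n)
		iterLh.Info("fresh per iteration")
	}
}
```

Keeping `lh_` for the flow and a shadowed per-node `lh` inside the loop also means the final `ov.FlushNodes(lh_, ...)` call logs under the flow-scoped logger rather than whichever node happened to be processed last.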