Commit ad45531

filter: more granular logging
Add more information about the filter behavior, emitted only at high verbosity levels, and report the precise rejection reason both in the logs and in the user-visible record. The user-visible record reported in framework.Status (e.g. surfaced in pod events) must stay concise, so we limit the addition to a single extra short word: the name of the resource that could not be aligned.

Signed-off-by: Francesco Romani <[email protected]>
1 parent 93cda0d commit ad45531
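For illustration only (not part of the commit): with the one-word reason appended, a rejection that used to read "cannot align container" now also names the resource. The sketch below only shows the message shape; "example.com/gpu" is a made-up resource name, and the real message is built via framework.NewStatus in the diff that follows.

package main

import "fmt"

func main() {
	reason := "example.com/gpu" // the single extra word: the rejected resource
	fmt.Println("cannot align container: " + reason)
	// Output: cannot align container: example.com/gpu
}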

File tree

2 files changed: +42 −22 lines
pkg/noderesourcetopology/filter.go

Lines changed: 22 additions & 18 deletions
@@ -52,24 +52,24 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
 		clh := lh.WithValues(logging.KeyContainer, initContainer.Name, logging.KeyContainerKind, cntKind)
 		clh.V(6).Info("desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...)
 
-		_, match := resourcesAvailableInAnyNUMANodes(clh, info, initContainer.Resources.Requests)
+		_, match, reason := resourcesAvailableInAnyNUMANodes(clh, info, initContainer.Resources.Requests)
 		if !match {
 			msg := "cannot align " + cntKind + " container"
 			// we can't align init container, so definitely we can't align a pod
-			clh.V(2).Info(msg)
-			return framework.NewStatus(framework.Unschedulable, msg)
+			clh.V(2).Info(msg, "reason", reason)
+			return framework.NewStatus(framework.Unschedulable, msg+": "+reason)
 		}
 	}
 
 	for _, container := range pod.Spec.Containers {
 		clh := lh.WithValues(logging.KeyContainer, container.Name, logging.KeyContainerKind, logging.KindContainerApp)
 		clh.V(6).Info("container requests", stringify.ResourceListToLoggable(container.Resources.Requests)...)
 
-		numaID, match := resourcesAvailableInAnyNUMANodes(clh, info, container.Resources.Requests)
+		numaID, match, reason := resourcesAvailableInAnyNUMANodes(clh, info, container.Resources.Requests)
 		if !match {
 			// we can't align container, so definitely we can't align a pod
-			clh.V(2).Info("cannot align container")
-			return framework.NewStatus(framework.Unschedulable, "cannot align container")
+			clh.V(2).Info("cannot align container", "reason", reason)
+			return framework.NewStatus(framework.Unschedulable, "cannot align container: "+reason)
 		}
 
 		// subtract the resources requested by the container from the given NUMA.
@@ -86,7 +86,7 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
 
 // resourcesAvailableInAnyNUMANodes checks for sufficient resource and return the NUMAID that would be selected by Kubelet.
 // this function requires NUMANodeList with properly populated NUMANode, NUMAID should be in range 0-63
-func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resources v1.ResourceList) (int, bool) {
+func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resources v1.ResourceList) (int, bool, string) {
 	numaID := highestNUMAID
 	bitmask := bm.NewEmptyBitMask()
 	// set all bits, each bit is a NUMA node, if resources couldn't be aligned
@@ -96,50 +96,54 @@ func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resource
 	nodeResources := util.ResourceList(info.node.Allocatable)
 
 	for resource, quantity := range resources {
+		clh := lh.WithValues("resource", resource)
 		if quantity.IsZero() {
 			// why bother? everything's fine from the perspective of this resource
-			lh.V(4).Info("ignoring zero-qty resource request", "resource", resource)
+			clh.V(4).Info("ignoring zero-qty resource request")
 			continue
 		}
 
 		if _, ok := nodeResources[resource]; !ok {
 			// some resources may not expose NUMA affinity (device plugins, extended resources), but all resources
 			// must be reported at node level; thus, if they are not present at node level, we can safely assume
 			// we don't have the resource at all.
-			lh.V(2).Info("early verdict: cannot meet request", "resource", resource, "suitable", "false")
-			return numaID, false
+			clh.V(2).Info("early verdict: cannot meet request")
+			return -1, false, string(resource)
 		}
 
 		// for each requested resource, calculate which NUMA slots are good fits, and then AND with the aggregated bitmask, IOW unset appropriate bit if we can't align resources, or set it
 		// obvious, bits which are not in the NUMA id's range would be unset
 		hasNUMAAffinity := false
 		resourceBitmask := bm.NewEmptyBitMask()
 		for _, numaNode := range info.numaNodes {
+			nlh := clh.WithValues("numaCell", numaNode.NUMAID)
 			numaQuantity, ok := numaNode.Resources[resource]
 			if !ok {
+				nlh.V(6).Info("missing")
 				continue
 			}
 
 			hasNUMAAffinity = true
 			if !isResourceSetSuitable(info.qos, resource, quantity, numaQuantity) {
+				nlh.V(6).Info("discarded", "quantity", quantity.String(), "numaQuantity", numaQuantity.String())
 				continue
 			}
 
 			resourceBitmask.Add(numaNode.NUMAID)
-			lh.V(6).Info("feasible", "numaCell", numaNode.NUMAID, "resource", resource)
+			nlh.V(6).Info("feasible")
 		}
 
 		// non-native resources or ephemeral-storage may not expose NUMA affinity,
 		// but since they are available at node level, this is fine
 		if !hasNUMAAffinity && isHostLevelResource(resource) {
-			lh.V(6).Info("resource available at host level (no NUMA affinity)", "resource", resource)
+			clh.V(6).Info("resource available at host level (no NUMA affinity)")
 			continue
 		}
 
 		bitmask.And(resourceBitmask)
 		if bitmask.IsEmpty() {
-			lh.V(2).Info("early verdict", "resource", resource, "suitable", "false")
-			return numaID, false
+			lh.V(2).Info("early verdict: cannot find affinity")
+			return numaID, false, string(resource)
 		}
 	}
 	// according to TopologyManager, the preferred NUMA affinity, is the narrowest one.
@@ -151,7 +155,7 @@ func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resource
 	// at least one NUMA node is available
 	ret := !bitmask.IsEmpty()
 	lh.V(2).Info("final verdict", "suitable", ret, "numaCell", numaID)
-	return numaID, ret
+	return numaID, ret, "generic"
 }
 
 func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterInfo) *framework.Status {
@@ -160,10 +164,10 @@ func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterInfo) *f
 	resources := util.GetPodEffectiveRequest(pod)
 	lh.V(6).Info("pod desired resources", stringify.ResourceListToLoggable(resources)...)
 
-	numaID, match := resourcesAvailableInAnyNUMANodes(lh, info, resources)
+	numaID, match, reason := resourcesAvailableInAnyNUMANodes(lh, info, resources)
 	if !match {
-		lh.V(2).Info("cannot align pod", "name", pod.Name)
-		return framework.NewStatus(framework.Unschedulable, "cannot align pod")
+		lh.V(2).Info("cannot align pod", "name", pod.Name, "reason", reason)
+		return framework.NewStatus(framework.Unschedulable, "cannot align pod: "+reason)
 	}
 	lh.V(4).Info("all container placed", "numaCell", numaID)
 	return nil
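A minimal, standalone sketch of the logger nesting this diff introduces: a per-resource logger (clh, keyed by "resource") and, inside the NUMA loop, a per-cell logger (nlh, keyed by "numaCell"). It uses go-logr's funcr sink purely for illustration; the plugin itself gets its logger from the scheduler framework, and the resource name, cell ID, and quantities below are made up.

package main

import (
	"fmt"

	"github.com/go-logr/logr/funcr"
)

func main() {
	// Verbosity 6 enables the per-cell "feasible"/"discarded"/"missing" lines.
	lh := funcr.New(func(prefix, args string) { fmt.Println(args) },
		funcr.Options{Verbosity: 6})

	clh := lh.WithValues("resource", "cpu") // attached once per requested resource
	nlh := clh.WithValues("numaCell", 0)    // attached once per NUMA cell
	nlh.V(6).Info("discarded", "quantity", "24", "numaQuantity", "20")
	clh.V(2).Info("early verdict: cannot find affinity")
}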

pkg/noderesourcetopology/filter_test.go

Lines changed: 20 additions & 4 deletions
@@ -19,9 +19,10 @@ package noderesourcetopology
 import (
 	"context"
 	"fmt"
-	"reflect"
+	"strings"
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
 
 	v1 "k8s.io/api/core/v1"
@@ -696,7 +697,7 @@ func TestNodeResourceTopology(t *testing.T) {
 			}
 			gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
 
-			if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
+			if !quasiEqualStatus(gotStatus, tt.wantStatus) {
 				t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
 			}
 		})
@@ -913,7 +914,7 @@ func TestNodeResourceTopologyMultiContainerPodScope(t *testing.T) {
 			}
 			gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
 
-			if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
+			if !quasiEqualStatus(gotStatus, tt.wantStatus) {
 				t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
 			}
 		})
@@ -1172,7 +1173,7 @@ func TestNodeResourceTopologyMultiContainerContainerScope(t *testing.T) {
 			nodeInfo.SetNode(nodes[0])
 			gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
 
-			if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
+			if !quasiEqualStatus(gotStatus, tt.wantStatus) {
 				t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
 			}
 		})
@@ -1286,3 +1287,18 @@ func parseState(error string) *framework.Status {
 
 	return framework.NewStatus(framework.Unschedulable, error)
 }
+
+func quasiEqualStatus(s, x *framework.Status) bool {
+	if s == nil || x == nil {
+		return s.IsSuccess() && x.IsSuccess()
+	}
+	if s.Code() != x.Code() {
+		return false
+	}
+	sMsg := s.Message()
+	xMsg := x.Message()
+	if !strings.HasPrefix(sMsg, xMsg) {
+		return false
+	}
+	return cmp.Equal(s.Plugin(), x.Plugin())
+}
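quasiEqualStatus compares the status code and plugin name exactly but only requires the expected message to be a prefix of the actual one: the existing fixtures keep the old concise messages while the filter now appends the one-word reason, so exact reflect.DeepEqual comparison would fail. A hypothetical test (not part of the commit; "cpu" stands in for whatever resource name the filter appends) showing the intent:

func TestQuasiEqualStatusPrefix(t *testing.T) {
	// old fixture text vs. new message carrying the one-word reason
	want := framework.NewStatus(framework.Unschedulable, "cannot align container")
	got := framework.NewStatus(framework.Unschedulable, "cannot align container: cpu")
	if !quasiEqualStatus(got, want) {
		t.Errorf("expected prefix match: got %q, want prefix %q", got.Message(), want.Message())
	}
}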
