Skip to content

Commit 62f9d6b

Browse files
committed
filter: more granular logging
Add more information, only at high verbosity levels, about the filter behavior; report the precise rejection reason, both in logs and in the user-visible record. The user-visible record (e.g. in pod events) reported in framework.Status must be concise, so we limit it to a single extra short word: the resource name. Signed-off-by: Francesco Romani <[email protected]>
1 parent 77a4ebf commit 62f9d6b

File tree

2 files changed

+39
-19
lines changed

2 files changed

+39
-19
lines changed

pkg/noderesourcetopology/filter.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
5050
clh := lh.WithValues(logging.KeyContainer, initContainer.Name, logging.KeyContainerKind, cntKind)
5151
clh.V(6).Info("desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...)
5252

53-
_, match := resourcesAvailableInAnyNUMANodes(clh, info, initContainer.Resources.Requests)
53+
_, match, reason := resourcesAvailableInAnyNUMANodes(clh, info, initContainer.Resources.Requests)
5454
if !match {
5555
msg := "cannot align " + cntKind + " container"
5656
// we can't align init container, so definitely we can't align a pod
57-
clh.V(2).Info(msg)
57+
clh.V(2).Info(msg, "reason", reason)
5858
return framework.NewStatus(framework.Unschedulable, msg)
5959
}
6060
}
@@ -63,10 +63,10 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
6363
clh := lh.WithValues(logging.KeyContainer, container.Name, logging.KeyContainerKind, logging.KindContainerApp)
6464
clh.V(6).Info("container requests", stringify.ResourceListToLoggable(container.Resources.Requests)...)
6565

66-
numaID, match := resourcesAvailableInAnyNUMANodes(clh, info, container.Resources.Requests)
66+
numaID, match, reason := resourcesAvailableInAnyNUMANodes(clh, info, container.Resources.Requests)
6767
if !match {
6868
// we can't align container, so definitely we can't align a pod
69-
clh.V(2).Info("cannot align container")
69+
clh.V(2).Info("cannot align container", "reason", reason)
7070
return framework.NewStatus(framework.Unschedulable, "cannot align container")
7171
}
7272

@@ -84,7 +84,7 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
8484

8585
// resourcesAvailableInAnyNUMANodes checks for sufficient resource and return the NUMAID that would be selected by Kubelet.
8686
// this function requires NUMANodeList with properly populated NUMANode, NUMAID should be in range 0-63
87-
func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resources v1.ResourceList) (int, bool) {
87+
func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resources v1.ResourceList) (int, bool, string) {
8888
numaID := highestNUMAID
8989
bitmask := bm.NewEmptyBitMask()
9090
// set all bits, each bit is a NUMA node, if resources couldn't be aligned
@@ -94,50 +94,54 @@ func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resource
9494
nodeResources := util.ResourceList(info.node.Allocatable)
9595

9696
for resource, quantity := range resources {
97+
clh := lh.WithValues("resource", resource)
9798
if quantity.IsZero() {
9899
// why bother? everything's fine from the perspective of this resource
99-
lh.V(4).Info("ignoring zero-qty resource request", "resource", resource)
100+
clh.V(4).Info("ignoring zero-qty resource request")
100101
continue
101102
}
102103

103104
if _, ok := nodeResources[resource]; !ok {
104105
// some resources may not expose NUMA affinity (device plugins, extended resources), but all resources
105106
// must be reported at node level; thus, if they are not present at node level, we can safely assume
106107
// we don't have the resource at all.
107-
lh.V(2).Info("early verdict: cannot meet request", "resource", resource, "suitable", "false")
108-
return numaID, false
108+
clh.V(2).Info("early verdict: cannot meet request")
109+
return -1, false, string(resource)
109110
}
110111

111112
// for each requested resource, calculate which NUMA slots are good fits, and then AND with the aggregated bitmask, IOW unset appropriate bit if we can't align resources, or set it
112113
// obvious, bits which are not in the NUMA id's range would be unset
113114
hasNUMAAffinity := false
114115
resourceBitmask := bm.NewEmptyBitMask()
115116
for _, numaNode := range info.numaNodes {
117+
nlh := clh.WithValues("numaCell", numaNode.NUMAID)
116118
numaQuantity, ok := numaNode.Resources[resource]
117119
if !ok {
120+
nlh.V(6).Info("missing")
118121
continue
119122
}
120123

121124
hasNUMAAffinity = true
122125
if !isResourceSetSuitable(info.qos, resource, quantity, numaQuantity) {
126+
nlh.V(6).Info("discarded", "quantity", quantity.String(), "numaQuantity", numaQuantity.String())
123127
continue
124128
}
125129

126130
resourceBitmask.Add(numaNode.NUMAID)
127-
lh.V(6).Info("feasible", "numaCell", numaNode.NUMAID, "resource", resource)
131+
nlh.V(6).Info("feasible")
128132
}
129133

130134
// non-native resources or ephemeral-storage may not expose NUMA affinity,
131135
// but since they are available at node level, this is fine
132136
if !hasNUMAAffinity && isHostLevelResource(resource) {
133-
lh.V(6).Info("resource available at host level (no NUMA affinity)", "resource", resource)
137+
clh.V(6).Info("resource available at host level (no NUMA affinity)")
134138
continue
135139
}
136140

137141
bitmask.And(resourceBitmask)
138142
if bitmask.IsEmpty() {
139-
lh.V(2).Info("early verdict", "resource", resource, "suitable", "false")
140-
return numaID, false
143+
lh.V(2).Info("early verdict: cannot find affinity")
144+
return numaID, false, string(resource)
141145
}
142146
}
143147
// according to TopologyManager, the preferred NUMA affinity, is the narrowest one.
@@ -149,16 +153,16 @@ func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resource
149153
// at least one NUMA node is available
150154
ret := !bitmask.IsEmpty()
151155
lh.V(2).Info("final verdict", "suitable", ret, "numaCell", numaID)
152-
return numaID, ret
156+
return numaID, ret, "generic"
153157
}
154158

155159
func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterInfo) *framework.Status {
156160
resources := util.GetPodEffectiveRequest(pod)
157161
lh.V(6).Info("pod desired resources", stringify.ResourceListToLoggable(resources)...)
158162

159-
numaID, match := resourcesAvailableInAnyNUMANodes(lh, info, resources)
163+
numaID, match, reason := resourcesAvailableInAnyNUMANodes(lh, info, resources)
160164
if !match {
161-
lh.V(2).Info("cannot align pod", "name", pod.Name)
165+
lh.V(2).Info("cannot align pod", "name", pod.Name, "reason", reason)
162166
return framework.NewStatus(framework.Unschedulable, "cannot align pod")
163167
}
164168
lh.V(4).Info("all container placed", "numaCell", numaID)

pkg/noderesourcetopology/filter_test.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ package noderesourcetopology
1919
import (
2020
"context"
2121
"fmt"
22-
"reflect"
22+
"strings"
2323
"testing"
2424

25+
"github.com/google/go-cmp/cmp"
2526
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
2627

2728
v1 "k8s.io/api/core/v1"
@@ -696,7 +697,7 @@ func TestNodeResourceTopology(t *testing.T) {
696697
}
697698
gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
698699

699-
if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
700+
if !quasiEqualStatus(gotStatus, tt.wantStatus) {
700701
t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
701702
}
702703
})
@@ -913,7 +914,7 @@ func TestNodeResourceTopologyMultiContainerPodScope(t *testing.T) {
913914
}
914915
gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
915916

916-
if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
917+
if !quasiEqualStatus(gotStatus, tt.wantStatus) {
917918
t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
918919
}
919920
})
@@ -1172,7 +1173,7 @@ func TestNodeResourceTopologyMultiContainerContainerScope(t *testing.T) {
11721173
nodeInfo.SetNode(nodes[0])
11731174
gotStatus := tm.Filter(context.Background(), framework.NewCycleState(), tt.pod, nodeInfo)
11741175

1175-
if !reflect.DeepEqual(gotStatus, tt.wantStatus) {
1176+
if !quasiEqualStatus(gotStatus, tt.wantStatus) {
11761177
t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus)
11771178
}
11781179
})
@@ -1286,3 +1287,18 @@ func parseState(error string) *framework.Status {
12861287

12871288
return framework.NewStatus(framework.Unschedulable, error)
12881289
}
1290+
1291+
func quasiEqualStatus(s, x *framework.Status) bool {
1292+
if s == nil || x == nil {
1293+
return s.IsSuccess() && x.IsSuccess()
1294+
}
1295+
if s.Code() != x.Code() {
1296+
return false
1297+
}
1298+
sMsg := s.Message()
1299+
xMsg := x.Message()
1300+
if !strings.HasPrefix(sMsg, xMsg) {
1301+
return false
1302+
}
1303+
return cmp.Equal(s.Plugin(), x.Plugin())
1304+
}

0 commit comments

Comments
 (0)