Skip to content

Commit ed1b856

Browse files
Merge pull request #28299 from dgoodwin/node-pod-monitor-intervals
Port Node and Pod State to Structured Intervals
2 parents dcf6a4e + cba88ea commit ed1b856

File tree

30 files changed

+2164
-791
lines changed

30 files changed

+2164
-791
lines changed

pkg/monitor/monitorapi/construction.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,22 @@ func (b *IntervalBuilder) Build(from, to time.Time) Interval {
6262
return ret
6363
}
6464

65+
// BuildNow creates the final interval with a from/to timestamp of now.
66+
func (b *IntervalBuilder) BuildNow() Interval {
67+
now := time.Now()
68+
ret := Interval{
69+
Condition: b.BuildCondition(),
70+
Display: b.display,
71+
Source: b.source,
72+
From: now,
73+
To: now,
74+
}
75+
76+
return ret
77+
}
78+
6579
func (b *IntervalBuilder) Message(mb *MessageBuilder) *IntervalBuilder {
66-
b.structuredMessage = mb.build()
80+
b.structuredMessage = mb.Build()
6781
return b
6882
}
6983

@@ -417,8 +431,8 @@ func (m *MessageBuilder) HumanMessagef(messageFormat string, args ...interface{}
417431
return m.HumanMessage(fmt.Sprintf(messageFormat, args...))
418432
}
419433

420-
// build creates the final StructuredMessage with all data assembled by this builder.
421-
func (m *MessageBuilder) build() Message {
434+
// Build creates the final StructuredMessage with all data assembled by this builder.
435+
func (m *MessageBuilder) Build() Message {
422436
ret := Message{
423437
Annotations: map[AnnotationKey]string{},
424438
}

pkg/monitor/monitorapi/identification_pod.go

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@ func LocatePod(pod *corev1.Pod) string {
1313
return fmt.Sprintf("ns/%s pod/%s node/%s uid/%s", pod.Namespace, pod.Name, pod.Spec.NodeName, pod.UID)
1414
}
1515

16-
func LocatePodContainer(pod *corev1.Pod, containerName string) string {
17-
return fmt.Sprintf("ns/%s pod/%s node/%s uid/%s container/%s", pod.Namespace, pod.Name, pod.Spec.NodeName, pod.UID, containerName)
18-
}
19-
2016
// NonUniquePodLocatorFrom produces an inexact locator based on namespace and name. This is useful when dealing with events
2117
// that are produced that do not contain UIDs. Ultimately, we should use UIDs everywhere, but this is will keep some our
2218
// matching working until then.
@@ -26,6 +22,7 @@ func NonUniquePodLocatorFrom(locator string) string {
2622
return fmt.Sprintf("ns/%s pod/%s", namespace, parts["pod"])
2723
}
2824

25+
// TODO: all callers should eventuall be using structured locator variant below:
2926
func PodFrom(locator string) PodReference {
3027
parts := LocatorParts(locator)
3128
namespace := NamespaceFrom(parts)
@@ -43,10 +40,27 @@ func PodFrom(locator string) PodReference {
4340
}
4441
}
4542

46-
func ContainerFrom(locator string) ContainerReference {
47-
pod := PodFrom(locator)
48-
parts := LocatorParts(locator)
49-
name := parts["container"]
43+
// PodFromLocator is used to strip down a locator to just a pod. (as it may contain additional keys like container or node)
44+
// that we do not want for some uses.
45+
func PodFromLocator(locator Locator) PodReference {
46+
namespace := locator.Keys[LocatorNamespaceKey]
47+
name := locator.Keys[LocatorPodKey]
48+
uid := locator.Keys[LocatorUIDKey]
49+
if len(namespace) == 0 || len(name) == 0 {
50+
return PodReference{}
51+
}
52+
return PodReference{
53+
NamespacedReference: NamespacedReference{
54+
Namespace: namespace,
55+
Name: name,
56+
UID: uid,
57+
},
58+
}
59+
}
60+
61+
func ContainerFrom(locator Locator) ContainerReference {
62+
pod := PodFrom(locator.OldLocator())
63+
name := locator.Keys[LocatorContainerKey]
5064
if len(name) == 0 || len(pod.UID) == 0 {
5165
return ContainerReference{}
5266
}
@@ -61,17 +75,17 @@ type PodReference struct {
6175
NamespacedReference
6276
}
6377

64-
func (r PodReference) ToLocator() string {
65-
return fmt.Sprintf("ns/%s pod/%s uid/%s", r.Namespace, r.Name, r.UID)
78+
func (r PodReference) ToLocator() Locator {
79+
return NewLocator().PodFromNames(r.Namespace, r.Name, r.UID)
6680
}
6781

6882
type ContainerReference struct {
6983
Pod PodReference
7084
ContainerName string
7185
}
7286

73-
func (r ContainerReference) ToLocator() string {
74-
return fmt.Sprintf("ns/%s pod/%s uid/%s container/%s", r.Pod.Namespace, r.Pod.Name, r.Pod.UID, r.ContainerName)
87+
func (r ContainerReference) ToLocator() Locator {
88+
return NewLocator().ContainerFromNames(r.Pod.Namespace, r.Pod.Name, r.Pod.UID, r.ContainerName)
7589
}
7690

7791
func AnnotationsFromMessage(message string) map[AnnotationKey]string {

pkg/monitor/monitorapi/types.go

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ const (
165165
PodReasonForceDelete IntervalReason = "ForceDelete"
166166
PodReasonDeleted IntervalReason = "Deleted"
167167
PodReasonScheduled IntervalReason = "Scheduled"
168+
PodReasonEvicted IntervalReason = "Evicted"
169+
PodReasonPreempted IntervalReason = "Preempted"
170+
PodReasonFailed IntervalReason = "Failed"
168171

169172
ContainerReasonContainerExit IntervalReason = "ContainerExit"
170173
ContainerReasonContainerStart IntervalReason = "ContainerStart"
@@ -184,6 +187,9 @@ const (
184187
NodeNotReadyReason IntervalReason = "NotReady"
185188
NodeFailedLease IntervalReason = "FailedToUpdateLease"
186189

190+
MachineConfigChangeReason IntervalReason = "MachineConfigChange"
191+
MachineConfigReachedReason IntervalReason = "MachineConfigReached"
192+
187193
Timeout IntervalReason = "Timeout"
188194

189195
E2ETestStarted IntervalReason = "E2ETestStarted"
@@ -196,6 +202,7 @@ const (
196202
AnnotationReason AnnotationKey = "reason"
197203
AnnotationContainerExitCode AnnotationKey = "code"
198204
AnnotationCause AnnotationKey = "cause"
205+
AnnotationConfig AnnotationKey = "config"
199206
AnnotationNode AnnotationKey = "node"
200207
AnnotationEtcdLocalMember AnnotationKey = "local-member-id"
201208
AnnotationEtcdTerm AnnotationKey = "term"
@@ -207,10 +214,18 @@ const (
207214
// TODO this looks wrong. seems like it ought to be set in the to/from
208215
AnnotationDuration AnnotationKey = "duration"
209216
AnnotationRequestAuditID AnnotationKey = "request-audit-id"
217+
AnnotationRoles AnnotationKey = "roles"
210218
AnnotationStatus AnnotationKey = "status"
211219
AnnotationCondition AnnotationKey = "condition"
212220
)
213221

222+
// ConstructionOwner was originally meant to signify that an interval was derived from other intervals.
223+
// This allowed for the possibility of testing interval generation by feeding in only source intervals,
224+
// and checking what was generated.
225+
// TODO: likely want to drop this concept in favor of Source, plus a flag automatically applied to any
226+
// intervals coming back from the monitor test call to generate calculated intervals. Source
227+
// will replace the use of what constructed the interval, and the flag will allow us to see what is derived
228+
// and what isn't.
214229
type ConstructionOwner string
215230

216231
const (
@@ -229,6 +244,9 @@ type Message struct {
229244
Annotations map[AnnotationKey]string `json:"annotations"`
230245
}
231246

247+
// IntervalSource is used to type/categorize all intervals based on what created them.
248+
// This is intended to be used to group, and when combined with the display flag, signal that
249+
// they should be visible by default in the UIs that render interval charts.
232250
type IntervalSource string
233251

234252
const (
@@ -238,13 +256,16 @@ const (
238256
SourceE2ETest IntervalSource = "E2ETest"
239257
SourceNetworkManagerLog IntervalSource = "NetworkMangerLog"
240258
SourceNodeMonitor IntervalSource = "NodeMonitor"
259+
SourceSystemJournalScanner IntervalSource = "KubeletLogScanner"
241260
SourcePodLog IntervalSource = "PodLog"
242261
SourcePodMonitor IntervalSource = "PodMonitor"
243262
SourceKubeEvent IntervalSource = "KubeEvent"
244263
SourceTestData IntervalSource = "TestData" // some tests have no real source to assign
245264
SourcePathologicalEventMarker IntervalSource = "PathologicalEventMarker" // not sure if this is really helpful since the events all have a different origin
246265
SourceClusterOperatorMonitor IntervalSource = "ClusterOperatorMonitor"
247266
SourceOperatorState IntervalSource = "OperatorState"
267+
SourceNodeState = "NodeState"
268+
SourcePodState = "PodState"
248269
)
249270

250271
type Interval struct {
@@ -303,25 +324,73 @@ func (i Message) OldMessage() string {
303324
}
304325

305326
func (i Locator) OldLocator() string {
306-
keys := sets.NewString()
327+
keys := []string{}
307328
for k := range i.Keys {
308-
keys.Insert(string(k))
329+
keys = append(keys, string(k))
309330
}
310331

332+
keys = sortKeys(keys)
333+
311334
annotations := []string{}
312-
for _, k := range keys.List() {
335+
for _, k := range keys {
313336
v := i.Keys[LocatorKey(k)]
314337
if LocatorKey(k) == LocatorE2ETestKey {
315338
annotations = append(annotations, fmt.Sprintf("%v/%q", k, v))
316339
} else {
317340
annotations = append(annotations, fmt.Sprintf("%v/%v", k, v))
318341
}
319342
}
343+
320344
annotationString := strings.Join(annotations, " ")
321345

322346
return annotationString
323347
}
324348

349+
// sortKeys ensures that some keys appear in the order we require (least specific to most), so rows with locators
350+
// are grouped together. (i.e. keeping containers within the same pod together, or rows for a specific container)
351+
// Blindly going through the keys results in alphabetical ordering, container comes first, and then we've
352+
// got container events separated from their pod events on the intervals chart.
353+
// This will hopefully eventually go away but for now we need it.
354+
// Courtesy of ChatGPT but unit tested.
355+
func sortKeys(keys []string) []string {
356+
357+
// Ensure these keys appear in this order. Other keys can be mixed in and will appear at the end in alphabetical
358+
// order.
359+
orderedKeys := []string{"namespace", "node", "pod", "uid", "server", "container", "shutdown"}
360+
361+
// Create a map to store the indices of keys in the orderedKeys array.
362+
// This will allow us to efficiently check if a key is in orderedKeys and find its position.
363+
orderedKeyIndices := make(map[string]int)
364+
365+
for i, key := range orderedKeys {
366+
orderedKeyIndices[key] = i
367+
}
368+
369+
// Define a custom sorting function that orders the keys based on the orderedKeys array.
370+
sort.Slice(keys, func(i, j int) bool {
371+
// Get the indices of keys i and j in orderedKeys.
372+
indexI, existsI := orderedKeyIndices[keys[i]]
373+
indexJ, existsJ := orderedKeyIndices[keys[j]]
374+
375+
// If both keys exist in orderedKeys, sort them based on their order.
376+
if existsI && existsJ {
377+
return indexI < indexJ
378+
}
379+
380+
// If only one of the keys exists in orderedKeys, move it to the front.
381+
if existsI {
382+
return true
383+
} else if existsJ {
384+
return false
385+
}
386+
387+
// If neither key is in orderedKeys, sort alphabetically so we have predictable ordering
388+
return keys[i] < keys[j]
389+
})
390+
391+
return keys
392+
}
393+
325394
type IntervalFilter func(i Interval) bool
326395

327396
type IntervalFilters []IntervalFilter

pkg/monitor/monitorapi/types_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package monitorapi
33
import (
44
"testing"
55
"time"
6+
7+
"github.com/stretchr/testify/assert"
68
)
79

810
func TestIntervals_Duration(t *testing.T) {
@@ -106,3 +108,45 @@ func TestIntervals_Duration(t *testing.T) {
106108
})
107109
}
108110
}
111+
112+
// Not sure this test needs to live forever, but while working through the move
113+
//
114+
// to structured locators, it would be best if the legacy one kept coming out with
115+
// keys in the same order they were before, as the intervals chart sorts on these and
116+
// some are expected to be grouped together.
117+
func TestLocatorOldLocator(t *testing.T) {
118+
tests := []struct {
119+
name string
120+
locator Locator
121+
expected string // the legacy locator
122+
}{
123+
{
124+
name: "container locator",
125+
locator: NewLocator().ContainerFromNames("mynamespace", "mypod", "fakeuid", "mycontainer"),
126+
expected: "namespace/mynamespace pod/mypod uid/fakeuid container/mycontainer",
127+
},
128+
{
129+
name: "pod locator",
130+
locator: NewLocator().PodFromNames("mynamespace", "mypod", "fakeuid"),
131+
expected: "namespace/mynamespace pod/mypod uid/fakeuid",
132+
},
133+
{
134+
name: "container locator with keys mixed in", // not sure if this can happen but make sure what we expect occurs
135+
locator: Locator{Keys: map[LocatorKey]string{
136+
"a": "b",
137+
"container": "mycontainer",
138+
"foo": "bar",
139+
"namespace": "mynamespace",
140+
"pod": "mypod",
141+
"uid": "fakeuid",
142+
"zzz": "foobar",
143+
}},
144+
expected: "namespace/mynamespace pod/mypod uid/fakeuid container/mycontainer a/b foo/bar zzz/foobar",
145+
},
146+
}
147+
for _, tt := range tests {
148+
t.Run(tt.name, func(t *testing.T) {
149+
assert.Equal(t, tt.expected, tt.locator.OldLocator())
150+
})
151+
}
152+
}

pkg/monitor/serialization/serialize.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,11 @@ func EventsFromJSON(data []byte) (monitorapi.Intervals, error) {
6666
events = append(events, monitorapi.Interval{
6767
Source: monitorapi.IntervalSource(interval.Source),
6868
Condition: monitorapi.Condition{
69-
Level: level,
70-
Locator: interval.Locator,
71-
Message: interval.Message,
69+
Level: level,
70+
Locator: interval.Locator,
71+
Message: interval.Message,
72+
StructuredLocator: interval.StructuredLocator,
73+
StructuredMessage: interval.StructuredMessage,
7274
},
7375

7476
From: interval.From.Time,
@@ -176,8 +178,16 @@ func (intervals byTime) Less(i, j int) bool {
176178
case d > 0:
177179
return false
178180
}
179-
return intervals[i].Locator < intervals[j].Locator
181+
182+
switch d := intervals[i].To.Sub(intervals[j].To.Time); {
183+
case d < 0:
184+
return true
185+
case d > 0:
186+
return false
187+
}
188+
return intervals[i].Message < intervals[j].Message
180189
}
190+
181191
func (intervals byTime) Len() int { return len(intervals) }
182192
func (intervals byTime) Swap(i, j int) {
183193
intervals[i], intervals[j] = intervals[j], intervals[i]

0 commit comments

Comments
 (0)