Skip to content

Commit a3c206c

Browse files
mprahlopenshift-merge-bot[bot]
authored andcommitted
Record compliance events in the compliance history API with nanoseconds
This helps with ordering when compliance events have a timestamp that is the same down to the second. Relates: https://issues.redhat.com/browse/ACM-10155 Signed-off-by: mprahl <[email protected]>
1 parent a332e0a commit a3c206c

File tree

3 files changed

+60
-24
lines changed

3 files changed

+60
-24
lines changed

controllers/statussync/policy_status_sync.go

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"crypto/tls"
1010
"crypto/x509"
1111
"encoding/json"
12+
"errors"
1213
"fmt"
1314
"io"
1415
"net/http"
@@ -21,7 +22,7 @@ import (
2122

2223
corev1 "k8s.io/api/core/v1"
2324
"k8s.io/apimachinery/pkg/api/equality"
24-
"k8s.io/apimachinery/pkg/api/errors"
25+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2728
"k8s.io/apimachinery/pkg/runtime"
@@ -124,7 +125,7 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
124125

125126
err := r.ManagedClient.Get(ctx, request.NamespacedName, instance)
126127
if err != nil {
127-
if errors.IsNotFound(err) {
128+
if k8serrors.IsNotFound(err) {
128129
// The replicated policy on the managed cluster was deleted.
129130
// check if it was deleted by user by checking if it still exists on hub
130131
hubInstance := &policiesv1.Policy{}
@@ -133,7 +134,7 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
133134
ctx, types.NamespacedName{Namespace: r.ClusterNamespaceOnHub, Name: request.Name}, hubInstance,
134135
)
135136
if err != nil {
136-
if errors.IsNotFound(err) {
137+
if k8serrors.IsNotFound(err) {
137138
// confirmed deleted on hub, doing nothing
138139
reqLogger.Info("Policy was deleted, no status to update")
139140

@@ -172,11 +173,11 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
172173
err = r.HubClient.Get(ctx, types.NamespacedName{Namespace: r.ClusterNamespaceOnHub, Name: request.Name}, hubPlc)
173174
if err != nil {
174175
// hub policy not found, it has been deleted
175-
if errors.IsNotFound(err) {
176+
if k8serrors.IsNotFound(err) {
176177
reqLogger.Info("Hub policy not found, it has been deleted")
177178
// try to delete local one
178179
err = r.ManagedClient.Delete(ctx, instance)
179-
if err == nil || errors.IsNotFound(err) {
180+
if err == nil || k8serrors.IsNotFound(err) {
180181
// no err or err is not found means local policy has been deleted
181182
reqLogger.Info("Managed policy was deleted")
182183

@@ -355,32 +356,25 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
355356

356357
return !history[i].eventTime.Before(&history[j].eventTime)
357358
}
358-
// Timestamps are the same: attempt to use the event name.
359-
// Conventionally (in client-go), the event name has a hexadecimal
360-
// nanosecond timestamp as a suffix after a period.
361-
iNameParts := strings.Split(history[i].EventName, ".")
362-
jNameParts := strings.Split(history[j].EventName, ".")
363-
errMsg := "Unable to interpret hexadecimal timestamp in event name, " +
364-
"can't guarantee ordering of events in this status"
365-
366-
iNanos, err := strconv.ParseInt(iNameParts[len(iNameParts)-1], 16, 64)
359+
360+
iTime, err := parseTimestampFromEventName(history[i].EventName)
367361
if err != nil {
368-
reqLogger.Error(err, errMsg, "eventName", history[i].EventName)
362+
reqLogger.Error(err, "Can't guarantee ordering of events in this status")
369363

370364
return false
371365
}
372366

373-
jNanos, err := strconv.ParseInt(jNameParts[len(jNameParts)-1], 16, 64)
367+
jTime, err := parseTimestampFromEventName(history[j].EventName)
374368
if err != nil {
375-
reqLogger.Error(err, errMsg, "eventName", history[j].EventName)
369+
reqLogger.Error(err, "Can't guarantee ordering of events in this status")
376370

377371
return false
378372
}
379373

380374
reqLogger.V(2).Info("Event timestamp collision, order determined by hex timestamp in name",
381375
"event1Name", history[i].EventName, "event2Name", history[j].EventName)
382376

383-
return iNanos > jNanos
377+
return iTime.After(jTime.Time)
384378
}
385379

386380
return !history[i].LastTimestamp.Time.Before(history[j].LastTimestamp.Time)
@@ -495,6 +489,19 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
495489
return reconcile.Result{}, nil
496490
}
497491

492+
// parseTimestampFromEventName will parse the event name for a hexadecimal nanosecond timestamp as a suffix after a
493+
// period. This is a client-go convention that is repeated in the policy framework.
494+
func parseTimestampFromEventName(eventName string) (metav1.Time, error) {
495+
nameParts := strings.Split(eventName, ".")
496+
497+
nanos, err := strconv.ParseInt(nameParts[len(nameParts)-1], 16, 64)
498+
if err != nil {
499+
return metav1.Time{}, errors.New("Unable to find a valid hexadecimal timestamp in event name: " + eventName)
500+
}
501+
502+
return metav1.Unix(0, nanos), nil
503+
}
504+
498505
func parseComplianceFromMessage(message string) policiesv1.ComplianceState {
499506
cleanMsg := strings.ToLower(
500507
strings.TrimSpace(
@@ -535,10 +542,18 @@ func ceRequestFromEvent(event *corev1.Event) (utils.ComplianceAPIEventRequest, e
535542

536543
compliance := parseComplianceFromMessage(event.Message)
537544

545+
var timestamp metav1.Time
546+
547+
if timestampFromEvent, err := parseTimestampFromEventName(event.Name); err == nil {
548+
timestamp = timestampFromEvent
549+
} else {
550+
timestamp = event.LastTimestamp
551+
}
552+
538553
ce.Event = utils.ComplianceAPIEvent{
539554
Compliance: compliance,
540555
Message: strings.TrimLeft(event.Message[len(compliance):], " ;"),
541-
Timestamp: event.LastTimestamp.Format(time.RFC3339Nano),
556+
Timestamp: timestamp.Format(time.RFC3339Nano),
542557
ReportedBy: "governance-policy-framework",
543558
}
544559

@@ -571,7 +586,7 @@ func StartComplianceEventsSyncer(
571586
var clusterID string
572587

573588
idClusterClaim, err := managedClient.Resource(clusterClaimGVR).Get(ctx, "id.k8s.io", metav1.GetOptions{})
574-
if err != nil && !errors.IsNotFound(err) {
589+
if err != nil && !k8serrors.IsNotFound(err) {
575590
return err
576591
}
577592

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright Contributors to the Open Cluster Management project
2+
package statussync
3+
4+
import (
5+
"testing"
6+
)
7+
8+
func TestParseTimestampFromEventName(t *testing.T) {
9+
output, err := parseTimestampFromEventName("event.17b80d88a995e12c")
10+
if err != nil {
11+
t.Errorf("Expected no error but got: %v", err)
12+
} else if output.Time.UnixNano() != 1709130939198988588 {
13+
t.Errorf("Expected 1709130939198988588 but got: %d", output.Time.UnixNano())
14+
}
15+
16+
output, err = parseTimestampFromEventName("event.with-no-timestamp")
17+
if err == nil {
18+
t.Errorf("Expected an error but got none: %s", output)
19+
}
20+
}

test/e2e/case23_compliance_api_recording_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,8 @@ var _ = Describe("Compliance API recording", Ordered, Label("compliance-events-a
154154
})
155155

156156
AfterAll(func(ctx context.Context) {
157-
err := server.Shutdown(ctx)
158-
Expect(err).ToNot(HaveOccurred())
159-
160157
By("Deleting a policy on hub cluster in ns:" + clusterNamespaceOnHub)
161-
_, err = kubectlHub("delete", "-f", yamlPath, "-n", clusterNamespaceOnHub, "--ignore-not-found")
158+
_, err := kubectlHub("delete", "-f", yamlPath, "-n", clusterNamespaceOnHub, "--ignore-not-found")
162159
Expect(err).ToNot(HaveOccurred())
163160
opt := metav1.ListOptions{}
164161
utils.ListWithTimeout(clientHubDynamic, gvrPolicy, opt, 0, true, defaultTimeoutSeconds)
@@ -167,6 +164,10 @@ var _ = Describe("Compliance API recording", Ordered, Label("compliance-events-a
167164
By("clean up all events")
168165
_, err = kubectlManaged("delete", "events", "-n", clusterNamespace, "--all")
169166
Expect(err).ShouldNot(HaveOccurred())
167+
168+
// Shutdown after all clean up is done in case there were some events queued up by the controllers.
169+
err = server.Shutdown(ctx)
170+
Expect(err).ToNot(HaveOccurred())
170171
})
171172

172173
AfterEach(func() {

0 commit comments

Comments
 (0)