Skip to content

Commit 5dc08d0

Browse files
committed
follow-up: implement tracing
1 parent 7f287a1 commit 5dc08d0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1132
-9
lines changed

controllers/alias.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"time"
2222

23+
oteltrace "go.opentelemetry.io/otel/trace"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
"sigs.k8s.io/controller-runtime/pkg/client"
2526
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -65,6 +66,7 @@ type MachineReconciler struct {
6566
UnstructuredCachingClient client.Client
6667
APIReader client.Reader
6768
Tracker *remote.ClusterCacheTracker
69+
TraceProvider oteltrace.TracerProvider
6870

6971
// WatchFilterValue is the label value used to filter events prior to reconciliation.
7072
WatchFilterValue string
@@ -79,6 +81,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
7981
UnstructuredCachingClient: r.UnstructuredCachingClient,
8082
APIReader: r.APIReader,
8183
Tracker: r.Tracker,
84+
TraceProvider: r.TraceProvider,
8285
WatchFilterValue: r.WatchFilterValue,
8386
NodeDrainClientTimeout: r.NodeDrainClientTimeout,
8487
}).SetupWithManager(ctx, mgr, options)
@@ -150,6 +153,8 @@ type ClusterTopologyReconciler struct {
150153

151154
RuntimeClient runtimeclient.Client
152155

156+
TraceProvider oteltrace.TracerProvider
157+
153158
// WatchFilterValue is the label value used to filter events prior to reconciliation.
154159
WatchFilterValue string
155160

@@ -164,6 +169,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct
164169
APIReader: r.APIReader,
165170
RuntimeClient: r.RuntimeClient,
166171
UnstructuredCachingClient: r.UnstructuredCachingClient,
172+
TraceProvider: r.TraceProvider,
167173
WatchFilterValue: r.WatchFilterValue,
168174
}).SetupWithManager(ctx, mgr, options)
169175
}

controllers/external/util.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ import (
2828
"sigs.k8s.io/controller-runtime/pkg/client"
2929

3030
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
31+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3132
)
3233

3334
// Get uses the client and reference to get an external, unstructured object.
3435
func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, namespace string) (*unstructured.Unstructured, error) {
36+
ctx, span := traceutil.Start(ctx, "external.Get")
37+
defer span.End()
38+
3539
if ref == nil {
3640
return nil, errors.Errorf("cannot get object - object reference not set")
3741
}
@@ -48,6 +52,9 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
4852

4953
// Delete uses the client and reference to delete an external, unstructured object.
5054
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
55+
ctx, span := traceutil.Start(ctx, "external.Delete")
56+
defer span.End()
57+
5158
obj := new(unstructured.Unstructured)
5259
obj.SetAPIVersion(ref.APIVersion)
5360
obj.SetKind(ref.Kind)

controllers/remote/cluster_cache_tracker.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/go-logr/logr"
2929
"github.com/pkg/errors"
30+
oteltrace "go.opentelemetry.io/otel/trace"
3031
corev1 "k8s.io/api/core/v1"
3132
apierrors "k8s.io/apimachinery/pkg/api/errors"
3233
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -97,6 +98,8 @@ type ClusterCacheTracker struct {
9798
// This information will be used to detected if the controller is running on a workload cluster, so
9899
// that we can then access the apiserver directly.
99100
controllerPodMetadata *metav1.ObjectMeta
101+
102+
traceProvider oteltrace.TracerProvider
100103
}
101104

102105
// ClusterCacheTrackerOptions defines options to configure
@@ -121,6 +124,8 @@ type ClusterCacheTrackerOptions struct {
121124
// This is used to calculate the user agent string.
122125
// If not set, it defaults to "cluster-cache-tracker".
123126
ControllerName string
127+
128+
TraceProvider oteltrace.TracerProvider
124129
}
125130

126131
func setDefaultOptions(opts *ClusterCacheTrackerOptions) {
@@ -135,6 +140,10 @@ func setDefaultOptions(opts *ClusterCacheTrackerOptions) {
135140
&corev1.Secret{},
136141
}
137142
}
143+
144+
if opts.TraceProvider == nil {
145+
opts.TraceProvider = oteltrace.NewNoopTracerProvider()
146+
}
138147
}
139148

140149
// NewClusterCacheTracker creates a new ClusterCacheTracker.
@@ -166,6 +175,7 @@ func NewClusterCacheTracker(manager ctrl.Manager, options ClusterCacheTrackerOpt
166175
controllerPodMetadata: controllerPodMetadata,
167176
log: *options.Log,
168177
clientUncachedObjects: options.ClientUncachedObjects,
178+
traceProvider: options.TraceProvider,
169179
client: manager.GetClient(),
170180
secretCachingClient: options.SecretCachingClient,
171181
scheme: manager.GetScheme(),
@@ -294,6 +304,8 @@ func (t *ClusterCacheTracker) newClusterAccessor(ctx context.Context, cluster cl
294304
if err != nil {
295305
return nil, errors.Wrapf(err, "error fetching REST client config for remote cluster %q", cluster.String())
296306
}
307+
// FIXME: this seems to lead to problems with spans (random 10s spans in the trace)
308+
// config.Wrap(tracing.WrapperFor(t.traceProvider)) //nolint:gocritic
297309

298310
// Create a client and a cache for the cluster.
299311
c, uncachedClient, cache, err := t.createClient(ctx, config, cluster, indexes)

controlplane/kubeadm/controllers/alias.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"time"
2222

23+
"go.opentelemetry.io/otel/trace"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
"sigs.k8s.io/controller-runtime/pkg/client"
2526
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -33,6 +34,7 @@ type KubeadmControlPlaneReconciler struct {
3334
Client client.Client
3435
SecretCachingClient client.Client
3536
Tracker *remote.ClusterCacheTracker
37+
TraceProvider trace.TracerProvider
3638

3739
EtcdDialTimeout time.Duration
3840
EtcdCallTimeout time.Duration
@@ -47,6 +49,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
4749
Client: r.Client,
4850
SecretCachingClient: r.SecretCachingClient,
4951
Tracker: r.Tracker,
52+
TraceProvider: r.TraceProvider,
5053
EtcdDialTimeout: r.EtcdDialTimeout,
5154
EtcdCallTimeout: r.EtcdCallTimeout,
5255
WatchFilterValue: r.WatchFilterValue,

controlplane/kubeadm/internal/cluster.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3333
"sigs.k8s.io/cluster-api/controllers/remote"
3434
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
35+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3536
"sigs.k8s.io/cluster-api/util/collections"
3637
"sigs.k8s.io/cluster-api/util/secret"
3738
)
@@ -84,11 +85,17 @@ func (m *Management) List(ctx context.Context, list client.ObjectList, opts ...c
8485
// GetMachinesForCluster returns a list of machines that can be filtered or not.
8586
// If no filter is supplied then all machines associated with the target cluster are returned.
8687
func (m *Management) GetMachinesForCluster(ctx context.Context, cluster *clusterv1.Cluster, filters ...collections.Func) (collections.Machines, error) {
88+
ctx, span := traceutil.Start(ctx, "Management.GetMachinesForCluster")
89+
defer span.End()
90+
8791
return collections.GetFilteredMachinesForCluster(ctx, m.Client, cluster, filters...)
8892
}
8993

9094
// GetMachinePoolsForCluster returns a list of machine pools owned by the cluster.
9195
func (m *Management) GetMachinePoolsForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*expv1.MachinePoolList, error) {
96+
ctx, span := traceutil.Start(ctx, "Management.GetMachinePoolsForCluster")
97+
defer span.End()
98+
9299
selectors := []client.ListOption{
93100
client.InNamespace(cluster.GetNamespace()),
94101
client.MatchingLabels{
@@ -103,6 +110,9 @@ func (m *Management) GetMachinePoolsForCluster(ctx context.Context, cluster *clu
103110
// GetWorkloadCluster builds a cluster object.
104111
// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
105112
func (m *Management) GetWorkloadCluster(ctx context.Context, clusterKey client.ObjectKey) (WorkloadCluster, error) {
113+
ctx, span := traceutil.Start(ctx, "Management.GetWorkloadCluster")
114+
defer span.End()
115+
106116
// TODO(chuckha): Inject this dependency.
107117
// TODO(chuckha): memoize this function. The workload client only exists as long as a reconciliation loop.
108118
restConfig, err := m.Tracker.GetRESTConfig(ctx, clusterKey)
@@ -178,6 +188,9 @@ func (m *Management) GetWorkloadCluster(ctx context.Context, clusterKey client.O
178188
}
179189

180190
func (m *Management) getEtcdCAKeyPair(ctx context.Context, clusterKey client.ObjectKey) ([]byte, []byte, error) {
191+
ctx, span := traceutil.Start(ctx, "Management.getEtcdCAKeyPair")
192+
defer span.End()
193+
181194
etcdCASecret := &corev1.Secret{}
182195
etcdCAObjectKey := client.ObjectKey{
183196
Namespace: clusterKey.Namespace,
@@ -207,6 +220,9 @@ func (m *Management) getEtcdCAKeyPair(ctx context.Context, clusterKey client.Obj
207220
}
208221

209222
func (m *Management) getAPIServerEtcdClientCert(ctx context.Context, clusterKey client.ObjectKey) (tls.Certificate, error) {
223+
ctx, span := traceutil.Start(ctx, "Management.getAPIServerEtcdClientCert")
224+
defer span.End()
225+
210226
apiServerEtcdClientCertificateSecret := &corev1.Secret{}
211227
apiServerEtcdClientCertificateObjectKey := client.ObjectKey{
212228
Namespace: clusterKey.Namespace,

controlplane/kubeadm/internal/control_plane.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
3131
"sigs.k8s.io/cluster-api/controllers/external"
3232
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
33+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3334
"sigs.k8s.io/cluster-api/util/collections"
3435
"sigs.k8s.io/cluster-api/util/failuredomains"
3536
"sigs.k8s.io/cluster-api/util/patch"
@@ -58,6 +59,9 @@ type ControlPlane struct {
5859

5960
// NewControlPlane returns an instantiated ControlPlane.
6061
func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
62+
ctx, span := traceutil.Start(ctx, "NewControlPlane")
63+
defer span.End()
64+
6165
infraObjects, err := getInfraResources(ctx, client, ownedMachines)
6266
if err != nil {
6367
return nil, err
@@ -243,6 +247,9 @@ func (c *ControlPlane) HasUnhealthyMachine() bool {
243247

244248
// PatchMachines patches all the machines conditions.
245249
func (c *ControlPlane) PatchMachines(ctx context.Context) error {
250+
ctx, span := traceutil.Start(ctx, "ControlPlane.PatchMachines")
251+
defer span.End()
252+
246253
errList := []error{}
247254
for i := range c.Machines {
248255
machine := c.Machines[i]

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/blang/semver"
2525
"github.com/pkg/errors"
26+
oteltrace "go.opentelemetry.io/otel/trace"
2627
corev1 "k8s.io/api/core/v1"
2728
apierrors "k8s.io/apimachinery/pkg/api/errors"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -46,6 +47,7 @@ import (
4647
"sigs.k8s.io/cluster-api/feature"
4748
"sigs.k8s.io/cluster-api/internal/contract"
4849
"sigs.k8s.io/cluster-api/internal/util/ssa"
50+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
4951
"sigs.k8s.io/cluster-api/util"
5052
"sigs.k8s.io/cluster-api/util/annotations"
5153
"sigs.k8s.io/cluster-api/util/collections"
@@ -75,6 +77,7 @@ type KubeadmControlPlaneReconciler struct {
7577
controller controller.Controller
7678
recorder record.EventRecorder
7779
Tracker *remote.ClusterCacheTracker
80+
TraceProvider oteltrace.TracerProvider
7881

7982
EtcdDialTimeout time.Duration
8083
EtcdCallTimeout time.Duration
@@ -94,6 +97,10 @@ type KubeadmControlPlaneReconciler struct {
9497
}
9598

9699
func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
100+
if r.TraceProvider == nil {
101+
r.TraceProvider = oteltrace.NewNoopTracerProvider()
102+
}
103+
tr := traceutil.Reconciler(r, r.TraceProvider, "kubeadmcontrolplane", "KubeadmControlPlane")
97104
c, err := ctrl.NewControllerManagedBy(mgr).
98105
For(&controlplanev1.KubeadmControlPlane{}).
99106
Owns(&clusterv1.Machine{}).
@@ -108,7 +115,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
108115
predicates.ClusterUnpausedAndInfrastructureReady(ctrl.LoggerFrom(ctx)),
109116
),
110117
),
111-
).Build(r)
118+
).Build(tr)
112119
if err != nil {
113120
return errors.Wrap(err, "failed setting up with a controller manager")
114121
}
@@ -300,6 +307,9 @@ func (r *KubeadmControlPlaneReconciler) initControlPlaneScope(ctx context.Contex
300307
}
301308

302309
func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kcp *controlplanev1.KubeadmControlPlane) error {
310+
ctx, span := traceutil.Start(ctx, "patchKubeadmControlPlane")
311+
defer span.End()
312+
303313
// Always update the readyCondition by summarizing the state of other conditions.
304314
conditions.SetSummary(kcp,
305315
conditions.WithConditions(
@@ -331,6 +341,9 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc
331341

332342
// reconcile handles KubeadmControlPlane reconciliation.
333343
func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPlane *internal.ControlPlane) (res ctrl.Result, reterr error) {
344+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcile")
345+
defer span.End()
346+
334347
log := ctrl.LoggerFrom(ctx)
335348
log.Info("Reconcile KubeadmControlPlane")
336349

@@ -503,6 +516,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileClusterCertificates(ctx context
503516
// The implementation does not take non-control plane workloads into consideration. This may or may not change in the future.
504517
// Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064.
505518
func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
519+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileDelete")
520+
defer span.End()
521+
506522
log := ctrl.LoggerFrom(ctx)
507523
log.Info("Reconcile KubeadmControlPlane deletion")
508524

@@ -590,6 +606,9 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(_ context.C
590606
// Otherwise, fields would be co-owned by our "old" "manager" and "capi-kubeadmcontrolplane" and then we would not be
591607
// able to e.g. drop labels and annotations.
592608
func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *internal.ControlPlane) error {
609+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.syncMachines")
610+
defer span.End()
611+
593612
patchHelpers := map[string]*patch.Helper{}
594613
for machineName := range controlPlane.Machines {
595614
m := controlPlane.Machines[machineName]
@@ -673,6 +692,9 @@ func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, contro
673692
// reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and
674693
// the status of the etcd cluster.
675694
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
695+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileControlPlaneConditions")
696+
defer span.End()
697+
676698
// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
677699
// for updating conditions. Return early.
678700
if !controlPlane.KCP.Status.Initialized {
@@ -702,6 +724,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx cont
702724
//
703725
// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneConditions before this.
704726
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
727+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileEtcdMembers")
728+
defer span.End()
729+
705730
log := ctrl.LoggerFrom(ctx)
706731

707732
// If etcd is not managed by KCP this is a no-op.
@@ -754,6 +779,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
754779
}
755780

756781
func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
782+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileCertificateExpiries")
783+
defer span.End()
784+
757785
log := ctrl.LoggerFrom(ctx)
758786

759787
// Return if there are no KCP-owned control-plane machines.
@@ -824,6 +852,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context
824852
}
825853

826854
func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines, cluster *clusterv1.Cluster) error {
855+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.adoptMachines")
856+
defer span.End()
857+
827858
// We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned
828859
// See https://github.com/kubernetes/kubernetes/issues/42639
829860
uncached := controlplanev1.KubeadmControlPlane{}
@@ -901,6 +932,9 @@ func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *
901932
}
902933

903934
func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, currentOwner *bootstrapv1.KubeadmConfig, clusterName string) error {
935+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.adoptOwnedSecrets")
936+
defer span.End()
937+
904938
secrets := corev1.SecretList{}
905939
if err := r.Client.List(ctx, &secrets, client.InNamespace(kcp.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}); err != nil {
906940
return errors.Wrap(err, "error finding secrets for adoption")
@@ -937,6 +971,8 @@ func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, k
937971

938972
// ensureCertificatesOwnerRef ensures an ownerReference to the owner is added on the Secrets holding certificates.
939973
func (r *KubeadmControlPlaneReconciler) ensureCertificatesOwnerRef(ctx context.Context, certificates secret.Certificates, owner metav1.OwnerReference) error {
974+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.ensureCertificatesOwnerRef")
975+
defer span.End()
940976
for _, c := range certificates {
941977
if c.Secret == nil {
942978
continue

0 commit comments

Comments
 (0)