Skip to content

Commit 9fde59c

Browse files
committed
follow-up: implement tracing
1 parent 27d6257 commit 9fde59c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1132
-9
lines changed

controllers/alias.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"time"
2222

23+
oteltrace "go.opentelemetry.io/otel/trace"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
"sigs.k8s.io/controller-runtime/pkg/client"
2526
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -65,6 +66,7 @@ type MachineReconciler struct {
6566
UnstructuredCachingClient client.Client
6667
APIReader client.Reader
6768
Tracker *remote.ClusterCacheTracker
69+
TraceProvider oteltrace.TracerProvider
6870

6971
// WatchFilterValue is the label value used to filter events prior to reconciliation.
7072
WatchFilterValue string
@@ -79,6 +81,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
7981
UnstructuredCachingClient: r.UnstructuredCachingClient,
8082
APIReader: r.APIReader,
8183
Tracker: r.Tracker,
84+
TraceProvider: r.TraceProvider,
8285
WatchFilterValue: r.WatchFilterValue,
8386
NodeDrainClientTimeout: r.NodeDrainClientTimeout,
8487
}).SetupWithManager(ctx, mgr, options)
@@ -150,6 +153,8 @@ type ClusterTopologyReconciler struct {
150153

151154
RuntimeClient runtimeclient.Client
152155

156+
TraceProvider oteltrace.TracerProvider
157+
153158
// WatchFilterValue is the label value used to filter events prior to reconciliation.
154159
WatchFilterValue string
155160

@@ -164,6 +169,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct
164169
APIReader: r.APIReader,
165170
RuntimeClient: r.RuntimeClient,
166171
UnstructuredCachingClient: r.UnstructuredCachingClient,
172+
TraceProvider: r.TraceProvider,
167173
WatchFilterValue: r.WatchFilterValue,
168174
}).SetupWithManager(ctx, mgr, options)
169175
}

controllers/external/util.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ import (
2828
"sigs.k8s.io/controller-runtime/pkg/client"
2929

3030
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
31+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3132
)
3233

3334
// Get uses the client and reference to get an external, unstructured object.
3435
func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, namespace string) (*unstructured.Unstructured, error) {
36+
ctx, span := traceutil.Start(ctx, "external.Get")
37+
defer span.End()
38+
3539
if ref == nil {
3640
return nil, errors.Errorf("cannot get object - object reference not set")
3741
}
@@ -48,6 +52,9 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
4852

4953
// Delete uses the client and reference to delete an external, unstructured object.
5054
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
55+
ctx, span := traceutil.Start(ctx, "external.Delete")
56+
defer span.End()
57+
5158
obj := new(unstructured.Unstructured)
5259
obj.SetAPIVersion(ref.APIVersion)
5360
obj.SetKind(ref.Kind)

controllers/remote/cluster_cache_tracker.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/go-logr/logr"
2929
"github.com/pkg/errors"
30+
oteltrace "go.opentelemetry.io/otel/trace"
3031
corev1 "k8s.io/api/core/v1"
3132
apierrors "k8s.io/apimachinery/pkg/api/errors"
3233
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -97,6 +98,8 @@ type ClusterCacheTracker struct {
9798
// This information will be used to detect whether the controller is running on a workload cluster, so
9899
// that we can then access the apiserver directly.
99100
controllerPodMetadata *metav1.ObjectMeta
101+
102+
traceProvider oteltrace.TracerProvider
100103
}
101104

102105
// ClusterCacheTrackerOptions defines options to configure
@@ -121,6 +124,8 @@ type ClusterCacheTrackerOptions struct {
121124
// This is used to calculate the user agent string.
122125
// If not set, it defaults to "cluster-cache-tracker".
123126
ControllerName string
127+
128+
TraceProvider oteltrace.TracerProvider
124129
}
125130

126131
func setDefaultOptions(opts *ClusterCacheTrackerOptions) {
@@ -135,6 +140,10 @@ func setDefaultOptions(opts *ClusterCacheTrackerOptions) {
135140
&corev1.Secret{},
136141
}
137142
}
143+
144+
if opts.TraceProvider == nil {
145+
opts.TraceProvider = oteltrace.NewNoopTracerProvider()
146+
}
138147
}
139148

140149
// NewClusterCacheTracker creates a new ClusterCacheTracker.
@@ -166,6 +175,7 @@ func NewClusterCacheTracker(manager ctrl.Manager, options ClusterCacheTrackerOpt
166175
controllerPodMetadata: controllerPodMetadata,
167176
log: *options.Log,
168177
clientUncachedObjects: options.ClientUncachedObjects,
178+
traceProvider: options.TraceProvider,
169179
client: manager.GetClient(),
170180
secretCachingClient: options.SecretCachingClient,
171181
scheme: manager.GetScheme(),
@@ -294,6 +304,8 @@ func (t *ClusterCacheTracker) newClusterAccessor(ctx context.Context, cluster cl
294304
if err != nil {
295305
return nil, errors.Wrapf(err, "error fetching REST client config for remote cluster %q", cluster.String())
296306
}
307+
// FIXME: this seems to lead to problems with spans (random 10s spans in the trace)
308+
// config.Wrap(tracing.WrapperFor(t.traceProvider)) //nolint:gocritic
297309

298310
// Create a client and a cache for the cluster.
299311
c, uncachedClient, cache, err := t.createClient(ctx, config, cluster, indexes)

controlplane/kubeadm/controllers/alias.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"time"
2222

23+
"go.opentelemetry.io/otel/trace"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
"sigs.k8s.io/controller-runtime/pkg/client"
2526
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -33,6 +34,7 @@ type KubeadmControlPlaneReconciler struct {
3334
Client client.Client
3435
SecretCachingClient client.Client
3536
Tracker *remote.ClusterCacheTracker
37+
TraceProvider trace.TracerProvider
3638

3739
EtcdDialTimeout time.Duration
3840
EtcdCallTimeout time.Duration
@@ -47,6 +49,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
4749
Client: r.Client,
4850
SecretCachingClient: r.SecretCachingClient,
4951
Tracker: r.Tracker,
52+
TraceProvider: r.TraceProvider,
5053
EtcdDialTimeout: r.EtcdDialTimeout,
5154
EtcdCallTimeout: r.EtcdCallTimeout,
5255
WatchFilterValue: r.WatchFilterValue,

controlplane/kubeadm/internal/cluster.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3333
"sigs.k8s.io/cluster-api/controllers/remote"
3434
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
35+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3536
"sigs.k8s.io/cluster-api/util/collections"
3637
"sigs.k8s.io/cluster-api/util/secret"
3738
)
@@ -84,11 +85,17 @@ func (m *Management) List(ctx context.Context, list client.ObjectList, opts ...c
8485
// GetMachinesForCluster returns a list of machines that can be filtered or not.
8586
// If no filter is supplied then all machines associated with the target cluster are returned.
8687
func (m *Management) GetMachinesForCluster(ctx context.Context, cluster *clusterv1.Cluster, filters ...collections.Func) (collections.Machines, error) {
88+
ctx, span := traceutil.Start(ctx, "Management.GetMachinesForCluster")
89+
defer span.End()
90+
8791
return collections.GetFilteredMachinesForCluster(ctx, m.Client, cluster, filters...)
8892
}
8993

9094
// GetMachinePoolsForCluster returns a list of machine pools owned by the cluster.
9195
func (m *Management) GetMachinePoolsForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*expv1.MachinePoolList, error) {
96+
ctx, span := traceutil.Start(ctx, "Management.GetMachinePoolsForCluster")
97+
defer span.End()
98+
9299
selectors := []client.ListOption{
93100
client.InNamespace(cluster.GetNamespace()),
94101
client.MatchingLabels{
@@ -103,6 +110,9 @@ func (m *Management) GetMachinePoolsForCluster(ctx context.Context, cluster *clu
103110
// GetWorkloadCluster builds a cluster object.
104111
// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
105112
func (m *Management) GetWorkloadCluster(ctx context.Context, clusterKey client.ObjectKey) (WorkloadCluster, error) {
113+
ctx, span := traceutil.Start(ctx, "Management.GetWorkloadCluster")
114+
defer span.End()
115+
106116
// TODO(chuckha): Inject this dependency.
107117
// TODO(chuckha): memoize this function. The workload client only exists as long as a reconciliation loop.
108118
restConfig, err := m.Tracker.GetRESTConfig(ctx, clusterKey)
@@ -178,6 +188,9 @@ func (m *Management) GetWorkloadCluster(ctx context.Context, clusterKey client.O
178188
}
179189

180190
func (m *Management) getEtcdCAKeyPair(ctx context.Context, clusterKey client.ObjectKey) ([]byte, []byte, error) {
191+
ctx, span := traceutil.Start(ctx, "Management.getEtcdCAKeyPair")
192+
defer span.End()
193+
181194
etcdCASecret := &corev1.Secret{}
182195
etcdCAObjectKey := client.ObjectKey{
183196
Namespace: clusterKey.Namespace,
@@ -207,6 +220,9 @@ func (m *Management) getEtcdCAKeyPair(ctx context.Context, clusterKey client.Obj
207220
}
208221

209222
func (m *Management) getAPIServerEtcdClientCert(ctx context.Context, clusterKey client.ObjectKey) (tls.Certificate, error) {
223+
ctx, span := traceutil.Start(ctx, "Management.getAPIServerEtcdClientCert")
224+
defer span.End()
225+
210226
apiServerEtcdClientCertificateSecret := &corev1.Secret{}
211227
apiServerEtcdClientCertificateObjectKey := client.ObjectKey{
212228
Namespace: clusterKey.Namespace,

controlplane/kubeadm/internal/control_plane.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
3131
"sigs.k8s.io/cluster-api/controllers/external"
3232
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
33+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
3334
"sigs.k8s.io/cluster-api/util/collections"
3435
"sigs.k8s.io/cluster-api/util/failuredomains"
3536
"sigs.k8s.io/cluster-api/util/patch"
@@ -58,6 +59,9 @@ type ControlPlane struct {
5859

5960
// NewControlPlane returns an instantiated ControlPlane.
6061
func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
62+
ctx, span := traceutil.Start(ctx, "NewControlPlane")
63+
defer span.End()
64+
6165
infraObjects, err := getInfraResources(ctx, client, ownedMachines)
6266
if err != nil {
6367
return nil, err
@@ -255,6 +259,9 @@ func (c *ControlPlane) HasUnhealthyMachine() bool {
255259

256260
// PatchMachines patches all the machines conditions.
257261
func (c *ControlPlane) PatchMachines(ctx context.Context) error {
262+
ctx, span := traceutil.Start(ctx, "ControlPlane.PatchMachines")
263+
defer span.End()
264+
258265
errList := []error{}
259266
for i := range c.Machines {
260267
machine := c.Machines[i]

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/blang/semver"
2626
"github.com/pkg/errors"
27+
oteltrace "go.opentelemetry.io/otel/trace"
2728
corev1 "k8s.io/api/core/v1"
2829
apierrors "k8s.io/apimachinery/pkg/api/errors"
2930
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -47,6 +48,7 @@ import (
4748
"sigs.k8s.io/cluster-api/feature"
4849
"sigs.k8s.io/cluster-api/internal/contract"
4950
"sigs.k8s.io/cluster-api/internal/util/ssa"
51+
traceutil "sigs.k8s.io/cluster-api/internal/util/trace"
5052
"sigs.k8s.io/cluster-api/util"
5153
"sigs.k8s.io/cluster-api/util/annotations"
5254
"sigs.k8s.io/cluster-api/util/collections"
@@ -76,6 +78,7 @@ type KubeadmControlPlaneReconciler struct {
7678
controller controller.Controller
7779
recorder record.EventRecorder
7880
Tracker *remote.ClusterCacheTracker
81+
TraceProvider oteltrace.TracerProvider
7982

8083
EtcdDialTimeout time.Duration
8184
EtcdCallTimeout time.Duration
@@ -95,6 +98,10 @@ type KubeadmControlPlaneReconciler struct {
9598
}
9699

97100
func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
101+
if r.TraceProvider == nil {
102+
r.TraceProvider = oteltrace.NewNoopTracerProvider()
103+
}
104+
tr := traceutil.Reconciler(r, r.TraceProvider, "kubeadmcontrolplane", "KubeadmControlPlane")
98105
c, err := ctrl.NewControllerManagedBy(mgr).
99106
For(&controlplanev1.KubeadmControlPlane{}).
100107
Owns(&clusterv1.Machine{}).
@@ -109,7 +116,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
109116
predicates.ClusterUnpausedAndInfrastructureReady(ctrl.LoggerFrom(ctx)),
110117
),
111118
),
112-
).Build(r)
119+
).Build(tr)
113120
if err != nil {
114121
return errors.Wrap(err, "failed setting up with a controller manager")
115122
}
@@ -301,6 +308,9 @@ func (r *KubeadmControlPlaneReconciler) initControlPlaneScope(ctx context.Contex
301308
}
302309

303310
func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kcp *controlplanev1.KubeadmControlPlane) error {
311+
ctx, span := traceutil.Start(ctx, "patchKubeadmControlPlane")
312+
defer span.End()
313+
304314
// Always update the readyCondition by summarizing the state of other conditions.
305315
conditions.SetSummary(kcp,
306316
conditions.WithConditions(
@@ -332,6 +342,9 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc
332342

333343
// reconcile handles KubeadmControlPlane reconciliation.
334344
func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPlane *internal.ControlPlane) (res ctrl.Result, reterr error) {
345+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcile")
346+
defer span.End()
347+
335348
log := ctrl.LoggerFrom(ctx)
336349
log.Info("Reconcile KubeadmControlPlane")
337350

@@ -507,6 +520,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileClusterCertificates(ctx context
507520
// The implementation does not take non-control plane workloads into consideration. This may or may not change in the future.
508521
// Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064.
509522
func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
523+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileDelete")
524+
defer span.End()
525+
510526
log := ctrl.LoggerFrom(ctx)
511527
log.Info("Reconcile KubeadmControlPlane deletion")
512528

@@ -594,6 +610,9 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(_ context.C
594610
// Otherwise, fields would be co-owned by our "old" "manager" and "capi-kubeadmcontrolplane" and then we would not be
595611
// able to e.g. drop labels and annotations.
596612
func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *internal.ControlPlane) error {
613+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.syncMachines")
614+
defer span.End()
615+
597616
patchHelpers := map[string]*patch.Helper{}
598617
for machineName := range controlPlane.Machines {
599618
m := controlPlane.Machines[machineName]
@@ -677,6 +696,9 @@ func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, contro
677696
// reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and
678697
// the status of the etcd cluster.
679698
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) error {
699+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileControlPlaneConditions")
700+
defer span.End()
701+
680702
// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
681703
// for updating conditions. Return early.
682704
if !controlPlane.KCP.Status.Initialized {
@@ -706,6 +728,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx cont
706728
//
707729
// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneConditions before this.
708730
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) error {
731+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileEtcdMembers")
732+
defer span.End()
733+
709734
log := ctrl.LoggerFrom(ctx)
710735

711736
// If etcd is not managed by KCP this is a no-op.
@@ -758,6 +783,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
758783
}
759784

760785
func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context.Context, controlPlane *internal.ControlPlane) error {
786+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.reconcileCertificateExpiries")
787+
defer span.End()
788+
761789
log := ctrl.LoggerFrom(ctx)
762790

763791
// Return if there are no KCP-owned control-plane machines.
@@ -828,6 +856,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context
828856
}
829857

830858
func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines, cluster *clusterv1.Cluster) error {
859+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.adoptMachines")
860+
defer span.End()
861+
831862
// We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned
832863
// See https://github.com/kubernetes/kubernetes/issues/42639
833864
uncached := controlplanev1.KubeadmControlPlane{}
@@ -905,6 +936,9 @@ func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *
905936
}
906937

907938
func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, currentOwner *bootstrapv1.KubeadmConfig, clusterName string) error {
939+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.adoptOwnedSecrets")
940+
defer span.End()
941+
908942
secrets := corev1.SecretList{}
909943
if err := r.Client.List(ctx, &secrets, client.InNamespace(kcp.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}); err != nil {
910944
return errors.Wrap(err, "error finding secrets for adoption")
@@ -941,6 +975,8 @@ func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, k
941975

942976
// ensureCertificatesOwnerRef ensures an ownerReference to the owner is added on the Secrets holding certificates.
943977
func (r *KubeadmControlPlaneReconciler) ensureCertificatesOwnerRef(ctx context.Context, certificates secret.Certificates, owner metav1.OwnerReference) error {
978+
ctx, span := traceutil.Start(ctx, "kubeadmcontrolplane.Reconciler.ensureCertificatesOwnerRef")
979+
defer span.End()
944980
for _, c := range certificates {
945981
if c.Secret == nil {
946982
continue

0 commit comments

Comments
 (0)