diff --git a/azure/defaults.go b/azure/defaults.go
index f35e08c0658..0f86619c7d1 100644
--- a/azure/defaults.go
+++ b/azure/defaults.go
@@ -17,7 +17,6 @@ limitations under the License.
 package azure
 
 import (
-	"context"
 	"fmt"
 	"net/http"
 	"regexp"
@@ -28,8 +27,8 @@ import (
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
 	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
 	"github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel"
+	"go.opentelemetry.io/otel"
 
-	"sigs.k8s.io/cluster-api-provider-azure/pkg/ot"
 	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
 	"sigs.k8s.io/cluster-api-provider-azure/version"
 )
@@ -380,11 +379,7 @@ func ARMClientOptions(azureEnvironment string, extraPolicies ...policy.Policy) (
 	opts.PerCallPolicies = append(opts.PerCallPolicies, extraPolicies...)
 	opts.Retry.MaxRetries = -1 // Less than zero means one try and no retries.
 
-	otelTP, err := ot.OTLPTracerProvider(context.TODO())
-	if err != nil {
-		return nil, err
-	}
-	opts.TracingProvider = azotel.NewTracingProvider(otelTP, nil)
+	opts.TracingProvider = azotel.NewTracingProvider(otel.GetTracerProvider(), nil)
 
 	return opts, nil
 }
diff --git a/azure/scope/identity.go b/azure/scope/identity.go
index 5df61a59e5f..008252d76ea 100644
--- a/azure/scope/identity.go
+++ b/azure/scope/identity.go
@@ -27,6 +27,7 @@ import (
 	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
 	"github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel"
 	"github.com/pkg/errors"
+	"go.opentelemetry.io/otel"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
@@ -34,7 +35,6 @@ import (
 
 	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
 	"sigs.k8s.io/cluster-api-provider-azure/azure"
-	"sigs.k8s.io/cluster-api-provider-azure/pkg/ot"
 	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
 )
 
@@ -90,11 +90,7 @@ func (p *AzureCredentialsProvider) GetTokenCredential(ctx context.Context, resou
 	var authErr error
 	var cred azcore.TokenCredential
 
-	otelTP, err := ot.OTLPTracerProvider(ctx)
-	if err != nil {
-		return nil, err
-	}
-	tracingProvider := azotel.NewTracingProvider(otelTP, nil)
+	tracingProvider := azotel.NewTracingProvider(otel.GetTracerProvider(), nil)
 
 	switch p.Identity.Spec.Type {
 	case infrav1.WorkloadIdentity:
@@ -134,6 +130,7 @@ func (p *AzureCredentialsProvider) GetTokenCredential(ctx context.Context, resou
 	case infrav1.ServicePrincipalCertificate:
 		var certsContent []byte
 		if p.Identity.Spec.CertPath != "" {
+			var err error
 			certsContent, err = os.ReadFile(p.Identity.Spec.CertPath)
 			if err != nil {
 				return nil, errors.Wrap(err, "failed to read certificate file")
diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go
index c636bfb982b..ff195d8f831 100644
--- a/azure/scope/machinepool.go
+++ b/azure/scope/machinepool.go
@@ -615,19 +615,20 @@ func (m *MachinePoolScope) setProvisioningStateAndConditions(v infrav1.Provision
 		} else {
 			conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition, infrav1.ScaleSetScaleDownReason, clusterv1.ConditionSeverityInfo, "")
 		}
-		m.SetNotReady()
+		m.SetReady()
 	case v == infrav1.Updating:
 		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetModelUpdatedCondition, infrav1.ScaleSetModelOutOfDateReason, clusterv1.ConditionSeverityInfo, "")
-		m.SetNotReady()
+		m.SetReady()
 	case v == infrav1.Creating:
 		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetCreatingReason, clusterv1.ConditionSeverityInfo, "")
 		m.SetNotReady()
 	case v == infrav1.Deleting:
 		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetDeletingReason, clusterv1.ConditionSeverityInfo, "")
 		m.SetNotReady()
+	case v == infrav1.Failed:
+		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetProvisionFailedReason, clusterv1.ConditionSeverityInfo, "")
 	default:
 		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, string(v), clusterv1.ConditionSeverityInfo, "")
-		m.SetNotReady()
 	}
 }
 
@@ -745,6 +746,9 @@ func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error)
 
 // calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data.
 func (m *MachinePoolScope) calculateBootstrapDataHash(_ context.Context) (string, error) {
+	if m.cache == nil {
+		return "", fmt.Errorf("machinepool cache is nil")
+	}
 	bootstrapData := m.cache.BootstrapData
 	h := sha256.New()
 	n, err := io.WriteString(h, bootstrapData)
diff --git a/azure/scope/machinepool_test.go b/azure/scope/machinepool_test.go
index 12f8d8ef892..40c4cbf22f3 100644
--- a/azure/scope/machinepool_test.go
+++ b/azure/scope/machinepool_test.go
@@ -40,6 +40,7 @@ import (
 	"k8s.io/utils/ptr"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/conditions"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
@@ -1577,6 +1578,179 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) {
 	}
 }
 
+func TestMachinePoolScope_setProvisioningStateAndConditions(t *testing.T) {
+	scheme := runtime.NewScheme()
+	_ = clusterv1.AddToScheme(scheme)
+	_ = infrav1exp.AddToScheme(scheme)
+
+	tests := []struct {
+		Name              string
+		Setup             func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder)
+		Verify            func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client)
+		ProvisioningState infrav1.ProvisioningState
+	}{
+		{
+			Name: "if provisioning state is set to Succeeded and replicas match, MachinePool is ready and conditions match",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {
+				mp.Spec.Replicas = ptr.To[int32](1)
+				amp.Status.Replicas = 1
+			},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeTrue())
+				g.Expect(conditions.Get(amp, infrav1.ScaleSetRunningCondition).Status).To(Equal(corev1.ConditionTrue))
+				g.Expect(conditions.Get(amp, infrav1.ScaleSetModelUpdatedCondition).Status).To(Equal(corev1.ConditionTrue))
+				g.Expect(conditions.Get(amp, infrav1.ScaleSetDesiredReplicasCondition).Status).To(Equal(corev1.ConditionTrue))
+			},
+			ProvisioningState: infrav1.Succeeded,
+		},
+		{
+			Name: "if provisioning state is set to Succeeded and replicas are higher on AzureMachinePool, MachinePool is ready and ScalingDown",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {
+				mp.Spec.Replicas = ptr.To[int32](1)
+				amp.Status.Replicas = 2
+			},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeTrue())
+				condition := conditions.Get(amp, infrav1.ScaleSetDesiredReplicasCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetScaleDownReason))
+			},
+			ProvisioningState: infrav1.Succeeded,
+		},
+		{
+			Name: "if provisioning state is set to Succeeded and replicas are lower on AzureMachinePool, MachinePool is ready and ScalingUp",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {
+				mp.Spec.Replicas = ptr.To[int32](2)
+				amp.Status.Replicas = 1
+			},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeTrue())
+				condition := conditions.Get(amp, infrav1.ScaleSetDesiredReplicasCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetScaleUpReason))
+			},
+			ProvisioningState: infrav1.Succeeded,
+		},
+		{
+			Name:  "if provisioning state is set to Updating, MachinePool is ready and scale set model is set to OutOfDate",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeTrue())
+				condition := conditions.Get(amp, infrav1.ScaleSetModelUpdatedCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetModelOutOfDateReason))
+			},
+			ProvisioningState: infrav1.Updating,
+		},
+		{
+			Name:  "if provisioning state is set to Creating, MachinePool is NotReady and scale set running condition is set to Creating",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeFalse())
+				condition := conditions.Get(amp, infrav1.ScaleSetRunningCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetCreatingReason))
+			},
+			ProvisioningState: infrav1.Creating,
+		},
+		{
+			Name:  "if provisioning state is set to Deleting, MachinePool is NotReady and scale set running condition is set to Deleting",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				g.Expect(amp.Status.Ready).To(BeFalse())
+				condition := conditions.Get(amp, infrav1.ScaleSetRunningCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetDeletingReason))
+			},
+			ProvisioningState: infrav1.Deleting,
+		},
+		{
+			Name:  "if provisioning state is set to Failed, MachinePool ready state is not adjusted, and scale set running condition is set to Failed",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				condition := conditions.Get(amp, infrav1.ScaleSetRunningCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(infrav1.ScaleSetProvisionFailedReason))
+			},
+			ProvisioningState: infrav1.Failed,
+		},
+		{
+			Name:  "if provisioning state is set to something not explicitly handled, MachinePool ready state is not adjusted, and scale set running condition is set to the ProvisioningState",
+			Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, cb *fake.ClientBuilder) {},
+			Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client) {
+				condition := conditions.Get(amp, infrav1.ScaleSetRunningCondition)
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Reason).To(Equal(string(infrav1.Migrating)))
+			},
+			ProvisioningState: infrav1.Migrating,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.Name, func(t *testing.T) {
+			var (
+				g        = NewWithT(t)
+				mockCtrl = gomock.NewController(t)
+				cb       = fake.NewClientBuilder().WithScheme(scheme)
+				cluster  = &clusterv1.Cluster{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "cluster1",
+						Namespace: "default",
+					},
+					Spec: clusterv1.ClusterSpec{
+						InfrastructureRef: &corev1.ObjectReference{
+							Name: "azCluster1",
+						},
+					},
+					Status: clusterv1.ClusterStatus{
+						InfrastructureReady: true,
+					},
+				}
+				mp = &expv1.MachinePool{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "mp1",
+						Namespace: "default",
+						OwnerReferences: []metav1.OwnerReference{
+							{
+								Name:       "cluster1",
+								Kind:       "Cluster",
+								APIVersion: clusterv1.GroupVersion.String(),
+							},
+						},
+					},
+				}
+				amp = &infrav1exp.AzureMachinePool{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "amp1",
+						Namespace: "default",
+						OwnerReferences: []metav1.OwnerReference{
+							{
+								Name:       "mp1",
+								Kind:       "MachinePool",
+								APIVersion: expv1.GroupVersion.String(),
+							},
+						},
+					},
+				}
+				vmssState = &azure.VMSS{}
+			)
+			defer mockCtrl.Finish()
+
+			tt.Setup(mp, amp, cb.WithObjects(amp, cluster))
+			s := &MachinePoolScope{
+				client: cb.Build(),
+				ClusterScoper: &ClusterScope{
+					Cluster: cluster,
+				},
+				MachinePool:      mp,
+				AzureMachinePool: amp,
+				vmssState:        vmssState,
+			}
+			s.setProvisioningStateAndConditions(tt.ProvisioningState)
+			tt.Verify(g, s.AzureMachinePool, s.client)
+		})
+	}
+}
+
 func TestBootstrapDataChanges(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
diff --git a/controllers/aso_credential_cache.go b/controllers/aso_credential_cache.go
index fc6c3a0cbb5..cda62d1d13c 100644
--- a/controllers/aso_credential_cache.go
+++ b/controllers/aso_credential_cache.go
@@ -26,12 +26,12 @@ import (
 	"github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel"
 	asoannotations "github.com/Azure/azure-service-operator/v2/pkg/common/annotations"
 	"github.com/Azure/azure-service-operator/v2/pkg/common/config"
+	"go.opentelemetry.io/otel"
 	corev1 "k8s.io/api/core/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	"sigs.k8s.io/cluster-api-provider-azure/azure"
 	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
-	"sigs.k8s.io/cluster-api-provider-azure/pkg/ot"
 	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
 )
 
@@ -98,13 +98,8 @@ func (c *asoCredentialCache) clientOptsForASOResource(ctx context.Context, obj c
 		return azcore.ClientOptions{}, err
 	}
 
-	otelTP, err := ot.OTLPTracerProvider(ctx)
-	if err != nil {
-		return azcore.ClientOptions{}, err
-	}
-
 	opts := azcore.ClientOptions{
-		TracingProvider: azotel.NewTracingProvider(otelTP, nil),
+		TracingProvider: azotel.NewTracingProvider(otel.GetTracerProvider(), nil),
 		Cloud: cloud.Configuration{
 			ActiveDirectoryAuthorityHost: string(secret.Data[config.AzureAuthorityHost]),
 		},
diff --git a/exp/controllers/azuremachinepoolmachine_controller.go b/exp/controllers/azuremachinepoolmachine_controller.go
index 96aab4a5a82..5198fd27702 100644
--- a/exp/controllers/azuremachinepoolmachine_controller.go
+++ b/exp/controllers/azuremachinepoolmachine_controller.go
@@ -294,8 +294,6 @@ func (ampmr *AzureMachinePoolMachineController) reconcileNormal(ctx context.Cont
 	switch state {
 	case infrav1.Failed:
 		ampmr.Recorder.Eventf(machineScope.AzureMachinePoolMachine, corev1.EventTypeWarning, "FailedVMState", "Azure scale set VM is in failed state")
-		machineScope.SetFailureReason(azure.UpdateError)
-		machineScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", state))
 	case infrav1.Deleting:
 		log.V(4).Info("deleting machine because state is Deleting", "machine", machineScope.Name())
 		if err := ampmr.Client.Delete(ctx, machineScope.Machine); err != nil {
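Note on the tracing change above (illustrative, not part of the diff): the updated call sites pass otel.GetTracerProvider() to azotel.NewTracingProvider, so they use whatever OpenTelemetry tracer provider the process has registered globally instead of constructing a new one per call. A minimal sketch of registering a global provider at startup follows; the stdout exporter and the standalone main function are assumptions for illustration only, not how this repository wires up tracing.

package main

import (
	"context"
	"log"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

func main() {
	// Build a tracer provider backed by a stdout exporter (assumed here
	// purely for demonstration; any SpanExporter works the same way).
	exporter, err := stdouttrace.New()
	if err != nil {
		log.Fatal(err)
	}
	tp := sdktrace.NewTracerProvider(sdktrace.WithBatcher(exporter))
	defer func() { _ = tp.Shutdown(context.Background()) }()

	// Register it globally. After this call, otel.GetTracerProvider()
	// returns tp, so call sites like the ARMClientOptions change above
	// emit spans through it without any further wiring.
	otel.SetTracerProvider(tp)
}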