Skip to content

Commit 4d25478

Browse files
committed
Improve pre-reconcile owner check
Solves issue where the owner has stale state from Azure. * Removes owner from PreReconcileCheck. Owner checks must now always be done via PreReconcileOwnerCheck. This ensures that we only issue HTTP calls to Azure to refresh the owner state when we actually need the owner for something. * Resources that implement PreReconcileOwnerCheck now issue a GET on the owner ARM resource before calling the check, meaning that the check will always be acting on an up-to-date owner status + spec. * Remove possibility of owner being nil in PreReconcileOwnerCheck. We won't call that API at all if the owner is nil, which saves the individual check implementations from needing to always include a nil check. Test updates: * Updated Kuberentes version in tests and samples as the Kuberentes version we were using is now out of support. * Updated containerservice samples to use a different KeyVault name for each sample so that the KeyVault names don't collide when being recovered/purged. * Removed Test_Samples_CreationAndDeletion/Test_Dbforpostgresql_v20250801_CreationAndDeletion from the list of skipped samples. * Re-recorded required samples (containerservice and dbforpostgresql) Fixes Azure#5040.
1 parent 1416a67 commit 4d25478

File tree

42 files changed

+33896
-46730
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+33896
-46730
lines changed

v2/api/containerservice/customizations/managed_cluster_extensions.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,6 @@ var nonBlockingManagedClusterProvisioningStates = set.Make(
168168
func (ext *ManagedClusterExtension) PreReconcileCheck(
169169
ctx context.Context,
170170
obj genruntime.MetaObject,
171-
owner genruntime.MetaObject,
172171
resourceResolver *resolver.Resolver,
173172
armClient *genericarmclient.GenericClient,
174173
log logr.Logger,
@@ -197,7 +196,7 @@ func (ext *ManagedClusterExtension) PreReconcileCheck(
197196
nil
198197
}
199198

200-
return next(ctx, obj, owner, resourceResolver, armClient, log)
199+
return next(ctx, obj, resourceResolver, armClient, log)
201200
}
202201

203202
func clusterProvisioningStateBlocksReconciliation(provisioningState *string) bool {

v2/api/containerservice/customizations/managed_clusters_agent_pool_extensions.go

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ import (
2222
"github.com/Azure/azure-service-operator/v2/pkg/genruntime/extensions"
2323
)
2424

25-
var _ extensions.PreReconciliationChecker = &ManagedClustersAgentPoolExtension{}
25+
var (
26+
_ extensions.PreReconciliationChecker = &ManagedClustersAgentPoolExtension{}
27+
_ extensions.PreReconciliationOwnerChecker = &ManagedClustersAgentPoolExtension{}
28+
)
2629

2730
// If an agent pool has a provisioningState not in this set, it will reject any attempt to PUT a new state out of
2831
// hand; so there's no point in even trying. This is true even if the PUT we're doing will have no effect on the state
@@ -34,10 +37,30 @@ var nonBlockingManagedClustersAgentPoolProvisioningStates = set.Make(
3437
"canceled",
3538
)
3639

40+
func (ext *ManagedClustersAgentPoolExtension) PreReconcileOwnerCheck(
41+
ctx context.Context,
42+
owner genruntime.MetaObject,
43+
resourceResolver *resolver.Resolver,
44+
armClient *genericarmclient.GenericClient,
45+
log logr.Logger,
46+
next extensions.PreReconcileOwnerCheckFunc,
47+
) (extensions.PreReconcileCheckResult, error) {
48+
// Check to see if the owning cluster is in a state that will block us from reconciling
49+
if managedCluster, ok := owner.(*containerservice.ManagedCluster); ok {
50+
state := managedCluster.Status.ProvisioningState
51+
if state != nil && clusterProvisioningStateBlocksReconciliation(state) {
52+
return extensions.BlockReconcile(
53+
fmt.Sprintf("Managed cluster %q is in provisioning state %q", owner.GetName(), *state)),
54+
nil
55+
}
56+
}
57+
58+
return next(ctx, owner, resourceResolver, armClient, log)
59+
}
60+
3761
func (ext *ManagedClustersAgentPoolExtension) PreReconcileCheck(
3862
ctx context.Context,
3963
obj genruntime.MetaObject,
40-
owner genruntime.MetaObject,
4164
resourceResolver *resolver.Resolver,
4265
armClient *genericarmclient.GenericClient,
4366
log logr.Logger,
@@ -55,20 +78,6 @@ func (ext *ManagedClustersAgentPoolExtension) PreReconcileCheck(
5578
// the hub type has been changed but this extension has not
5679
var _ conversion.Hub = agentPool
5780

58-
// Check to see if the owning cluster is in a state that will block us from reconciling
59-
// Owner nil can happen if the owner of the agent pool is referenced by armID
60-
if owner != nil {
61-
if managedCluster, ok := owner.(*containerservice.ManagedCluster); ok {
62-
state := managedCluster.Status.ProvisioningState
63-
if state != nil && clusterProvisioningStateBlocksReconciliation(state) {
64-
return extensions.BlockReconcile(
65-
fmt.Sprintf("Managed cluster %q is in provisioning state %q", owner.GetName(), *state)),
66-
nil
67-
}
68-
69-
}
70-
}
71-
7281
// If the agent pool is in a state that will reject any PUT, then we should skip reconciliation
7382
// as there's no point in even trying.
7483
// This allows us to "play nice with others" and not use up request quota attempting to make changes when we
@@ -80,7 +89,7 @@ func (ext *ManagedClustersAgentPoolExtension) PreReconcileCheck(
8089
nil
8190
}
8291

83-
return next(ctx, obj, owner, resourceResolver, armClient, log)
92+
return next(ctx, obj, resourceResolver, armClient, log)
8493
}
8594

8695
func agentPoolProvisioningStateBlocksReconciliation(provisioningState *string) bool {

v2/api/dbforpostgresql/customizations/flexible_server_extensions.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ var nonBlockingFlexibleServerStates = set.Make(
101101
func (ext *FlexibleServerExtension) PreReconcileCheck(
102102
ctx context.Context,
103103
obj genruntime.MetaObject,
104-
owner genruntime.MetaObject,
105104
resourceResolver *resolver.Resolver,
106105
armClient *genericarmclient.GenericClient,
107106
log logr.Logger,
@@ -127,7 +126,7 @@ func (ext *FlexibleServerExtension) PreReconcileCheck(
127126
*state)), nil
128127
}
129128

130-
return next(ctx, obj, owner, resourceResolver, armClient, log)
129+
return next(ctx, obj, resourceResolver, armClient, log)
131130
}
132131

133132
func flexibleServerStateBlocksReconciliation(state string) bool {

v2/api/dbforpostgresql/customizations/flexible_servers_database_extensions.go

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,28 @@ import (
1919
"github.com/Azure/azure-service-operator/v2/pkg/genruntime/extensions"
2020
)
2121

22-
var _ extensions.PreReconciliationChecker = &FlexibleServersDatabaseExtension{}
22+
var _ extensions.PreReconciliationOwnerChecker = &FlexibleServersDatabaseExtension{}
2323

24-
func (extension *FlexibleServersDatabaseExtension) PreReconcileCheck(
24+
func (extension *FlexibleServersDatabaseExtension) PreReconcileOwnerCheck(
2525
ctx context.Context,
26-
obj genruntime.MetaObject,
2726
owner genruntime.MetaObject,
2827
resourceResolver *resolver.Resolver,
2928
armClient *genericarmclient.GenericClient,
3029
log logr.Logger,
31-
next extensions.PreReconcileCheckFunc,
30+
next extensions.PreReconcileOwnerCheckFunc,
3231
) (extensions.PreReconcileCheckResult, error) {
3332
// Check to see if our owning server is ready for the database to be reconciled
34-
// Owner nil can happen if the server owner of the database is referenced by armID
35-
if owner != nil {
36-
if server, ok := owner.(*hub.FlexibleServer); ok {
37-
serverState := server.Status.State
38-
if serverState != nil && flexibleServerStateBlocksReconciliation(*serverState) {
39-
return extensions.BlockReconcile(
40-
fmt.Sprintf(
41-
"Owning FlexibleServer is in state %q",
42-
*serverState)), nil
43-
}
33+
if server, ok := owner.(*hub.FlexibleServer); ok {
34+
serverState := server.Status.State
35+
if serverState != nil && flexibleServerStateBlocksReconciliation(*serverState) {
36+
return extensions.BlockReconcile(
37+
fmt.Sprintf(
38+
"Owning FlexibleServer is in state %q",
39+
*serverState)), nil
4440
}
4541
}
4642

47-
return next(ctx, obj, owner, resourceResolver, armClient, log)
43+
return next(ctx, owner, resourceResolver, armClient, log)
4844
}
4945

5046
var _ extensions.Importer = &FlexibleServersDatabaseExtension{}

v2/api/dbforpostgresql/customizations/flexible_servers_firewall_rule_extensions.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,13 @@ func (ext *FlexibleServersFirewallRuleExtension) PreReconcileOwnerCheck(
2929
next extensions.PreReconcileOwnerCheckFunc,
3030
) (extensions.PreReconcileCheckResult, error) {
3131
// Check to see if our owning server is ready for the database to be reconciled
32-
// Owner nil can happen if the server/owner of the firewall rule is referenced by armID
33-
if owner != nil {
34-
if server, ok := owner.(*postgresql.FlexibleServer); ok {
35-
serverState := server.Status.State
36-
if serverState != nil && flexibleServerStateBlocksReconciliation(*serverState) {
37-
return extensions.BlockReconcile(
38-
fmt.Sprintf(
39-
"Owning FlexibleServer is in state %q",
40-
*serverState)), nil
41-
}
32+
if server, ok := owner.(*postgresql.FlexibleServer); ok {
33+
serverState := server.Status.State
34+
if serverState != nil && flexibleServerStateBlocksReconciliation(*serverState) {
35+
return extensions.BlockReconcile(
36+
fmt.Sprintf(
37+
"Owning FlexibleServer is in state %q",
38+
*serverState)), nil
4239
}
4340
}
4441

v2/api/documentdb/customizations/database_account_extensions.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,6 @@ var _ extensions.PreReconciliationChecker = &DatabaseAccountExtension{}
213213
func (ext *DatabaseAccountExtension) PreReconcileCheck(
214214
ctx context.Context,
215215
obj genruntime.MetaObject,
216-
owner genruntime.MetaObject,
217216
resourceResolver *resolver.Resolver,
218217
armClient *genericarmclient.GenericClient,
219218
log logr.Logger,
@@ -236,5 +235,5 @@ func (ext *DatabaseAccountExtension) PreReconcileCheck(
236235
return extensions.BlockReconcile("reconcile blocked while account is at status deleting"), nil
237236
}
238237

239-
return next(ctx, obj, owner, resourceResolver, armClient, log)
238+
return next(ctx, obj, resourceResolver, armClient, log)
240239
}

v2/api/kusto/customizations/cluster_extensions.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ var clusterTerminalStates = set.Make(
4040
func (ext *ClusterExtension) PreReconcileCheck(
4141
ctx context.Context,
4242
obj genruntime.MetaObject,
43-
owner genruntime.MetaObject,
4443
resourceResolver *resolver.Resolver,
4544
armClient *genericarmclient.GenericClient,
4645
log logr.Logger,
@@ -69,7 +68,7 @@ func (ext *ClusterExtension) PreReconcileCheck(
6968
nil
7069
}
7170

72-
return next(ctx, obj, owner, resourceResolver, armClient, log)
71+
return next(ctx, obj, resourceResolver, armClient, log)
7372
}
7473

7574
func clusterProvisioningStateBlocksReconciliation(provisioningState *string) bool {

v2/api/kusto/customizations/database_extensions.go

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,21 @@ func (ext *DatabaseExtension) PreReconcileOwnerCheck(
3838
next extensions.PreReconcileOwnerCheckFunc,
3939
) (extensions.PreReconcileCheckResult, error) {
4040
// Check to see if the owning cluster is in a state that will block us from reconciling
41-
// Owner nil can happen if the owner of the database is referenced by armID
42-
if owner != nil {
43-
if cluster, ok := owner.(*kusto.Cluster); ok {
44-
// If our owning *cluster* is in a state that will reject any PUT, then we should skip
45-
// reconciliation of the database as there's no point in even trying.
46-
// One way this can happen is when we reconcile the cluster, putting it into an `Updating`
47-
// state for a period. While it's in that state, we can't even try to reconcile the database as
48-
// the operation will fail with a `Conflict` error.
49-
// Checking the state of our owning cluster allows us to "play nice with others" and not use up
50-
// request quota attempting to make changes when we already know those attempts will fail.
51-
state := cluster.Status.ProvisioningState
52-
if state != nil && clusterProvisioningStateBlocksReconciliation(state) {
53-
return extensions.BlockReconcile(
54-
fmt.Sprintf("Owning cluster is in provisioning state %q", *state)),
55-
nil
56-
}
57-
41+
if cluster, ok := owner.(*kusto.Cluster); ok {
42+
// If our owning *cluster* is in a state that will reject any PUT, then we should skip
43+
// reconciliation of the database as there's no point in even trying.
44+
// One way this can happen is when we reconcile the cluster, putting it into an `Updating`
45+
// state for a period. While it's in that state, we can't even try to reconcile the database as
46+
// the operation will fail with a `Conflict` error.
47+
// Checking the state of our owning cluster allows us to "play nice with others" and not use up
48+
// request quota attempting to make changes when we already know those attempts will fail.
49+
state := cluster.Status.ProvisioningState
50+
if state != nil && clusterProvisioningStateBlocksReconciliation(state) {
51+
return extensions.BlockReconcile(
52+
fmt.Sprintf("Owning cluster is in provisioning state %q", *state)),
53+
nil
5854
}
55+
5956
}
6057

6158
return next(ctx, owner, resourceResolver, armClient_, log)

v2/internal/controllers/dbforpostgresql_flexibleserver_crud_v1api20221201_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ func Test_DBForPostgreSQL_FlexibleServer_20221201_CRUD(t *testing.T) {
3030
ctx := context.Background()
3131
tc := globalTestContext.ForTest(t)
3232

33-
// location := tc.AzureRegion Capacity crunch in West US 2 makes this not work when live
34-
location := "eastus"
33+
tc.AzureRegion = to.Ptr("uksouth")
3534

3635
rg := tc.CreateTestResourceGroupAndWait()
3736

@@ -55,7 +54,7 @@ func Test_DBForPostgreSQL_FlexibleServer_20221201_CRUD(t *testing.T) {
5554
flexibleServer := &postgresql.FlexibleServer{
5655
ObjectMeta: tc.MakeObjectMeta("postgresql"),
5756
Spec: postgresql.FlexibleServer_Spec{
58-
Location: &location,
57+
Location: tc.AzureRegion,
5958
Owner: testcommon.AsOwner(rg),
6059
Version: &version,
6160
Sku: &postgresql.Sku{

v2/internal/controllers/latest_reconciled_generation_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func Test_Latest_Reconciled_Generation_Reconciles_AllEvents(t *testing.T) {
4545
Identity: &aks.ManagedClusterIdentity{
4646
Type: to.Ptr(aks.ManagedClusterIdentity_Type_SystemAssigned),
4747
},
48-
KubernetesVersion: to.Ptr("1.30.0"),
48+
KubernetesVersion: to.Ptr("1.33.5"),
4949
},
5050
}
5151

@@ -59,14 +59,14 @@ func Test_Latest_Reconciled_Generation_Reconciles_AllEvents(t *testing.T) {
5959
VmSize: to.Ptr("Standard_DS2_v2"),
6060
OsType: to.Ptr(aks.OSType_Linux),
6161
Mode: to.Ptr(aks.AgentPoolMode_User),
62-
OrchestratorVersion: to.Ptr("1.29.5"),
62+
OrchestratorVersion: to.Ptr("1.32.9"),
6363
},
6464
}
6565

6666
tc.CreateResourceAndWait(agentPool)
6767

6868
old := agentPool.DeepCopy()
69-
agentPool.Spec.OrchestratorVersion = to.Ptr("1.30.0") // Start an upgrade that we know will take a bit
69+
agentPool.Spec.OrchestratorVersion = to.Ptr("1.33.5") // Start an upgrade that we know will take a bit
7070

7171
tc.PatchResourceAndWaitForState(old, agentPool, metav1.ConditionFalse, conditions.ConditionSeverityInfo)
7272

@@ -80,7 +80,7 @@ func Test_Latest_Reconciled_Generation_Reconciles_AllEvents(t *testing.T) {
8080
tc.Eventually(func() bool {
8181
var updated aks.ManagedClustersAgentPool
8282
tc.GetResource(objectKey, &updated)
83-
return *updated.Status.OrchestratorVersion == "1.30.0" && *updated.Status.Count == 2
83+
return *updated.Status.OrchestratorVersion == "1.33.5" && *updated.Status.Count == 2
8484
}).Should(BeTrue())
8585

8686
tc.DeleteResourcesAndWait(agentPool, cluster)

0 commit comments

Comments
 (0)