Skip to content

Commit c415714

Browse files
Merge pull request #4756 from Azure/cs-153-dump-sub
Add controller that dumps subscriptions
2 parents 304cd34 + 53a0cd0 commit c415714

File tree

5 files changed

+189
-8
lines changed

5 files changed

+189
-8
lines changed

backend/pkg/app/backend.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
262262

263263
maestroClientBuilder := maestro.NewMaestroClientBuilder()
264264

265-
dataDumpController := datadumpcontrollers.NewDataDumpController(b.options.CosmosDBClient, activeOperationLister, backendInformers)
265+
subscriptionNonClusterDataDumpController := datadumpcontrollers.NewSubscriptionNonClusterDataDumpController(b.options.CosmosDBClient, activeOperationLister, backendInformers)
266+
clusterRecursiveDataDumpController := datadumpcontrollers.NewClusterRecursiveDataDumpController(b.options.CosmosDBClient, activeOperationLister, backendInformers)
266267
csStateDumpController := datadumpcontrollers.NewCSStateDumpController(b.options.CosmosDBClient, activeOperationLister, backendInformers, b.options.ClustersServiceClient)
267268
doNothingController := controllers.NewDoNothingExampleController(b.options.CosmosDBClient, subscriptionLister)
268269
operationClusterCreateController := operationcontrollers.NewGenericOperationController(
@@ -504,7 +505,8 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
504505
// start the SharedInformers
505506
go backendInformers.RunWithContext(ctx)
506507

507-
go dataDumpController.Run(ctx, 20)
508+
go subscriptionNonClusterDataDumpController.Run(ctx, 20)
509+
go clusterRecursiveDataDumpController.Run(ctx, 20)
508510
go csStateDumpController.Run(ctx, 20)
509511
go doNothingController.Run(ctx, 20)
510512
go operationClusterCreateController.Run(ctx, 20)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright 2026 Microsoft Corporation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package controllerutils
16+
17+
import (
18+
"context"
19+
"time"
20+
21+
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
22+
23+
"github.com/Azure/ARO-HCP/backend/pkg/informers"
24+
)
25+
26+
type SubscriptionSyncer interface {
27+
SyncOnce(ctx context.Context, keyObj SubscriptionKey) error
28+
CooldownChecker() CooldownChecker
29+
}
30+
31+
type subscriptionWatchingController struct {
32+
name string
33+
syncer SubscriptionSyncer
34+
}
35+
36+
// NewSubscriptionWatchingController periodically looks up all subscriptions and queues them.
37+
// cooldownDuration is how long to wait before allowing a new notification to fire the controller.
38+
// Since our detection of change is coarse, we are being triggered every few second without new information.
39+
// Until we get a changefeed, the cooldownDuration value is effectively the min resync time.
40+
// This does NOT prevent us from re-executing on errors, so errors will continue to trigger fast checks as expected.
41+
func NewSubscriptionWatchingController(
42+
name string,
43+
informers informers.BackendInformers,
44+
resyncDuration time.Duration,
45+
syncer SubscriptionSyncer,
46+
) Controller {
47+
subscriptionSyncer := &subscriptionWatchingController{
48+
name: name,
49+
syncer: syncer,
50+
}
51+
subscriptionController := newGenericWatchingController(name, azcorearm.SubscriptionResourceType, subscriptionSyncer)
52+
53+
// this happens when unit tests don't want triggering. This isn't beautiful, but fails to do nothing which is pretty safe.
54+
if informers != nil {
55+
subscriptionInformer, _ := informers.Subscriptions()
56+
err := subscriptionController.QueueForInformers(resyncDuration, subscriptionInformer)
57+
if err != nil {
58+
panic(err) // coding error
59+
}
60+
}
61+
62+
return subscriptionController
63+
}
64+
65+
func (c *subscriptionWatchingController) SyncOnce(ctx context.Context, key SubscriptionKey) error {
66+
return c.syncer.SyncOnce(ctx, key)
67+
}
68+
69+
func (c *subscriptionWatchingController) CooldownChecker() CooldownChecker {
70+
return c.syncer.CooldownChecker()
71+
}
72+
73+
func (c *subscriptionWatchingController) MakeKey(resourceID *azcorearm.ResourceID) SubscriptionKey {
74+
return SubscriptionKey{
75+
SubscriptionID: resourceID.SubscriptionID,
76+
}
77+
}

backend/pkg/controllers/controllerutils/util.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
3030

3131
"github.com/Azure/ARO-HCP/internal/api"
32+
"github.com/Azure/ARO-HCP/internal/api/arm"
3233
"github.com/Azure/ARO-HCP/internal/database"
3334
"github.com/Azure/ARO-HCP/internal/utils"
3435
)
@@ -148,6 +149,21 @@ func (k *HCPNodePoolKey) InitialController(controllerName string) *api.Controlle
148149
}
149150
}
150151

152+
// SubscriptionKey is for driving workqueues keyed for subscriptions
153+
type SubscriptionKey struct {
154+
SubscriptionID string `json:"subscriptionID"`
155+
}
156+
157+
func (k *SubscriptionKey) GetResourceID() *azcorearm.ResourceID {
158+
return api.Must(arm.ToSubscriptionResourceID(k.SubscriptionID))
159+
}
160+
161+
func (k *SubscriptionKey) AddLoggerValues(logger logr.Logger) logr.Logger {
162+
return logger.WithValues(
163+
utils.LogValues{}.
164+
AddLogValuesForResourceID(k.GetResourceID())...)
165+
}
166+
151167
// clock is used by helper functions for setting last transition time. It is injectable for unit testing.
152168
var clock utilsclock.Clock = utilsclock.RealClock{}
153169

backend/pkg/controllers/datadumpcontrollers/data_dump.go renamed to backend/pkg/controllers/datadumpcontrollers/dump_cluster_recursive.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,21 @@ import (
2626
"github.com/Azure/ARO-HCP/internal/utils"
2727
)
2828

29-
type dataDump struct {
29+
type clusterRecursiveDataDump struct {
3030
cooldownChecker controllerutils.CooldownChecker
3131
cosmosClient database.DBClient
3232

3333
// nextDataDumpChecker ensures we don't hotloop from any source.
3434
nextDataDumpChecker controllerutils.CooldownChecker
3535
}
3636

37-
// NewDataDumpController periodically lists all clusters and logs when the cluster was created and its state.
38-
func NewDataDumpController(
37+
// NewClusterRecursiveDataDumpController periodically lists all clusters and logs when the cluster was created and its state.
38+
func NewClusterRecursiveDataDumpController(
3939
cosmosClient database.DBClient,
4040
activeOperationLister listers.ActiveOperationLister,
4141
backendInformers informers.BackendInformers,
4242
) controllerutils.Controller {
43-
syncer := &dataDump{
43+
syncer := &clusterRecursiveDataDump{
4444
cooldownChecker: controllerutils.DefaultActiveOperationPrioritizingCooldown(activeOperationLister),
4545
cosmosClient: cosmosClient,
4646
nextDataDumpChecker: controllerutils.DefaultActiveOperationPrioritizingCooldown(activeOperationLister),
@@ -55,7 +55,7 @@ func NewDataDumpController(
5555
)
5656
}
5757

58-
func (c *dataDump) SyncOnce(ctx context.Context, key controllerutils.HCPClusterKey) error {
58+
func (c *clusterRecursiveDataDump) SyncOnce(ctx context.Context, key controllerutils.HCPClusterKey) error {
5959
if !c.nextDataDumpChecker.CanSync(ctx, key) {
6060
return nil
6161
}
@@ -70,6 +70,6 @@ func (c *dataDump) SyncOnce(ctx context.Context, key controllerutils.HCPClusterK
7070
return nil
7171
}
7272

73-
func (c *dataDump) CooldownChecker() controllerutils.CooldownChecker {
73+
func (c *clusterRecursiveDataDump) CooldownChecker() controllerutils.CooldownChecker {
7474
return c.cooldownChecker
7575
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright 2025 Microsoft Corporation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package datadumpcontrollers
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"time"
21+
22+
"github.com/Azure/ARO-HCP/backend/pkg/controllers/controllerutils"
23+
"github.com/Azure/ARO-HCP/backend/pkg/informers"
24+
"github.com/Azure/ARO-HCP/backend/pkg/listers"
25+
"github.com/Azure/ARO-HCP/internal/database"
26+
"github.com/Azure/ARO-HCP/internal/utils"
27+
)
28+
29+
type subscriptionNonClusterDataDump struct {
30+
cooldownChecker controllerutils.CooldownChecker
31+
cosmosClient database.DBClient
32+
33+
// nextDataDumpChecker ensures we don't hotloop from any source.
34+
nextDataDumpChecker controllerutils.CooldownChecker
35+
}
36+
37+
// NewSubscriptionNonClusterDataDumpController periodically dumps data for a subscription that is NOT related to a cluster.
38+
func NewSubscriptionNonClusterDataDumpController(
39+
cosmosClient database.DBClient,
40+
activeOperationLister listers.ActiveOperationLister,
41+
backendInformers informers.BackendInformers,
42+
) controllerutils.Controller {
43+
syncer := &subscriptionNonClusterDataDump{
44+
cooldownChecker: controllerutils.DefaultActiveOperationPrioritizingCooldown(activeOperationLister),
45+
cosmosClient: cosmosClient,
46+
nextDataDumpChecker: controllerutils.NewTimeBasedCooldownChecker(4 * time.Minute),
47+
}
48+
49+
return controllerutils.NewSubscriptionWatchingController(
50+
"SubscriptionNonClusterDataDump",
51+
backendInformers,
52+
5*time.Minute,
53+
syncer,
54+
)
55+
}
56+
57+
func (c *subscriptionNonClusterDataDump) SyncOnce(ctx context.Context, key controllerutils.SubscriptionKey) error {
58+
if !c.nextDataDumpChecker.CanSync(ctx, key) {
59+
return nil
60+
}
61+
62+
logger := utils.LoggerFromContext(ctx)
63+
64+
cosmosCRUD, err := c.cosmosClient.UntypedCRUD(*key.GetResourceID())
65+
if err != nil {
66+
logger.Error(err, "failed to get cosmos CRUD")
67+
return nil
68+
}
69+
70+
subscription, err := cosmosCRUD.Get(ctx, key.GetResourceID())
71+
if err != nil {
72+
logger.Error(err, "failed to get subscription")
73+
return nil
74+
}
75+
76+
logger.Info(fmt.Sprintf("dumping resourceID %v", key.GetResourceID()),
77+
"currentResourceID", key.GetResourceID().String(),
78+
"content", subscription,
79+
)
80+
81+
return nil
82+
}
83+
84+
func (c *subscriptionNonClusterDataDump) CooldownChecker() controllerutils.CooldownChecker {
85+
return c.cooldownChecker
86+
}

0 commit comments

Comments
 (0)