Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 119 additions & 43 deletions pkg/operator/controller/status/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (

corev1 "k8s.io/api/core/v1"

apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilclock "k8s.io/utils/clock"

gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1"
Expand All @@ -50,13 +52,27 @@ const (

ingressesEqualConditionMessage = "desired and current number of IngressControllers are equal"

// gatewaysResourceName is the name of the Gateway API gateways CRD.
gatewaysResourceName = "gateways.gateway.networking.k8s.io"
// gatewayclassesResourceName is the name of the Gateway API
// gatewayclasses CRD.
gatewayclassesResourceName = "gatewayclasses.gateway.networking.k8s.io"
// istiosResourceName is the name of the Sail Operator istios CRD.
istiosResourceName = "istios.sailoperator.io"

controllerName = "status_controller"
)

var log = logf.Logger.WithName(controllerName)
var (
log = logf.Logger.WithName(controllerName)

// clock is to enable unit testing
clock utilclock.Clock = utilclock.RealClock{}

// clock is to enable unit testing
var clock utilclock.Clock = utilclock.RealClock{}
// relatedObjectsCRDs is a set of names of CRDs that we add to
// relatedObjects if they exist.
relatedObjectsCRDs = sets.New[string](gatewaysResourceName, gatewayclassesResourceName, istiosResourceName)
)

// New creates the status controller. This is the controller that handles all
// the logic for creating the ClusterOperator operator and updating its status.
Expand Down Expand Up @@ -99,15 +115,15 @@ func New(mgr manager.Manager, config Config) (controller.Controller, error) {
return nil, err
}

// If the "GatewayAPI" controller featuregate is enabled, watch
// subscriptions so that this controller can update status when the OSSM
// subscription is created or updated. Note that the subscriptions
// resource only exists if the "OperatorLifecycleManager" capability is
// enabled, so we cannot watch it if the capability is not enabled.
// Additionally, the default catalog only exists if the "marketplace"
// capability is enabled, so we cannot install OSSM without that
// capability.
if config.GatewayAPIEnabled && config.MarketplaceEnabled && config.OperatorLifecycleManagerEnabled {
// If the "GatewayAPI" and "GatewayAPIController" featuregates are
// enabled, watch subscriptions so that this controller can update
// status when the OSSM subscription is created or updated. Note that
// the subscriptions resource only exists if the
// "OperatorLifecycleManager" capability is enabled, so we cannot watch
// it if the capability is not enabled. Additionally, the default
// catalog only exists if the "marketplace" capability is enabled, so we
// cannot install OSSM without that capability.
if config.GatewayAPIEnabled && config.GatewayAPIControllerEnabled && config.MarketplaceEnabled && config.OperatorLifecycleManagerEnabled {
if err := c.Watch(source.Kind[client.Object](operatorCache, &operatorsv1alpha1.Subscription{}, handler.EnqueueRequestsFromMapFunc(toDefaultIngressController), predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
return e.Object.GetNamespace() == operatorcontroller.OpenshiftOperatorNamespace
Expand All @@ -124,6 +140,22 @@ func New(mgr manager.Manager, config Config) (controller.Controller, error) {
})); err != nil {
return nil, err
}
if err := c.Watch(source.Kind[client.Object](operatorCache, &apiextensionsv1.CustomResourceDefinition{}, handler.EnqueueRequestsFromMapFunc(toDefaultIngressController), predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
return relatedObjectsCRDs.Has(e.Object.GetName())
},
UpdateFunc: func(e event.UpdateEvent) bool {
return false
},
DeleteFunc: func(e event.DeleteEvent) bool {
return relatedObjectsCRDs.Has(e.Object.GetName())
},
GenericFunc: func(e event.GenericEvent) bool {
return false
},
})); err != nil {
return nil, err
}
}

return c, nil
Expand All @@ -133,6 +165,9 @@ func New(mgr manager.Manager, config Config) (controller.Controller, error) {
type Config struct {
// GatewayAPIEnabled indicates that the "GatewayAPI" featuregate is enabled.
GatewayAPIEnabled bool
// GatewayAPIControllerEnabled indicates that the "GatewayAPIController"
// featuregate is enabled.
GatewayAPIControllerEnabled bool
// MarketplaceEnabled indicates whether the "marketplace" capability is
// enabled.
MarketplaceEnabled bool
Expand Down Expand Up @@ -183,7 +218,7 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
}
oldStatus := co.Status.DeepCopy()

state, err := r.getOperatorState(ingressNamespace, canaryNamespace, co)
state, err := r.getOperatorState(ctx, ingressNamespace, canaryNamespace, co)
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get operator state: %v", err)
}
Expand Down Expand Up @@ -220,11 +255,7 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
Name: state.CanaryNamespace.Name,
})
}
if r.config.GatewayAPIEnabled {
related = append(related, configv1.ObjectReference{
Group: gatewayapiv1.GroupName,
Resource: "gatewayclasses",
})
if r.config.GatewayAPIEnabled && r.config.GatewayAPIControllerEnabled {
if state.haveOSSMSubscription {
subscriptionName := operatorcontroller.ServiceMeshOperatorSubscriptionName()
related = append(related, configv1.ObjectReference{
Expand All @@ -233,18 +264,25 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
Namespace: subscriptionName.Namespace,
Name: subscriptionName.Name,
})
if state.IngressNamespace != nil {
related = append(related, configv1.ObjectReference{
Group: sailv1.GroupVersion.Group,
Resource: "istios",
Namespace: state.IngressNamespace.Name,
})
related = append(related, configv1.ObjectReference{
Group: gatewayapiv1.GroupName,
Resource: "gateways",
Namespace: state.IngressNamespace.Name,
})
}
}
if state.haveIstiosResource {
related = append(related, configv1.ObjectReference{
Group: sailv1.GroupVersion.Group,
Resource: "istios",
})
}
if state.haveGatewayclassesResource {
related = append(related, configv1.ObjectReference{
Group: gatewayapiv1.GroupName,
Resource: "gatewayclasses",
})
}
if state.haveGatewaysResource {
related = append(related, configv1.ObjectReference{
Group: gatewayapiv1.GroupName,
Resource: "gateways",
Namespace: "", // Include all namespaces.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it mean that we include all resources with this group to the related objects (not CRD)? If so, I have the question below.

From one hand, PILOT_ENABLE_GATEWAY_CONTROLLER_MODE description says that gateway api resources are watched from all namespaces (btw I failed to find this variable in downstream istio). From the other hand we don't manage DNS for gateways from namespaces other than openshift-ingress. Do we consider gateways with BYO DNS are managed by us?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it mean that we include all resources with this group to the related objects (not CRD)?

Yes. The reference specifies the "gateways" resource but doesn't specify the namespace, so the reference includes any gateway in any namespace.

From one hand, PILOT_ENABLE_GATEWAY_CONTROLLER_MODE description says that gateway api resources are watched from all namespaces (btw I failed to find this variable in downstream istio).

PILOT_ENABLE_GATEWAY_CONTROLLER_MODE is referenced here: https://github.com/openshift-service-mesh/sail-operator/blob/release-3.0/chart/samples/gwControllerMode.yaml#L14

However, I think that this variable is actually ignored by OSSM 3, as you say, as it has been superseded by EnhancedResourceScoping. @dgn, is this correct? Is the example Helm chart out of date?

I intend to remove setting PILOT_ENABLE_GATEWAY_CONTROLLER_MODE from this operator with 43caa07.

From the other hand we don't manage DNS for gateways from namespaces other than openshift-ingress. Do we consider gateways with BYO DNS are managed by us?

Yes, Istio provisions a proxy even if cluster-ingress-operator doesn't publish a DNS record, so I consider such gateways to be managed by the OpenShift platform.

})
}
}

Expand Down Expand Up @@ -317,17 +355,26 @@ type operatorState struct {
IngressControllers []operatorv1.IngressController
DNSRecords []iov1.DNSRecord

haveOSSMSubscription bool
unmanagedGatewayAPICRDNames string
// haveOSSMSubscription means that the subscription for OSSM 3 exists.
haveOSSMSubscription bool
// haveIstiosResource means that the "istios.sailproject.io" CRD exists.
haveIstiosResource bool
// haveGatewaysResource means that the
// "gateways.gateway.networking.k8s.io" CRD exists.
haveGatewaysResource bool
// haveGatewayclassesResource means that the
// "gatewayclasses.gateway.networking.k8s.io" CRD exists.
haveGatewayclassesResource bool
}

// getOperatorState gets and returns the resources necessary to compute the
// operator's current state.
func (r *reconciler) getOperatorState(ingressNamespace, canaryNamespace string, co *configv1.ClusterOperator) (operatorState, error) {
func (r *reconciler) getOperatorState(ctx context.Context, ingressNamespace, canaryNamespace string, co *configv1.ClusterOperator) (operatorState, error) {
state := operatorState{}

ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ingressNamespace}}
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: ingressNamespace}, ns); err != nil {
if err := r.client.Get(ctx, types.NamespacedName{Name: ingressNamespace}, ns); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get namespace %q: %v", ingressNamespace, err)
}
Expand All @@ -336,7 +383,7 @@ func (r *reconciler) getOperatorState(ingressNamespace, canaryNamespace string,
}

ns = &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: canaryNamespace}}
if err := r.client.Get(context.TODO(), types.NamespacedName{Name: canaryNamespace}, ns); err != nil {
if err := r.client.Get(ctx, types.NamespacedName{Name: canaryNamespace}, ns); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get namespace %q: %v", canaryNamespace, err)
}
Expand All @@ -345,32 +392,61 @@ func (r *reconciler) getOperatorState(ingressNamespace, canaryNamespace string,
}

ingressList := &operatorv1.IngressControllerList{}
if err := r.cache.List(context.TODO(), ingressList, client.InNamespace(r.config.Namespace)); err != nil {
if err := r.cache.List(ctx, ingressList, client.InNamespace(r.config.Namespace)); err != nil {
return state, fmt.Errorf("failed to list ingresscontrollers in %q: %v", r.config.Namespace, err)
} else {
state.IngressControllers = ingressList.Items
}

if r.config.GatewayAPIEnabled {
if r.config.MarketplaceEnabled && r.config.OperatorLifecycleManagerEnabled {
if len(co.Status.Extension.Raw) > 0 {
extension := &IngressOperatorStatusExtension{}
if err := json.Unmarshal(co.Status.Extension.Raw, extension); err != nil {
return state, fmt.Errorf("failed to unmarshal status extension of cluster operator %q: %w", co.Name, err)
}
state.unmanagedGatewayAPICRDNames = extension.UnmanagedGatewayAPICRDNames
}

if r.config.GatewayAPIControllerEnabled && r.config.MarketplaceEnabled && r.config.OperatorLifecycleManagerEnabled {
var subscription operatorsv1alpha1.Subscription
subscriptionName := operatorcontroller.ServiceMeshOperatorSubscriptionName()
if err := r.cache.Get(context.TODO(), subscriptionName, &subscription); err != nil {
if err := r.cache.Get(ctx, subscriptionName, &subscription); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get subscription %q: %v", subscriptionName, err)
}
} else {
state.haveOSSMSubscription = true

}
}

if len(co.Status.Extension.Raw) > 0 {
extension := &IngressOperatorStatusExtension{}
if err := json.Unmarshal(co.Status.Extension.Raw, extension); err != nil {
return state, fmt.Errorf("failed to unmarshal status extension of cluster operator %q: %w", co.Name, err)
var (
crd apiextensionsv1.CustomResourceDefinition
gatewaysResourceNamespacedName = types.NamespacedName{Name: gatewaysResourceName}
gatewayclassesResourceNamespacedName = types.NamespacedName{Name: gatewayclassesResourceName}
istiosResourceNamespacedName = types.NamespacedName{Name: istiosResourceName}
)

if err := r.cache.Get(ctx, gatewaysResourceNamespacedName, &crd); err != nil {
if !errors.IsNotFound(err) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter much if there was a prior resource, then it was deleted? E.g. someone removed the Istio or other CRDs we're checking here - do we then need to remove these from relatedObjects? Or is it okay because this is reconciled periodically?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The watch on CRDs should trigger reconciliation if the istios CRD is deleted:

if err := c.Watch(source.Kind[client.Object](operatorCache, &apiextensionsv1.CustomResourceDefinition{}, handler.EnqueueRequestsFromMapFunc(toDefaultIngressController), predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
return relatedObjectsCRDs.Has(e.Object.GetName())
},
UpdateFunc: func(e event.UpdateEvent) bool {
return false
},
DeleteFunc: func(e event.DeleteEvent) bool {
return relatedObjectsCRDs.Has(e.Object.GetName())

getOperatorState intializes operatorState leaving haveIstiosResource false and only sets it to true if a Get on the istios CRD returns a nil error value, meaning the CRD is present:

func (r *reconciler) getOperatorState(ctx context.Context, ingressNamespace, canaryNamespace string) (operatorState, error) {
state := operatorState{}

if err := r.cache.Get(ctx, istiosResourceNamespacedName, &crd); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get CRD %q: %v", istiosResourceName, err)
}
} else {
state.haveIstiosResource = true
}

Then Reconcile only adds istios to relatedObjects if haveIstiosResource is true:

if state.haveIstiosResource {
related = append(related, configv1.ObjectReference{
Group: sailv1.GroupVersion.Group,
Resource: "istios",
})
}

So we should be fine.

return state, fmt.Errorf("failed to get CRD %q: %v", gatewaysResourceName, err)
}
} else {
state.haveGatewaysResource = true
}
if err := r.cache.Get(ctx, gatewayclassesResourceNamespacedName, &crd); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get CRD %q: %v", gatewayclassesResourceName, err)
}
} else {
state.haveGatewayclassesResource = true
}
Comment on lines +436 to +442
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GatewayClasses and Gateways can be created even if OLM capabilities are not present. Should we move them up? Out of if r.config.GatewayAPIControllerEnabled && r.config.MarketplaceEnabled && r.config.OperatorLifecycleManagerEnabled condition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've gone back and forth on this matter, and I think there are reasonable arguments either way. In my most recent update, I have conditionalized adding gatewayclasses and gateways to relatedObjects with the condition r.config.GatewayAPIControllerEnabled && r.config.MarketplaceEnabled && r.config.OperatorLifecycleManagerEnabled on the grounds that we are only adding these resources to relatedObjects as part of, and in support of, the controller feature. That is, if we didn't provide our own controller and were only providing the CRDs, then we wouldn't bother gathering gatewayclasses or gateways; we only need these resources in order to diagnose issues with the gateway controller.

If the operator could specify gateways and gatewayclasses by controller name in the relatedObjects specification, I would consider doing that. As it is, the operator would need to check the controller name on each gatewayclass and then check the gatewayclass name on each gateway in order to add each individual gatewayclass or gateway that our controller managed, and I don't think that that would be appropriate or worth the effort.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is, if we didn't provide our own controller and were only providing the CRDs, then we wouldn't bother gathering gatewayclasses or gateways; we only need these resources in order to diagnose issues with the gateway controller.

Ack.

if err := r.cache.Get(ctx, istiosResourceNamespacedName, &crd); err != nil {
if !errors.IsNotFound(err) {
return state, fmt.Errorf("failed to get CRD %q: %v", istiosResourceName, err)
}
} else {
state.haveIstiosResource = true
}
state.unmanagedGatewayAPICRDNames = extension.UnmanagedGatewayAPICRDNames
}
}

Expand Down
1 change: 1 addition & 0 deletions pkg/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ func New(config operatorconfig.Config, kubeConfig *rest.Config) (*Operator, erro
CanaryImage: config.CanaryImage,
OperatorReleaseVersion: config.OperatorReleaseVersion,
GatewayAPIEnabled: gatewayAPIEnabled,
GatewayAPIControllerEnabled: gatewayAPIControllerEnabled,
MarketplaceEnabled: marketplaceEnabled,
OperatorLifecycleManagerEnabled: olmEnabled,
}); err != nil {
Expand Down
23 changes: 17 additions & 6 deletions test/e2e/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,23 @@ func TestClusterOperatorStatusRelatedObjects(t *testing.T) {
if gatewayAPIEnabled, err := isFeatureGateEnabled(features.FeatureGateGatewayAPI); err != nil {
t.Fatalf("Failed to look up %q featuregate: %v", features.FeatureGateGatewayAPI, err)
} else if gatewayAPIEnabled {
expected = append(expected, configv1.ObjectReference{
Group: "gateway.networking.k8s.io",
Resource: "gatewayclasses",
})
// This test runs before TestGatewayAPI, so we do *not* expect
// to see subscriptions, istios, or gateways in relatedObjects.
if gatewayAPIControllerEnabled, err := isFeatureGateEnabled(features.FeatureGateGatewayAPIController); err != nil {
Copy link
Contributor

@candita candita Apr 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the test output, gateways is never appended to expected. Does anything enable FeatureGateGatewayAPIController before this test is run?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The featuregate should be enabled in the techpreview jobs and not currently enabled in any other jobs.

t.Fatalf("Failed to look up %q featuregate: %v", features.FeatureGateGatewayAPIController, err)
} else if gatewayAPIControllerEnabled {
// This test runs before TestGatewayAPI creates the
// subscription to install OSSM, so we do *not* expect
// to see subscriptions or istios in relatedObjects.
// However, we *do* expect to see gatewayclasses and
// gateways whenever the GatewayAPI and
// GatewayAPIController featuregates are enabled.
expected = append(expected, configv1.ObjectReference{
Group: "gateway.networking.k8s.io",
Resource: "gatewayclasses",
}, configv1.ObjectReference{
Group: "gateway.networking.k8s.io",
Resource: "gateways",
})
}
}

coName := controller.IngressClusterOperatorName()
Expand Down