Skip to content

🐛 Fix panic when OpenStackCluster.Status.Network is nil in HCP scenarios #2635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ TEST_E2E_DIR := test/e2e
# Files
E2E_DATA_DIR ?= $(REPO_ROOT)/test/e2e/data
E2E_CONF_PATH ?= $(E2E_DATA_DIR)/e2e_conf.yaml
E2E_CONF_PATH_HCP ?= $(E2E_DATA_DIR)/e2e_conf_hcp.yaml
KUBETEST_CONF_PATH ?= $(abspath $(E2E_DATA_DIR)/kubetest/conformance.yaml)
KUBETEST_FAST_CONF_PATH ?= $(abspath $(E2E_DATA_DIR)/kubetest/conformance-fast.yaml)
GO_INSTALL := ./scripts/go_install.sh
Expand Down Expand Up @@ -184,7 +185,10 @@ e2e-templates: $(addprefix $(E2E_NO_ARTIFACT_TEMPLATES_DIR)/, \
cluster-template-flatcar.yaml \
cluster-template-k8s-upgrade.yaml \
cluster-template-flatcar-sysext.yaml \
cluster-template-no-bastion.yaml)
cluster-template-no-bastion.yaml \
cluster-template-hcp-management.yaml \
cluster-template-hcp-workload.yaml \
cluster-template-hcp-broken.yaml)
# Currently no templates that require CI artifacts
# $(addprefix $(E2E_TEMPLATES_DIR)/, add-templates-here.yaml) \

Expand All @@ -205,7 +209,7 @@ test-e2e: $(GINKGO) e2e-prerequisites ## Run e2e tests
time $(GINKGO) -fail-fast -trace -timeout=3h -show-node-events -v -tags=e2e -nodes=$(E2E_GINKGO_PARALLEL) \
--output-dir="$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" \
-focus="$(E2E_GINKGO_FOCUS)" $(_SKIP_ARGS) $(E2E_GINKGO_ARGS) ./test/e2e/suites/e2e/... -- \
-config-path="$(E2E_CONF_PATH)" -artifacts-folder="$(ARTIFACTS)" \
-config-path="$(E2E_CONF_PATH_HCP)" -artifacts-folder="$(ARTIFACTS)" \
-data-folder="$(E2E_DATA_DIR)" $(E2E_ARGS)

# Pre-compile tests
Expand All @@ -215,7 +219,7 @@ build-e2e-tests: $(GINKGO)
$(GINKGO) build -tags=e2e ./test/e2e/suites/e2e/...

.PHONY: e2e-image
e2e-image: CONTROLLER_IMG_TAG = "gcr.io/k8s-staging-capi-openstack/capi-openstack-controller:e2e"
e2e-image: CONTROLLER_IMG_TAG = "ghcr.io/orkhanorganization/k8s-staging-capi-openstack/capi-openstack-controller:e2e"
e2e-image: docker-build

# Pull all the images referenced in test/e2e/data/e2e_conf.yaml
Expand All @@ -236,6 +240,18 @@ test-conformance: $(GINKGO) e2e-prerequisites ## Run clusterctl based conformanc
test-conformance-fast: ## Run clusterctl based conformance test on workload cluster (requires Docker) using a subset of the conformance suite in parallel.
$(MAKE) test-conformance CONFORMANCE_E2E_ARGS="-kubetest.config-file=$(KUBETEST_FAST_CONF_PATH) -kubetest.ginkgo-nodes=5 $(E2E_ARGS)"

# HCP_E2E_ARGS holds extra arguments for the HCP suite's test binary (everything
# after the `--` separator below). It defaults to empty and always has
# $(E2E_ARGS) appended, so arguments given for the regular e2e run apply here too.
#
# test-hcp runs the Ginkgo suite under ./test/e2e/suites/hcp/ using the HCP
# e2e config ($(E2E_CONF_PATH_HCP)) and writes junit.hcp_suite.1.xml to $(ARTIFACTS).
HCP_E2E_ARGS ?=
HCP_E2E_ARGS += $(E2E_ARGS)
.PHONY: test-hcp
test-hcp: $(GINKGO) e2e-prerequisites ## Run HCP (Hosted Control Plane) e2e tests
time $(GINKGO) -fail-fast -trace -timeout=3h -show-node-events -v -tags=e2e -nodes=$(E2E_GINKGO_PARALLEL) \
--output-dir="$(ARTIFACTS)" --junit-report="junit.hcp_suite.1.xml" \
-focus="$(E2E_GINKGO_FOCUS)" $(_SKIP_ARGS) $(E2E_GINKGO_ARGS) ./test/e2e/suites/hcp/... -- \
-config-path="$(E2E_CONF_PATH_HCP)" -artifacts-folder="$(ARTIFACTS)" \
-data-folder="$(E2E_DATA_DIR)" $(HCP_E2E_ARGS)



APIDIFF_OLD_COMMIT ?= $(shell git rev-parse origin/main)

.PHONY: apidiff
Expand Down
33 changes: 20 additions & 13 deletions controllers/openstackmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -521,20 +521,16 @@ func openStackMachineSpecToOpenStackServerSpec(openStackMachineSpec *infrav1.Ope
serverPorts = make([]infrav1.PortOpts, 1)
}
for i := range serverPorts {
if serverPorts[i].Network == nil {
serverPorts[i].Network = &infrav1.NetworkParam{
ID: &defaultNetworkID,
}
}
if len(serverPorts[i].SecurityGroups) == 0 && defaultSecGroup != nil {
serverPorts[i].SecurityGroups = []infrav1.SecurityGroupParam{
{
ID: defaultSecGroup,
},
}
// Only inject the default network when we actually have an ID.
if serverPorts[i].Network == nil && defaultNetworkID != "" {
serverPorts[i].Network = &infrav1.NetworkParam{ID: &defaultNetworkID}
}
if len(openStackMachineSpec.SecurityGroups) > 0 {
serverPorts[i].SecurityGroups = append(serverPorts[i].SecurityGroups, openStackMachineSpec.SecurityGroups...)
// Machine level security groups override any cluster defaults.
serverPorts[i].SecurityGroups = openStackMachineSpec.SecurityGroups
} else if len(serverPorts[i].SecurityGroups) == 0 && defaultSecGroup != nil {
// Fall back to cluster-managed security group when nothing else specified.
serverPorts[i].SecurityGroups = []infrav1.SecurityGroupParam{{ID: defaultSecGroup}}
}
}
openStackServerSpec.Ports = serverPorts
Expand Down Expand Up @@ -588,7 +584,18 @@ func (r *OpenStackMachineReconciler) getOrCreateMachineServer(ctx context.Contex
}
return openStackCluster.Spec.IdentityRef
}()
machineServerSpec := openStackMachineSpecToOpenStackServerSpec(&openStackMachine.Spec, identityRef, compute.InstanceTags(&openStackMachine.Spec, openStackCluster), failureDomain, userDataRef, getManagedSecurityGroup(openStackCluster, machine), openStackCluster.Status.Network.ID)
// Determine default network ID if the cluster status exposes one.
var defaultNetworkID string
if openStackCluster.Status.Network != nil {
defaultNetworkID = openStackCluster.Status.Network.ID
}

// If no cluster network is available AND the machine spec did not define any ports with a network, we cannot choose a network.
if defaultNetworkID == "" && len(openStackMachine.Spec.Ports) == 0 {
return nil, capoerrors.Terminal(infrav1.InvalidMachineSpecReason, "no network configured: cluster network is missing and machine spec does not define ports with a network")
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I feel like this splits this logic across this function and openStackMachineSpecToOpenStackServerSpec. Did you consider putting this logic in openStackMachineSpecToOpenStackServerSpec and modifying its signature to return an error?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


machineServerSpec := openStackMachineSpecToOpenStackServerSpec(&openStackMachine.Spec, identityRef, compute.InstanceTags(&openStackMachine.Spec, openStackCluster), failureDomain, userDataRef, getManagedSecurityGroup(openStackCluster, machine), defaultNetworkID)
machineServer = &infrav1alpha1.OpenStackServer{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down
249 changes: 218 additions & 31 deletions controllers/openstackmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,17 @@ func TestOpenStackMachineSpecToOpenStackServerSpec(t *testing.T) {
},
}
portOptsWithAdditionalSecurityGroup := []infrav1.PortOpts{
{
Network: &infrav1.NetworkParam{
ID: ptr.To(openStackCluster.Status.Network.ID),
},
SecurityGroups: []infrav1.SecurityGroupParam{
{
ID: ptr.To(openStackCluster.Status.WorkerSecurityGroup.ID),
},
{
ID: ptr.To(extraSecurityGroupUUID),
},
},
},
}
{
Network: &infrav1.NetworkParam{
ID: ptr.To(openStackCluster.Status.Network.ID),
},
SecurityGroups: []infrav1.SecurityGroupParam{
{
ID: ptr.To(extraSecurityGroupUUID),
},
},
},
}
image := infrav1.ImageParam{Filter: &infrav1.ImageFilter{Name: ptr.To("my-image")}}
tags := []string{"tag1", "tag2"}
userData := &corev1.LocalObjectReference{Name: "server-data-secret"}
Expand Down Expand Up @@ -158,27 +155,67 @@ func TestOpenStackMachineSpecToOpenStackServerSpec(t *testing.T) {
},
},
{
name: "Test an OpenStackMachineSpec to OpenStackServerSpec conversion with flavorID specified but not flavor",
spec: &infrav1.OpenStackMachineSpec{
FlavorID: ptr.To(flavorUUID),
Image: image,
SSHKeyName: sshKeyName,
},
want: &infrav1alpha1.OpenStackServerSpec{
FlavorID: ptr.To(flavorUUID),
IdentityRef: identityRef,
Image: image,
SSHKeyName: sshKeyName,
Ports: portOpts,
Tags: tags,
UserDataRef: userData,
},
},
name: "Test an OpenStackMachineSpec to OpenStackServerSpec conversion with flavorID specified but not flavor",
spec: &infrav1.OpenStackMachineSpec{
FlavorID: ptr.To(flavorUUID),
Image: image,
SSHKeyName: sshKeyName,
},
want: &infrav1alpha1.OpenStackServerSpec{
FlavorID: ptr.To(flavorUUID),
IdentityRef: identityRef,
Image: image,
SSHKeyName: sshKeyName,
Ports: portOpts,
Tags: tags,
UserDataRef: userData,
},
},
{
name: "Cluster network nil, machine defines port network and overrides SG",
spec: &infrav1.OpenStackMachineSpec{
Ports: []infrav1.PortOpts{{
Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)},
}},
SecurityGroups: []infrav1.SecurityGroupParam{{ID: ptr.To(extraSecurityGroupUUID)}},
},
want: &infrav1alpha1.OpenStackServerSpec{
IdentityRef: identityRef,
Ports: []infrav1.PortOpts{{
Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)},
SecurityGroups: []infrav1.SecurityGroupParam{{ID: ptr.To(extraSecurityGroupUUID)}},
}},
Tags: tags,
UserDataRef: userData,
},
},
{
name: "Cluster network nil, machine defines port network and falls back to cluster SG",
spec: &infrav1.OpenStackMachineSpec{
Ports: []infrav1.PortOpts{{
Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)},
}},
},
want: &infrav1alpha1.OpenStackServerSpec{
IdentityRef: identityRef,
Ports: []infrav1.PortOpts{{
Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)},
SecurityGroups: []infrav1.SecurityGroupParam{{ID: ptr.To(workerSecurityGroupUUID)}},
}},
Tags: tags,
UserDataRef: userData,
},
},
}
for i := range tests {
tt := tests[i]
t.Run(tt.name, func(t *testing.T) {
spec := openStackMachineSpecToOpenStackServerSpec(tt.spec, identityRef, tags, "", userData, &openStackCluster.Status.WorkerSecurityGroup.ID, openStackCluster.Status.Network.ID)
defaultNetID := ""
if openStackCluster.Status.Network != nil {
defaultNetID = openStackCluster.Status.Network.ID
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More evidence of what I was saying above: this duplicates part of the functionality in the test. If we did this in openStackMachineSpecToOpenStackServerSpec we could just test it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


spec := openStackMachineSpecToOpenStackServerSpec(tt.spec, identityRef, tags, "", userData, &openStackCluster.Status.WorkerSecurityGroup.ID, defaultNetID)
if !reflect.DeepEqual(spec, tt.want) {
t.Errorf("openStackMachineSpecToOpenStackServerSpec() got = %+v, want %+v", spec, tt.want)
}
Expand Down Expand Up @@ -224,3 +261,153 @@ func TestGetPortIDs(t *testing.T) {
})
}
}

// TestOpenStackMachineSpecToOpenStackServerSpec_NilNetworkCases checks that
// openStackMachineSpecToOpenStackServerSpec keeps the ports declared on the
// machine spec when the cluster status yields no default network ID, and that
// the worker security group from the cluster status is applied to those ports.
//
// NOTE(review): the only table entry uses a non-nil Status.Network with an
// empty ID; a cluster whose Status.Network is nil is not exercised here.
func TestOpenStackMachineSpecToOpenStackServerSpec_NilNetworkCases(t *testing.T) {
	var (
		identityRef = infrav1.OpenStackIdentityReference{
			Name:      "foo",
			CloudName: "my-cloud",
		}
		image    = infrav1.ImageParam{Filter: &infrav1.ImageFilter{Name: ptr.To("my-image")}}
		tags     = []string{"tag1", "tag2"}
		userData = &corev1.LocalObjectReference{Name: "server-data-secret"}
	)

	type testCase struct {
		name                 string
		openStackCluster     *infrav1.OpenStackCluster
		spec                 *infrav1.OpenStackMachineSpec
		expectedDefaultNetID string
		expectedPorts        []infrav1.PortOpts
		description          string
	}

	cases := []testCase{
		{
			name: "Empty cluster network ID, machine defines explicit ports",
			openStackCluster: &infrav1.OpenStackCluster{
				Spec: infrav1.OpenStackClusterSpec{
					ManagedSecurityGroups: &infrav1.ManagedSecurityGroups{},
				},
				Status: infrav1.OpenStackClusterStatus{
					Network: &infrav1.NetworkStatusWithSubnets{
						NetworkStatus: infrav1.NetworkStatus{
							// The network object exists but carries no ID.
							ID: "",
						},
					},
					WorkerSecurityGroup: &infrav1.SecurityGroupStatus{
						ID: workerSecurityGroupUUID,
					},
				},
			},
			spec: &infrav1.OpenStackMachineSpec{
				Flavor: ptr.To(flavorName),
				Image:  image,
				Ports: []infrav1.PortOpts{
					{Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)}},
				},
			},
			// No default is expected because the cluster network ID is empty.
			expectedDefaultNetID: "",
			expectedPorts: []infrav1.PortOpts{
				{
					Network: &infrav1.NetworkParam{ID: ptr.To(networkUUID)},
					SecurityGroups: []infrav1.SecurityGroupParam{
						{ID: ptr.To(workerSecurityGroupUUID)},
					},
				},
			},
			description: "Should work when cluster has empty network ID but machine defines ports",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			status := &tc.openStackCluster.Status

			// Derive the default network ID the same way the test table expects:
			// empty unless the cluster status carries a network.
			clusterNetID := ""
			if nw := status.Network; nw != nil {
				clusterNetID = nw.ID
			}
			if clusterNetID != tc.expectedDefaultNetID {
				t.Errorf("Expected defaultNetworkID = %q, got %q", tc.expectedDefaultNetID, clusterNetID)
			}

			// The managed security group is optional; pass nil when the status has none.
			var workerSGID *string
			if sg := status.WorkerSecurityGroup; sg != nil {
				workerSGID = &sg.ID
			}

			// Empty string is the failure domain argument.
			got := openStackMachineSpecToOpenStackServerSpec(tc.spec, identityRef, tags, "", userData, workerSGID, clusterNetID)

			if !reflect.DeepEqual(got.Ports, tc.expectedPorts) {
				t.Errorf("Expected ports = %+v, got %+v", tc.expectedPorts, got.Ports)
			}
		})
	}
}

// TestValidateNetworkConfiguration is a table-driven check of the rule "a
// machine needs either a cluster network ID or at least one port of its own".
//
// NOTE(review): the rule and the error text are re-implemented inline below;
// this test does not call any controller code, so it cannot detect a
// regression in the controller itself.
func TestValidateNetworkConfiguration(t *testing.T) {
	// Message the simulated validation reports when nothing provides a network.
	const simulatedErrMsg = "no network configured: cluster network is missing and machine spec does not define ports with a network"

	type testCase struct {
		name              string
		clusterNetworkID  string
		machinePortsCount int
		expectError       bool
		expectedErrorMsg  string
		description       string
	}

	cases := []testCase{
		{
			name:              "Valid: cluster has network, machine has no explicit ports",
			clusterNetworkID:  networkUUID,
			machinePortsCount: 0,
			expectError:       false,
			description:       "Should succeed when cluster provides default network",
		},
		{
			name:              "Valid: no cluster network, machine defines explicit ports",
			clusterNetworkID:  "",
			machinePortsCount: 1,
			expectError:       false,
			description:       "Should succeed when machine defines its own networking",
		},
		{
			name:              "Invalid: no cluster network, no machine ports",
			clusterNetworkID:  "",
			machinePortsCount: 0,
			expectError:       true,
			expectedErrorMsg:  "no network configured: cluster network is missing and machine spec does not define ports with a network",
			description:       "Should fail with terminal error when no networking is configured anywhere",
		},
		{
			name:              "Valid: cluster network and machine ports both defined",
			clusterNetworkID:  networkUUID,
			machinePortsCount: 2,
			expectError:       false,
			description:       "Should succeed when both cluster and machine define networking",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			// Fails only when neither the cluster nor the machine supplies a network.
			rejected := tc.clusterNetworkID == "" && tc.machinePortsCount <= 0

			if rejected != tc.expectError {
				t.Errorf("Expected error: %v, but validation result: %v", tc.expectError, rejected)
			}

			if !tc.expectError || !rejected {
				return
			}
			if simulatedErrMsg != tc.expectedErrorMsg {
				t.Errorf("Expected error message: %q, got: %q", tc.expectedErrorMsg, simulatedErrMsg)
			}
		})
	}
}
2 changes: 2 additions & 0 deletions docs/book/src/development/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,5 @@ kubectl get openstackservers
```

This object is immutable and is created by the controller when a machine or a bastion is created. The `OpenStackServer` object is deleted when the machine or the bastion is deleted.


Loading