Skip to content

Commit a6c9cb4

Browse files
Nightly Integration tests for self-hosted infrastructure.
1 parent 7045c13 commit a6c9cb4

File tree

12 files changed

+555
-68
lines changed

12 files changed

+555
-68
lines changed

.github/workflows/integration-tests.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
strategy:
1313
fail-fast: false
1414
matrix:
15-
provider: [k3d, gcp]
15+
provider: [kind, gcp]
1616
timeout-minutes: 90
1717
permissions:
1818
contents: 'read'
@@ -43,7 +43,7 @@ jobs:
4343
USE_GKE_GCLOUD_AUTH_PLUGIN: True
4444
run: |
4545
set -euo pipefail
46-
make test/e2e/multi-region | tee test_output.log
46+
make test/nightly-e2e/multi-region | tee test_output.log
4747
- name: Archive test results
4848
if: ${{ always() }}
4949
uses: actions/upload-artifact@v4
@@ -130,7 +130,7 @@ jobs:
130130
strategy:
131131
fail-fast: false
132132
matrix:
133-
provider: [k3d, gcp]
133+
provider: [kind, gcp]
134134
timeout-minutes: 90
135135
permissions:
136136
contents: read

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,10 @@ test/e2e/%: bin/cockroach bin/kubectl bin/helm build/self-signer test/cluster/up
120120
$(MAKE) test/cluster/down; \
121121
exit $${EXIT_CODE:-0}
122122

123-
test/e2e/multi-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d
123+
test/e2e/multi-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d bin/kind
124124
@PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInMultiRegion ./tests/e2e/operator/multiRegion/... || (echo "Multi region tests failed with exit code $$?" && exit 1)
125125

126-
test/e2e/single-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d
126+
test/e2e/single-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d bin/kind
127127
@PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInSingleRegion ./tests/e2e/operator/singleRegion/... || (echo "Single region tests failed with exit code $$?" && exit 1)
128128

129129
test/e2e/migrate: bin/cockroach bin/kubectl bin/helm bin/migration-helper build/self-signer test/cluster/up/3

build/templates/cockroachdb-parent/charts/operator/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ image:
99
# pullPolicy specifies the image pull policy.
1010
pullPolicy: IfNotPresent
1111
# tag is the image tag.
12-
tag: "6f62639e9fee99d99b0387a9dccda84daa1a489b592b008f2f354ec57eae09ac"
12+
tag: "24cc995b44f2ff4a2c30b8290d229602c8b5ad90c74d0843c246f701e30db872"
1313
# certificate defines the certificate settings for the Operator.
1414
certificate:
1515
# validForDays specifies the number of days the certificate is valid for.

cockroachdb-parent/charts/operator/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
# image captures the container image settings for Operator pods.
55
image:
66
# registry is the container registry where the image is stored.
7-
registry: "us-docker.pkg.dev/cockroach-cloud-images/development"
7+
registry: "us-docker.pkg.dev/releases-prod/self-hosted"
88
# repository defines the image repository.
9-
repository: "cockroach-operator@sha256"
9+
repository: "cockroachdb-operator@sha256"
1010
# pullPolicy specifies the image pull policy.
1111
pullPolicy: IfNotPresent
1212
# tag is the image tag.
13-
tag: "72844b85354fd55b9a487abbd6e253b7d5081f65513c3813fde0ceb7d3ee2f70"
13+
tag: "24cc995b44f2ff4a2c30b8290d229602c8b5ad90c74d0843c246f701e30db872"
1414
# certificate defines the certificate settings for the Operator.
1515
certificate:
1616
# validForDays specifies the number of days the certificate is valid for.

tests/e2e/operator/infra/common.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ import (
1818

1919
// Provider types.
2020
const (
21-
ProviderK3D = "k3d"
22-
ProviderGCP = "gcp"
21+
ProviderK3D = "k3d"
22+
ProviderKind = "kind"
23+
ProviderGCP = "gcp"
2324
)
2425

2526
// Common constants.
@@ -49,8 +50,9 @@ const (
4950

5051
// RegionCodes maps provider types to their region codes
5152
var RegionCodes = map[string][]string{
52-
ProviderK3D: {"us-east1", "us-east2"},
53-
ProviderGCP: {"us-central1", "us-east1"},
53+
ProviderK3D: {"us-east1", "us-east2"},
54+
ProviderKind: {"us-east1", "us-east2"},
55+
ProviderGCP: {"us-central1", "us-east1"},
5456
}
5557

5658
// LoadBalancerAnnotations contains provider-specific service annotations.
@@ -60,7 +62,8 @@ var LoadBalancerAnnotations = map[string]map[string]string{
6062
"networking.gke.io/load-balancer-type": "Internal",
6163
"cloud.google.com/load-balancer-type": "Internal",
6264
},
63-
ProviderK3D: {},
65+
ProviderK3D: {},
66+
ProviderKind: {},
6467
}
6568

6669
// NetworkConfigs defines standard network configurations for each provider and region.

tests/e2e/operator/infra/local.go

Lines changed: 153 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"strings"
78
"testing"
89
"time"
910

@@ -24,19 +25,21 @@ import (
2425
"sigs.k8s.io/controller-runtime/pkg/client/config"
2526
)
2627

27-
// LocalRegion implements CloudProvider for a local Kubernetes provider (K3d)
28+
// LocalRegion implements CloudProvider for local Kubernetes providers (K3d and Kind)
2829
type LocalRegion struct {
2930
*operator.Region
30-
// "k3d"
31+
// "k3d" or "kind"
3132
ProviderType string
3233
}
3334

34-
// SetUpInfra Creates local k3d clusters, deploy CNI, deploy coredns in each cluster.
35+
// SetUpInfra creates local k3d or kind clusters, then deploys the CNI and CoreDNS in each cluster.
3536
//
3637
// Multi-region networking approach:
3738
// - K3D: Calico CNI with BGP for cross-cluster pod routing, built-in ServiceLB for LBs.
39+
// - Kind: default kindnet for in-cluster, Calico objects + BGP peering to advertise
40+
// pod/service CIDRs between clusters. MetalLB for CoreDNS LBs.
3841
// - CoreDNS instances forward requests for other cluster domains; endpoints can be
39-
// ClusterIP/pod IPs.
42+
// ClusterIP/pod IPs (with Calico) or LB IPs (with MetalLB).
4043
func (r *LocalRegion) SetUpInfra(t *testing.T) {
4144
// If using existing infra return clients.
4245
if r.ReusingInfra {
@@ -70,8 +73,8 @@ func (r *LocalRegion) SetUpInfra(t *testing.T) {
7073

7174
kubectlOptions := k8s.NewKubectlOptions(cluster, kubeConfig, coreDNSNamespace)
7275

73-
// Install Calico for multi-cluster networking (k3d)
74-
if r.ProviderType == ProviderK3D {
76+
// Install Calico for multi-cluster networking
77+
if r.ProviderType == ProviderK3D || r.ProviderType == ProviderKind {
7578
calico.RegisterCalicoGVK(k8sClient.Scheme())
7679
objects := calico.K3DCalicoCNI(calico.K3dClusterBGPConfig{
7780
AddressAllocation: i,
@@ -99,6 +102,14 @@ func (r *LocalRegion) SetUpInfra(t *testing.T) {
99102
})
100103
require.NoError(t, err)
101104

105+
// For Kind, install MetalLB before creating LoadBalancer services
106+
// Calico+BGP provides pod routing; MetalLB provides external LB IPs when needed
107+
if r.ProviderType == ProviderKind {
108+
// Install MetalLB with Docker network IPs
109+
err = r.installMetalLBWithDockerIPs(t, kubectlOptions, i)
110+
require.NoError(t, err)
111+
}
112+
102113
// Create a CoreDNS service.
103114
service := coredns.CoreDNSService(nil, GetLoadBalancerAnnotations(r.ProviderType))
104115
serviceYaml := coredns.ToYAML(t, service)
@@ -153,8 +164,8 @@ func (r *LocalRegion) SetUpInfra(t *testing.T) {
153164
r.Clients = clients
154165
r.ReusingInfra = true
155166

156-
// BGP peering setup for multi-region Calico setups (K3D)
157-
if r.IsMultiRegion && (r.ProviderType == ProviderK3D) {
167+
// BGP peering setup for multi-region Calico setups
168+
if r.IsMultiRegion && (r.ProviderType == ProviderK3D || r.ProviderType == ProviderKind) {
158169
netConfig := calico.K3dCalicoBGPPeeringOptions{
159170
ClusterConfig: map[string]calico.K3dClusterBGPConfig{},
160171
}
@@ -197,6 +208,15 @@ func (r *LocalRegion) TeardownInfra(t *testing.T) {
197208
},
198209
WorkingDir: testutil.GetGitRoot(),
199210
}
211+
case ProviderKind:
212+
cmd = shell.Command{
213+
Command: "bash",
214+
Args: []string{
215+
"tests/kind/dev-multi-cluster-kind.sh",
216+
"down",
217+
},
218+
WorkingDir: testutil.GetGitRoot(),
219+
}
200220
default:
201221
t.Logf("[%s] Unknown provider type for teardown", r.ProviderType)
202222
return
@@ -236,6 +256,8 @@ func (r *LocalRegion) setupNetworking(t *testing.T, ctx context.Context, region
236256
switch r.ProviderType {
237257
case ProviderK3D:
238258
labelSelector = fmt.Sprintf("%s=%s", "node-role.kubernetes.io/master", "true")
259+
case ProviderKind:
260+
labelSelector = fmt.Sprintf("%s=%s", "node-role.kubernetes.io/control-plane", "")
239261
default:
240262
return fmt.Errorf("unknown provider type: %s", r.ProviderType)
241263
}
@@ -266,7 +288,7 @@ func (r *LocalRegion) setupNetworking(t *testing.T, ctx context.Context, region
266288
return nil
267289
}
268290

269-
// createLocalCluster creates a new local cluster (k3d)
291+
// createLocalCluster creates a new local cluster (k3d or kind)
270292
// by calling the appropriate shell command.
271293
func (r *LocalRegion) createLocalCluster(t *testing.T, clusterName string, nodeCount int) error {
272294
t.Logf("[%s] Creating new %s cluster: %s with %d nodes", r.ProviderType, r.ProviderType, clusterName, nodeCount)
@@ -281,6 +303,18 @@ func (r *LocalRegion) createLocalCluster(t *testing.T, clusterName string, nodeC
281303
},
282304
WorkingDir: testutil.GetGitRoot(),
283305
}
306+
case ProviderKind:
307+
cmd = shell.Command{
308+
Command: "bash",
309+
Args: []string{
310+
"tests/kind/dev-multi-cluster-kind.sh",
311+
"up",
312+
"--name=chart-testing",
313+
fmt.Sprintf("--nodes=%d", nodeCount),
314+
fmt.Sprintf("--clusters=%d", len(r.Clusters)),
315+
},
316+
WorkingDir: testutil.GetGitRoot(),
317+
}
284318
default:
285319
return fmt.Errorf("unknown provider type: %s", r.ProviderType)
286320
}
@@ -297,3 +331,113 @@ func (r *LocalRegion) createLocalCluster(t *testing.T, clusterName string, nodeC
297331
t.Logf("[%s] Successfully created new %s cluster: %s", r.ProviderType, r.ProviderType, clusterName)
298332
return nil
299333
}
334+
335+
// installMetalLBWithDockerIPs installs MetalLB in a Kind cluster and configures it with
336+
// auto-detected Docker network IPs.
337+
func (r *LocalRegion) installMetalLBWithDockerIPs(t *testing.T, kubectlOptions *k8s.KubectlOptions, clusterIndex int) error {
338+
t.Logf("Installing MetalLB for cluster %s with Docker network IPs", kubectlOptions.ContextName)
339+
340+
// Create kubectl options for MetalLB namespace
341+
kubectlOptionsMetallb := k8s.NewKubectlOptions(kubectlOptions.ContextName, kubectlOptions.ConfigPath, "metallb-system")
342+
343+
// 1. Install MetalLB using official manifests
344+
metallbManifest := "https://raw.githubusercontent.com/metallb/metallb/v0.15.2/config/manifests/metallb-native.yaml"
345+
346+
t.Logf("Applying MetalLB manifests from %s", metallbManifest)
347+
err := k8s.RunKubectlE(t, kubectlOptionsMetallb, "apply", "-f", metallbManifest)
348+
if err != nil {
349+
return fmt.Errorf("failed to apply MetalLB manifests: %w", err)
350+
}
351+
352+
// 2. Wait for MetalLB controller and speaker to be ready
353+
t.Log("Waiting for MetalLB controller deployment to be ready")
354+
_, err = retry.DoWithRetryE(t, "wait for metallb controller", defaultRetries, defaultRetryInterval,
355+
func() (string, error) {
356+
return k8s.RunKubectlAndGetOutputE(t, kubectlOptionsMetallb,
357+
"wait", "--for=condition=Available", "deployment/controller", "--timeout=120s")
358+
})
359+
if err != nil {
360+
return fmt.Errorf("MetalLB controller failed to become ready: %w", err)
361+
}
362+
363+
t.Log("Waiting for MetalLB speaker daemonset to be ready")
364+
_, err = retry.DoWithRetryE(t, "wait for metallb speaker", defaultRetries, defaultRetryInterval,
365+
func() (string, error) {
366+
return k8s.RunKubectlAndGetOutputE(t, kubectlOptionsMetallb,
367+
"wait", "--for=condition=Ready", "pod", "-l", "app=metallb,component=speaker", "--timeout=120s")
368+
})
369+
if err != nil {
370+
return fmt.Errorf("MetalLB speaker failed to become ready: %w", err)
371+
}
372+
373+
// Note: strictARP (needed for kube-proxy IPVS) is not required for Kind's default iptables mode.
374+
// 3. Auto-detect Docker network subnet for the shared multi-cluster network
375+
networkName := "kind-chart-testing"
376+
t.Logf("Detecting Docker network subnet for %s", networkName)
377+
378+
cmd := shell.Command{
379+
Command: "docker",
380+
Args: []string{
381+
"network", "inspect", networkName,
382+
"--format", "{{(index .IPAM.Config 0).Subnet}}",
383+
},
384+
}
385+
386+
output, err := shell.RunCommandAndGetOutputE(t, cmd)
387+
if err != nil {
388+
return fmt.Errorf("failed to detect Docker network subnet for %s: %w", networkName, err)
389+
}
390+
391+
subnet := strings.TrimSpace(output)
392+
t.Logf("Detected Docker subnet for cluster %d: %s", clusterIndex, subnet)
393+
394+
// 4. Parse subnet and create a unique per-cluster IP range from the high end
395+
// Example: 172.20.0.0/16 -> cluster 0: 172.20.255.200-172.20.255.214, cluster 1: 215-229, cluster 2: 230-244
396+
parts := strings.Split(subnet, ".")
397+
if len(parts) != 4 {
398+
return fmt.Errorf("invalid subnet format: %s", subnet)
399+
}
400+
401+
// Compute a non-overlapping range per cluster index within the .255.x space
402+
rangeStart := 200 + (clusterIndex * 15)
403+
if rangeStart > 254 {
404+
rangeStart = 240
405+
}
406+
rangeEnd := rangeStart + 14
407+
if rangeEnd > 254 {
408+
rangeEnd = 254
409+
}
410+
ipRange := fmt.Sprintf("%s.%s.%s.%d-%s.%s.%s.%d", parts[0], parts[1], "255", rangeStart, parts[0], parts[1], "255", rangeEnd)
411+
t.Logf("MetalLB IP address pool: %s", ipRange)
412+
413+
// 5. Apply MetalLB IPAddressPool and L2Advertisement
414+
ipPoolYAML := fmt.Sprintf(`
415+
apiVersion: metallb.io/v1beta1
416+
kind: IPAddressPool
417+
metadata:
418+
name: kind-pool-%d
419+
namespace: metallb-system
420+
spec:
421+
addresses:
422+
- %s
423+
---
424+
apiVersion: metallb.io/v1beta1
425+
kind: L2Advertisement
426+
metadata:
427+
name: kind-l2adv-%d
428+
namespace: metallb-system
429+
spec:
430+
ipAddressPools:
431+
- kind-pool-%d
432+
`, clusterIndex, ipRange, clusterIndex, clusterIndex)
433+
434+
t.Log("Applying MetalLB IPAddressPool and L2Advertisement config")
435+
err = k8s.KubectlApplyFromStringE(t, kubectlOptionsMetallb, ipPoolYAML)
436+
if err != nil {
437+
return fmt.Errorf("failed to apply MetalLB IP pool configuration: %w", err)
438+
}
439+
440+
t.Logf("Successfully installed and configured MetalLB for cluster %s", kubectlOptions.ContextName)
441+
t.Logf("MetalLB installed with IP pool: %s", ipRange)
442+
return nil
443+
}

tests/e2e/operator/infra/provider.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ func ProviderFactory(providerType string, region *operator.Region) CloudProvider
3131
provider := LocalRegion{Region: region, ProviderType: ProviderK3D}
3232
provider.RegionCodes = GetRegionCodes(providerType)
3333
return &provider
34+
case ProviderKind:
35+
provider := LocalRegion{Region: region, ProviderType: ProviderKind}
36+
provider.RegionCodes = GetRegionCodes(providerType)
37+
return &provider
3438
case ProviderGCP:
3539
provider := GcpRegion{Region: region}
3640
provider.RegionCodes = GetRegionCodes(providerType)

0 commit comments

Comments
 (0)