diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000000..9264a2cbd4c --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,13 @@ +{ + "permissions": { + "allow": [ + "WebFetch(domain:rhcos.mirror.openshift.com)", + "WebFetch(domain:mirror.openshift.com)", + "WebFetch(domain:registry.ci.openshift.org)" + ], + "deny": [ + "WebFetch(domain:vcenter.test)" + ], + "ask": [] + } +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000000..3a81360db9c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,257 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is the OpenShift Installer, a tool that deploys OpenShift clusters across multiple cloud platforms (AWS, Azure, GCP, vSphere, bare metal, etc.). The installer generates Ignition configs for bootstrap, control plane, and worker nodes, and can optionally provision the underlying infrastructure. + +## Quick Reference Documentation + +- **Getting Started**: See [README.md](README.md) for quick start guide +- **Contributing**: See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution workflow, linting, testing, and commit message format +- **Build Dependencies**: See [docs/dev/dependencies.md](docs/dev/dependencies.md) for required system packages and Go version + +## Build and Development Commands + +### Building the Installer + +```sh +# Build the openshift-install binary +hack/build.sh + +# Skip Terraform build (faster) +SKIP_TERRAFORM=y hack/build.sh + +# Development build (with debugging symbols) +MODE=dev hack/build.sh +``` + +The binary is output to `bin/openshift-install`. 
+ +### Testing + +```sh +# Run unit tests +hack/go-test.sh + +# Run specific tests with additional arguments +hack/go-test.sh -v -run TestSpecificTest + +# Run integration tests +hack/go-integration-test.sh + +# Run node joiner integration tests +hack/go-integration-test-nodejoiner.sh +``` + +### Linting and Formatting + +See [CONTRIBUTING.md](CONTRIBUTING.md#contribution-flow) for the complete list of linters to run before submitting a PR. Quick reference: + +```sh +# Format Go code and organize imports +hack/go-fmt.sh . + +# Run Go linter +hack/go-lint.sh $(go list -f '{{ .ImportPath }}' ./...) + +# Run Go vet +hack/go-vet.sh ./... + +# Check shell scripts +hack/shellcheck.sh + +# Format Terraform files +hack/tf-fmt.sh -list -check + +# Lint Terraform +hack/tf-lint.sh + +# Lint YAML files +hack/yaml-lint.sh +``` + +### Generating Code + +```sh +# Regenerate mocks for unit tests +hack/go-genmock.sh + +# Update install config CRD (after bumping github.com/openshift/api) +go generate ./pkg/types/installconfig.go +``` + +## Architecture + +### Asset-Based Architecture + +The installer uses a dependency-graph architecture where everything is an "Asset". See [docs/design/assetgeneration.md](docs/design/assetgeneration.md) for complete details. + +Key points: +- **Asset**: Interface with `Dependencies()`, `Generate()`, and `Name()` methods +- **WritableAsset**: Assets that can be written to disk and loaded +- Main assets in `pkg/asset/`: install-config, manifests, ignition-configs, cluster + +### Cluster API Integration + +The installer uses Cluster API (CAPI) controllers running in a local control plane. See [docs/dev/cluster-api.md](docs/dev/cluster-api.md) for complete details. 
+ +Key points: +- Local `kube-apiserver` and `etcd` run via envtest +- Platform-specific infrastructure providers in `cluster-api/providers/` +- Build CAPI binaries with `hack/build-cluster-api.sh` (called automatically by `hack/build.sh`) + +### Platform Types + +Platform-specific logic lives in `pkg/types/<platform>/`: +- Platform type definitions +- Validation logic in `validation/` +- Default values in `defaults/` + +Supported platforms: AWS, Azure, GCP, vSphere, OpenStack, IBM Cloud, Power VS, Nutanix, bare metal. + +### Bootstrap Process + +The installer creates a temporary bootstrap machine that: +1. Hosts resources for control plane machines to boot +2. Forms initial etcd cluster with control plane nodes +3. Starts temporary Kubernetes control plane +4. Schedules production control plane on control plane machines +5. Injects OpenShift components +6. Shuts down once cluster is self-hosting + +## Dependency Management + +See [docs/dev/dependencies.md](docs/dev/dependencies.md) for complete dependency management instructions including: +- Adding/updating Go dependencies with `go get`, `go mod tidy`, `go mod vendor` +- Updating CAPI provider dependencies (detailed multi-step process) +- Special case: updating after bumping `github.com/openshift/api` + +**Important**: Always commit vendored code in a separate commit from functional changes. + +## Commit Message Format + +See [CONTRIBUTING.md](CONTRIBUTING.md#commit-message-format) for the complete format specification. + +Quick reference: +``` +<subsystem>: <what changed> + +<why this change was made> + +Fixes #<issue number> +``` + +Common subsystems: +- `baremetal`, `vsphere`, `aws`, `azure`, `gcp`, etc. 
- for platform-specific changes +- `agent`, `ibi` (image-based installer) - for installation method changes +- `terraform`, `cluster-api` - for infrastructure provider changes +- `docs` - for documentation changes +- `unit tests`, `integration tests` - for test-only changes (makes it clear the change doesn't affect the actual installer) + +## Testing Approach + +The installer has different types of tests with varying external requirements: + +### Pure Unit Tests + +Most tests in `pkg/` are pure unit tests that test Go code logic without external dependencies. These can be run with: + +```sh +go test ./pkg/... +``` + +These tests should pass in any environment with Go installed. + +### Integration Tests with External Requirements + +Some test files have external dependencies and will fail without specific tools installed: + +#### Node Joiner Integration Tests +- **Location**: `cmd/node-joiner/*_integration_test.go` +- **Requirements**: + - Kubernetes test binaries (etcd, kube-apiserver) via `setup-envtest` + - Uses `sigs.k8s.io/controller-runtime/pkg/envtest` to run a local control plane + - The test script automatically downloads the required binaries +- **Run with**: `hack/go-integration-test-nodejoiner.sh` (handles setup automatically) +- **Example test**: `TestNodeJoinerIntegration` +- **Note**: Running `go test` directly without the script will fail with "fork/exec .../etcd: no such file or directory" + +#### Agent Integration Tests +- **Location**: `cmd/openshift-install/*_integration_test.go` (tests with "Agent" in name) +- **Requirements**: + - `oc` binary (OpenShift CLI) in `$PATH` + - `nmstatectl` binary (for network state validation) + - Registry credentials for `registry.ci.openshift.org` (for full test pass) +- **Run with**: `hack/go-integration-test.sh` +- **Example test**: `TestAgentIntegration` + +**Installing oc**: Download and extract the OpenShift client tools from the official mirror: + +```sh +curl -L 
https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable/openshift-client-linux.tar.gz -o /tmp/oc.tar.gz +mkdir -p ~/.local/bin +tar -xzf /tmp/oc.tar.gz -C ~/.local/bin oc kubectl +rm /tmp/oc.tar.gz +``` + +Make sure `~/.local/bin` is in your `$PATH`. + +**Installing nmstatectl**: Some tests validate network configuration using nmstate. This requires running dnf outside the sandbox: + +```sh +sudo dnf install -y nmstate +``` + +Without `nmstatectl`, network configuration tests will fail with: + +``` +failed to validate network yaml for host 0, install nmstate package, exec: "nmstatectl": executable file not found in $PATH +``` + +**Note on registry credentials**: Many agent integration tests query the CI registry (`registry.ci.openshift.org`) to extract release image information. Without credentials, tests will fail with: + +``` +error: image "registry.ci.openshift.org/origin/release:4.21" not found: manifest unknown: manifest unknown +``` + +In CI environments, credentials are provided via the `AUTH_FILE` environment variable. + +#### General Integration Tests +- **Location**: `test/` +- **Requirements**: Various depending on the test (cloud credentials, network access, etc.) +- **Run with**: `hack/go-integration-test.sh` + +### Running Tests + +```sh +# Run all unit tests (via podman container with all dependencies) +hack/go-test.sh + +# Run unit tests directly (may skip integration tests if dependencies missing) +go test ./... + +# Run specific package tests +go test ./pkg/asset/... 
+ +# Run integration tests (requires full environment setup) +hack/go-integration-test.sh + +# Run node joiner integration tests +hack/go-integration-test-nodejoiner.sh +``` + +### Test Environment Notes + +- **Preferred method**: Use `hack/go-test.sh` which runs tests in a podman container with all dependencies +- **Direct execution**: Running `go test` directly may skip integration tests if tools are missing +- Integration test failures due to missing tools (nmstatectl, kubebuilder, etc.) are expected in minimal environments +- All code in `./cmd/...`, `./data/...`, `./pkg/...` must have unit tests +- Use `hack/go-genmock.sh` to regenerate mocks when interfaces change + +## Important Notes + +- The installer consumes state from a directory (default: current directory) +- Pass `--dir` to specify asset directory for cluster creation/destruction +- Install config can be pre-created and reused across multiple clusters diff --git a/cmd/openshift-install/internal_integration_test.go b/cmd/openshift-install/internal_integration_test.go index 3a6f0135c59..9f22f007b83 100644 --- a/cmd/openshift-install/internal_integration_test.go +++ b/cmd/openshift-install/internal_integration_test.go @@ -112,6 +112,13 @@ func runIntegrationTest(t *testing.T, testFolder string) { } } e.Vars = append(e.Vars, fmt.Sprintf("RELEASE_IMAGE=%s", pullspec)) + // Pass through environment variables that may be set by the sandbox or CI + passthroughVars := []string{"XDG_CACHE_HOME", "HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY", "http_proxy", "https_proxy", "no_proxy"} + for _, varName := range passthroughVars { + if value, ok := os.LookupEnv(varName); ok && value != "" { + e.Vars = append(e.Vars, fmt.Sprintf("%s=%s", varName, value)) + } + } // When AUTH_FILE is set in the CI integration-tests job if authFilePath, ok := os.LookupEnv("AUTH_FILE"); ok && authFilePath != "" { workDir := e.Getenv("WORK") diff --git a/cmd/openshift-install/testdata/agent/image/configurations/vsphere_with-credentials.txt 
b/cmd/openshift-install/testdata/agent/image/configurations/vsphere_with-credentials.txt index 3001aed2c8b..a2d53238b12 100644 --- a/cmd/openshift-install/testdata/agent/image/configurations/vsphere_with-credentials.txt +++ b/cmd/openshift-install/testdata/agent/image/configurations/vsphere_with-credentials.txt @@ -40,11 +40,11 @@ platform: - datacenters: - testDatacenter password: testPassword - server: vcenter.openshift.com + server: vcenter.test user: testUser failuredomains: - name: testfailureDomain - server: vcenter.openshift.com + server: vcenter.test region: testRegion topology: datacenter: testDatacenter @@ -69,7 +69,7 @@ apiVersion: extensions.hive.openshift.io/v1beta1 kind: AgentClusterInstall metadata: annotations: - agent-install.openshift.io/install-config-overrides: '{"platform":{"vsphere":{"vcenters":[{"server":"vcenter.openshift.com","user":"testUser","password":"testPassword","datacenters":["testDatacenter"]}],"failureDomains":[{"name":"testfailureDomain","region":"testRegion","zone":"testZone","server":"vcenter.openshift.com","topology":{"datacenter":"testDatacenter","computeCluster":"/testDatacenter/host/testComputecluster","networks":["testNetwork"],"datastore":"/testDatacenter/datastore/testDatastore","resourcePool":"/testDatacenter/host/testComputecluster/Resources","folder":"/testDatacenter/vm/testFolder"}}]}}}' + agent-install.openshift.io/install-config-overrides: '{"platform":{"vsphere":{"vcenters":[{"server":"vcenter.test","user":"testUser","password":"testPassword","datacenters":["testDatacenter"]}],"failureDomains":[{"name":"testfailureDomain","region":"testRegion","zone":"testZone","server":"vcenter.test","topology":{"datacenter":"testDatacenter","computeCluster":"/testDatacenter/host/testComputecluster","networks":["testNetwork"],"datastore":"/testDatacenter/datastore/testDatastore","resourcePool":"/testDatacenter/host/testComputecluster/Resources","folder":"/testDatacenter/vm/testFolder"}}]}}}' creationTimestamp: null name: ostest 
namespace: cluster0 diff --git a/cmd/openshift-install/testdata/agent/image/validations/vsphere_with-partial-credentials.txt b/cmd/openshift-install/testdata/agent/image/validations/vsphere_with-partial-credentials.txt index 2562bffa0f8..421b401e8dd 100644 --- a/cmd/openshift-install/testdata/agent/image/validations/vsphere_with-partial-credentials.txt +++ b/cmd/openshift-install/testdata/agent/image/validations/vsphere_with-partial-credentials.txt @@ -1,6 +1,6 @@ ! exec openshift-install agent create image --dir $WORK -stderr 'Invalid value: "diff.openshift.com": server does not exist in vcenters' +stderr 'Invalid value: "diff.vcenter.test": server does not exist in vcenters' stderr 'platform.vsphere.failureDomains\[0\].topology.folder: Required value: must specify a folder for agent-based installs' ! exists $WORK/agent.x86_64.iso @@ -38,11 +38,11 @@ platform: - datacenters: - testDatacenter password: testPassword - server: vcenter.openshift.com + server: vcenter.test user: testUser failuredomains: - name: testfailureDomain - server: diff.openshift.com + server: diff.vcenter.test region: testRegion topology: datacenter: testDatacenter diff --git a/pkg/asset/agent/installconfig_test.go b/pkg/asset/agent/installconfig_test.go index 7a1b778bf7e..78ad82fcbbc 100644 --- a/pkg/asset/agent/installconfig_test.go +++ b/pkg/asset/agent/installconfig_test.go @@ -519,15 +519,26 @@ platform: vsphere: apiVips: - 192.168.122.10 - vCenter: test.vcenter.com - username: testuser - password: testpassword - datacenter: testDatacenter - defaultDatastore: testDatastore + vcenters: + - server: vcenter.test + datacenters: + - testDatacenter + failureDomains: + - name: test-failure-domain + server: vcenter.test + region: test-region + zone: test-zone + topology: + datacenter: testDatacenter + computeCluster: "/testDatacenter/host/testCluster" + datastore: "/testDatacenter/datastore/testDatastore" + folder: "/testDatacenter/vm/testFolder" + networks: + - testNetwork pullSecret: 
"{\"auths\":{\"example.com\":{\"auth\":\"c3VwZXItc2VjcmV0Cg==\"}}}" `, expectedFound: false, - expectedError: `invalid install-config configuration: [platform.vsphere.ingressVIPs: Required value: must specify VIP for ingress, when VIP for API is set, platform.vsphere.ingressVIPs: Required value: must specify at least one VIP for the Ingress, platform.vsphere.failureDomains[0].topology.folder: Required value: must specify a folder for agent-based installs]`, + expectedError: `invalid install-config configuration: [platform.vsphere.ingressVIPs: Required value: must specify VIP for ingress, when VIP for API is set, platform.vsphere.ingressVIPs: Required value: must specify at least one VIP for the Ingress, platform.vsphere.vcenters[0].user: Required value: All credential fields are required if any one is specified, platform.vsphere.vcenters[0].password: Required value: All credential fields are required if any one is specified]`, }, { name: "apiVIPs are missing for vsphere platform", @@ -704,14 +715,12 @@ platform: apiVips: - 192.168.122.10 vcenters: - - server: test.vcenter.com - user: testuser - password: testpassword + - server: vcenter.test datacenters: - testDatacenter failureDomains: - name: testFailuredomain - server: test.vcenter.com + server: vcenter.test zone: testZone region: testRegion topology: @@ -724,7 +733,7 @@ platform: pullSecret: "{\"auths\":{\"example.com\":{\"auth\":\"c3VwZXItc2VjcmV0Cg==\"}}}" `, expectedFound: false, - expectedError: `invalid install-config configuration: [platform.vsphere.ingressVIPs: Required value: must specify VIP for ingress, when VIP for API is set, platform.vsphere.ingressVIPs: Required value: must specify at least one VIP for the Ingress]`, + expectedError: `invalid install-config configuration: [platform.vsphere.ingressVIPs: Required value: must specify VIP for ingress, when VIP for API is set, platform.vsphere.ingressVIPs: Required value: must specify at least one VIP for the Ingress, platform.vsphere.vcenters[0].user: 
Required value: All credential fields are required if any one is specified, platform.vsphere.vcenters[0].password: Required value: All credential fields are required if any one is specified]`, }, { name: "vcenter vSphere credentials are present but failureDomain server does not match", @@ -744,14 +753,12 @@ platform: ingressVips: - 192.168.122.11 vcenters: - - server: test.vcenter.com - user: testuser - password: testpassword + - server: vcenter.test datacenters: - testDatacenter failureDomains: - name: testFailuredomain - server: diff1.vcenter.com + server: diff1.vcenter.test zone: testZone region: testRegion topology: @@ -762,7 +769,7 @@ platform: networks: - testNetwork - name: testFailuredomain2 - server: diff2.vcenter.com + server: diff2.vcenter.test zone: testZone2 region: testRegion2 topology: @@ -775,7 +782,7 @@ platform: pullSecret: "{\"auths\":{\"example.com\":{\"auth\":\"c3VwZXItc2VjcmV0Cg==\"}}}" `, expectedFound: false, - expectedError: `invalid install-config configuration: [platform.vsphere.failureDomains.server: Invalid value: "diff1.vcenter.com": server does not exist in vcenters, platform.vsphere.failureDomains.server: Invalid value: "diff2.vcenter.com": server does not exist in vcenters]`, + expectedError: `invalid install-config configuration: [platform.vsphere.failureDomains.server: Invalid value: "diff1.vcenter.test": server does not exist in vcenters, platform.vsphere.failureDomains.server: Invalid value: "diff2.vcenter.test": server does not exist in vcenters, platform.vsphere.vcenters[0].user: Required value: All credential fields are required if any one is specified, platform.vsphere.vcenters[0].password: Required value: All credential fields are required if any one is specified]`, }, { name: "All required vSphere fields must be entered if some of them are entered - deprecated fields", @@ -794,7 +801,7 @@ platform: - 192.168.122.10 ingressVips: - 192.168.122.11 - vCenter: test.vcenter.com + vCenter: vcenter.test pullSecret: 
"{\"auths\":{\"example.com\":{\"auth\":\"c3VwZXItc2VjcmV0Cg==\"}}}" `, expectedFound: false, @@ -818,7 +825,7 @@ platform: ingressVips: - 192.168.122.11 vcenters: - - server: test.vcenter.com + - server: vcenter.test user: testuser pullSecret: "{\"auths\":{\"example.com\":{\"auth\":\"c3VwZXItc2VjcmV0Cg==\"}}}" `, diff --git a/pkg/asset/installconfig/vsphere/mock/vsphere_sim.go b/pkg/asset/installconfig/vsphere/mock/vsphere_sim.go index 1028d1d9bd2..2d867376e9a 100644 --- a/pkg/asset/installconfig/vsphere/mock/vsphere_sim.go +++ b/pkg/asset/installconfig/vsphere/mock/vsphere_sim.go @@ -5,8 +5,8 @@ import ( "crypto/tls" "encoding/pem" "errors" - "io/fs" "os" + "path/filepath" "strconv" "github.com/vmware/govmomi/find" @@ -83,12 +83,12 @@ func (vss *VSphereSimulator) StartSimulator() (*simulator.Server, error) { // GetClient returns a vim25 client which connects to and trusts the simulator func GetClient(server *simulator.Server) (*vim25.Client, *session.Manager, error) { - tmpCAdir := "/tmp/vcsimca" + tmpCAdir := filepath.Join(os.TempDir(), "vcsimca") err := os.Mkdir(tmpCAdir, os.ModePerm) if err != nil { // If the error is not file existing return err - if !errors.Is(err, fs.ErrExist) { + if !os.IsExist(err) { return nil, nil, err } } @@ -101,11 +101,19 @@ func GetClient(server *simulator.Server) (*vim25.Client, *session.Manager, error if err != nil { return nil, nil, err } + defer os.Remove(tempFile.Name()) + defer tempFile.Close() + _, err = tempFile.Write(pem.EncodeToMemory(&pemBlock)) if err != nil { return nil, nil, err } + // Close the file so the CA bundle can be read + if err := tempFile.Close(); err != nil { + return nil, nil, err + } + soapClient := soap.NewClient(server.URL, false) err = soapClient.SetRootCAs(tempFile.Name()) if err != nil {