diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000000..a59b21204b62 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,98 @@ +version: 2.1 + +orbs: + go: circleci/go@3.0.2 + +workflows: + build: + jobs: + - test + - build: + context: + - architect + filters: + tags: + only: /^v.*/ + +jobs: + test: + resource_class: large + executor: + name: go/default + tag: 1.24.6 + steps: + - checkout + - go/load-build-cache + - go/mod-download + - run: + command: make setup-envtest + - go/save-build-cache + - go/save-mod-cache + - run: + command: make test + + build: + resource_class: xlarge + machine: + image: ubuntu-2404:2024.11.1 + environment: + COMPONENTS: core kubeadm-bootstrap kubeadm-control-plane + PLATFORMS: amd64 arm64 + + REGISTRY_AZURE: gsoci.azurecr.io/giantswarm + REGISTRY_ALIYUN: giantswarm-registry.cn-shanghai.cr.aliyuncs.com/giantswarm + steps: + - checkout + + - run: + name: Generate CRDs + command: |- + # Generate CRDs. + make generate + + # Check diff. + if ! git diff --exit-code + then + echo "There are differences in generated files." + echo "Please run 'make generate' and commit the result, otherwise the CRDs can end up mismatching the code." + exit 1 + fi + + - run: + name: Build images + command: |- + # Iterate over the registries. + for registry in "${REGISTRY_AZURE}" "${REGISTRY_ALIYUN}" + do + # Build images. + make docker-build-all ALL_DOCKER_BUILD="${COMPONENTS}" ALL_ARCH="${PLATFORMS}" REGISTRY="${registry}" TAG="${CIRCLE_TAG:-${CIRCLE_SHA1}}" + done + + - run: + name: Push to Azure + command: |- + # Log in to registry. + docker login --username "${ACR_GSOCI_USERNAME}" --password "${ACR_GSOCI_PASSWORD}" "${REGISTRY_AZURE%%/*}" + + # Push images. + make docker-push-all ALL_DOCKER_BUILD="${COMPONENTS}" ALL_ARCH="${PLATFORMS}" REGISTRY="${REGISTRY_AZURE}" TAG="${CIRCLE_TAG:-${CIRCLE_SHA1}}" + + - run: + name: Push to Aliyun + command: |- + # Try up to 10 times. + for i in {1..10} + do + # Log in to registry. + docker login --username "${ALIYUN_USERNAME}" --password "${ALIYUN_PASSWORD}" "${REGISTRY_ALIYUN%%/*}" && \ + \ + # Push images. + make docker-push-all ALL_DOCKER_BUILD="${COMPONENTS}" ALL_ARCH="${PLATFORMS}" REGISTRY="${REGISTRY_ALIYUN}" TAG="${CIRCLE_TAG:-${CIRCLE_SHA1}}" && exit 0 + + # Print attempt. + echo "Attempt #${i} failed." + sleep 30 + done + + # Exit with an error after all attempts failed. + exit 1 diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml deleted file mode 100644 index befec552a825..000000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: 🐛 Bug Report -description: Report a bug encountered while using Cluster API -body: - - type: textarea - id: problem - attributes: - label: What steps did you take and what happened? - description: | - Please provide as much info as possible. Not doing so may result in your bug not being addressed in a timely manner. - If this matter is security related, please follow the guidelines described in https://github.com/kubernetes-sigs/cluster-api/blob/main/SECURITY_CONTACTS - placeholder: "A clear and concise description on how to REPRODUCE the bug." - validations: - required: true - - - type: textarea - id: expected - attributes: - label: What did you expect to happen? - validations: - required: true - - - type: textarea - id: capiVersion - attributes: - label: Cluster API version - placeholder: "The version of the Cluster API used in the environment." 
- validations: - required: true - - - type: textarea - id: kubeVersion - attributes: - label: Kubernetes version - placeholder: "$kubectl version" - - - type: textarea - id: additional - attributes: - label: Anything else you would like to add? - placeholder: "Miscellaneous information that will assist in solving the issue." - - - type: textarea - id: templateLabel - attributes: - label: Label(s) to be applied - value: | - /kind bug - One or more /area label. See https://github.com/kubernetes-sigs/cluster-api/labels?q=area for the list of labels. diff --git a/.github/ISSUE_TEMPLATE/failing_test.yaml b/.github/ISSUE_TEMPLATE/failing_test.yaml deleted file mode 100644 index ab182697c2b1..000000000000 --- a/.github/ISSUE_TEMPLATE/failing_test.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: 🚨 Failing Test -description: Report continuously failing tests or jobs in Cluster API CI -body: - - type: textarea - id: jobs - attributes: - label: Which jobs are failing? - placeholder: | - Please only use this template for submitting reports about continuously failing tests or jobs in Cluster API CI. - validations: - required: true - - - type: textarea - id: tests - attributes: - label: Which tests are failing? - validations: - required: true - - - type: textarea - id: since - attributes: - label: Since when has it been failing? - validations: - required: true - - - type: input - id: testgrid - attributes: - label: Testgrid link - - - type: textarea - id: reason - attributes: - label: Reason for failure (if possible) - - - type: textarea - id: additional - attributes: - label: Anything else we need to know? - placeholder: "Miscellaneous information that will assist in fixing the failing test." - - - type: textarea - id: templateLabel - attributes: - label: Label(s) to be applied - value: | - /kind failing-test - One or more /area label. See https://github.com/kubernetes-sigs/cluster-api/labels?q=area for the list of labels. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml deleted file mode 100644 index 880103411922..000000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: ✨ Feature request -description: Suggest an idea for this project. -body: - - type: textarea - id: user_story - attributes: - label: What would you like to be added (User Story)? - description: | - A large proposal that works through the design along with the implications of the change can be opened as a CAEP. - See https://github.com/kubernetes-sigs/cluster-api/blob/main/CONTRIBUTING.md#proposal-process-caep - placeholder: "As a [developer/user/operator] I would like to [high level description] for [reasons]." - validations: - required: true - - - type: textarea - id: detailed_feature_description - attributes: - label: Detailed Description - placeholder: "A clear and concise description of what you want to happen." - validations: - required: true - - - type: textarea - id: additional - attributes: - label: Anything else you would like to add? - placeholder: "Miscellaneous information that will assist in solving the issue." - - - type: textarea - id: templateLabel - attributes: - label: Label(s) to be applied - value: | - /kind feature - One or more /area label. See https://github.com/kubernetes-sigs/cluster-api/labels?q=area for the list of labels. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/flaking_test.yaml b/.github/ISSUE_TEMPLATE/flaking_test.yaml deleted file mode 100644 index 407793002cd8..000000000000 --- a/.github/ISSUE_TEMPLATE/flaking_test.yaml +++ /dev/null @@ -1,50 +0,0 @@ -name: ❄️ Flaking Test -description: Report flaky tests or jobs in Cluster API CI -body: - - type: textarea - id: jobs - attributes: - label: Which jobs are flaking? - description: | - Please only use this template for submitting reports about flaky tests or jobs (pass or fail with no underlying change in code) in Cluster API CI. - Links to go.k8s.io/triage and/or links to specific failures in spyglass are appreciated. - validations: - required: true - - - type: textarea - id: tests - attributes: - label: Which tests are flaking? - validations: - required: true - - - type: textarea - id: since - attributes: - label: Since when has it been flaking? - validations: - required: true - - - type: input - id: testgrid - attributes: - label: Testgrid link - - - type: textarea - id: reason - attributes: - label: Reason for failure (if possible) - - - type: textarea - id: additional - attributes: - label: Anything else we need to know? - placeholder: "Miscellaneous information that will assist in fixing the flaking test." - - - type: textarea - id: templateLabel - attributes: - label: Label(s) to be applied - value: | - /kind flake - One or more /area label. See https://github.com/kubernetes-sigs/cluster-api/labels?q=area for the list of labels. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/kubernetes_bump.md b/.github/ISSUE_TEMPLATE/kubernetes_bump.md deleted file mode 100644 index 12ffe6062bff..000000000000 --- a/.github/ISSUE_TEMPLATE/kubernetes_bump.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -name: 🚀 Kubernetes bump -about: "[Only for release team lead] Create an issue to track tasks to support a new Kubernetes minor release." -title: Tasks to bump to Kubernetes v1. -labels: '' -assignees: '' - ---- - -This issue is tracking the tasks that should be implemented **after** the Kubernetes minor release has been released. - -## Tasks - -**Note:** If feasible we usually cherry-pick the changes back to the latest release series. - -### Supporting managing and running on the new Kubernetes version - -This section contains tasks to update our book, e2e testing and CI to use and test the new Kubernetes version -as well as changes to Cluster API that we might have to make to support the new Kubernetes version. All of these -changes should be cherry-picked to all release series that will support the new Kubernetes version. - -* [ ] Continuously modify CAPD to use early versions of the upcoming Kubernetes release (betas and rcs): - * Bump the Kubernetes version in `test/*` except for `test/infrastructure/kind/*`. - * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/10922 -* [ ] Modify CAPD to use the new Kubernetes release after it is GA: - * Bump the Kubernetes version in `test/*` except for `test/infrastructure/kind/*`. 
- * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/11030 -* [ ] Ensure the jobs are adjusted to provide test coverage according to our [support policy](https://cluster-api.sigs.k8s.io/reference/versions.html#supported-kubernetes-versions): - - * At the `.versions` section in the `cluster-api-prowjob-gen.yaml` file in [test-infra](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes-sigs/cluster-api/): - * Add a new entry for the new Kubernetes version - * Adjust the released Kubernetes's version entry to refer `stable-1.` instead of `ci/latest-1.` - * Check and update the versions for the keys `etcd` and `coreDNS` if necessary: - * For etcd, see the `DefaultEtcdVersion` kubeadm constant: [e.g. for v1.28.0](https://github.com/kubernetes/kubernetes/blob/v1.28.0/cmd/kubeadm/app/constants/constants.go#L308) - * For coredns, see the `CoreDNSVersion` kubeadm constant:[e.g. for v1.28.0](https://github.com/kubernetes/kubernetes/blob/v1.28.0/cmd/kubeadm/app/constants/constants.go#L344) - * For the `.branches.main` section in the `cluster-api-prowjob-gen.yaml` file in [test-infra](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes-sigs/cluster-api/): - * For the `.upgrades` section: - * Drop the oldest upgrade - * Add a new upgrade entry from the previous to the new Kubernetes version - * Bump the version set at `.kubernetesVersionManagement` to the new minimum supported management cluster version (This is the image version available as kind image). - * Bump the version set at `.kubebuilderEnvtestKubernetesVersion` to the new minimum supported management cluster version. - * Run `make generate-test-infra-prowjobs` to generate the resulting prowjob configuration: - - ```sh - TEST_INFRA_DIR=../../k8s.io/test-infra make generate-test-infra-prowjobs - ``` - - * Prior art: https://github.com/kubernetes/test-infra/pull/33294 - -* [ ] Update book: - * Update supported versions in `versions.md` - * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/11030 - -* [ ] Issues specific to the Kubernetes minor release: - * Sometimes there are adjustments that we have to make in Cluster API to be able to support - a new Kubernetes minor version. Please add these issues here when they are identified. - -### Bump quickstart and kind image references in CAPD - -Prerequisites: - -* The target Kubernetes version is GA -* There is a new [kind version with/or a new set of kind images](https://github.com/kubernetes-sigs/kind/releases) for the target Kubernetes version - -* [ ] Bump quickstart and kind image references in CAPD: - * Bump the Kubernetes version in: - * `docs/*` - * `Tiltfile` - * Bump kind image references in CAPD (and also kind if necessary, including the latest images for this kind release) - * Add new images in the [kind mapper.go](https://github.com/kubernetes-sigs/cluster-api/blob/0f47a19e038ee6b0d3b1e7675a62cdaf84face8c/test/infrastructure/kind/mapper.go#L79). - * See the [kind releases page](https://github.com/kubernetes-sigs/kind/releases) for the list of released images. 
- * Set new default image for the [test framework](https://github.com/kubernetes-sigs/cluster-api/blob/0f47a19e038ee6b0d3b1e7675a62cdaf84face8c/test/framework/bootstrap/kind_provider.go#L40) - * If code changes are required for CAPD to incorporate the new Kind version, update [kind latestMode](https://github.com/kubernetes-sigs/cluster-api/blob/0f47a19e038ee6b0d3b1e7675a62cdaf84face8c/test/infrastructure/kind/mapper.go#L66) - * Verify the quickstart manually - * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/10610 -* [ ] Cherry-pick above PR to the latest release branch. - -### Using new Kubernetes dependencies - -This section contains tasks to update Cluster API to use the latest Kubernetes Go dependencies and related topics -like using the right Go version and build images. These changes are only made on the main branch. We don't -need them in older releases as they are not necessary to manage workload clusters of the new Kubernetes version or -run the Cluster API controllers on the new Kubernetes version. - -* [ ] Ensure there is a new controller-runtime minor release which uses the new Kubernetes Go dependencies. -* [ ] Update our ProwJobs for the `main` branch to use the `kubekins-e2e` with the correct Kubernetes version via [cluster-api-prowjob-gen.yaml](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes-sigs/cluster-api/cluster-api-prowjob-gen.yaml) and by running `make generate-test-infra-prowjobs`. - * It is recommended to have one PR for presubmit and one for periodic jobs to reduce the risk of breaking the periodic jobs. - * Prior art: https://github.com/kubernetes/test-infra/pull/32380 -* [ ] Bump the Go version in Cluster API: (if Kubernetes is using a new Go minor version) - * Search for the currently used Go version across the repository and update it - * We have to at least modify it in: `hack/ensure-go.sh`, `.golangci.yml`, `cloudbuild*.yaml`, `go.mod`, `Makefile`, `netlify.toml`, `Tiltfile` - * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/10452 -* [ ] Bumps in Cluster API repo: - * controller-runtime & controller-tools in go.mod files - * setup-envtest via `SETUP_ENVTEST_VER` in `Makefile` - * controller-gen via `CONTROLLER_GEN_VER` in `Makefile` - * conversion-gen via `CONVERSION_GEN_VER` in `Makefile` - * Prior art: https://github.com/kubernetes-sigs/cluster-api/pull/10803 -* [ ] Bump the Kubernetes version used in integration tests via `KUBEBUILDER_ENVTEST_KUBERNETES_VERSION` in `Makefile` - * **Note**: This PR should be cherry-picked as well. It is part of this section as it depends on kubebuilder/controller-runtime releases and is not strictly necessary for [Supporting managing and running on the new Kubernetes version](#supporting-managing-and-running-on-the-new-kubernetes-version). - * Prior art to release envtest binaries: https://github.com/kubernetes-sigs/controller-tools/pull/1032 - * Prior art: #7193 - diff --git a/.github/ISSUE_TEMPLATE/release_tracking.md b/.github/ISSUE_TEMPLATE/release_tracking.md deleted file mode 100644 index f2275195a934..000000000000 --- a/.github/ISSUE_TEMPLATE/release_tracking.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -name: 🚋 Release cycle tracking -about: "[Only for release team lead] Create an issue to track tasks for a Cluster API minor release." 
-title: Tasks for v release cycle -labels: '' -assignees: '' - ---- - -Please see the corresponding sections of the [role-handbooks](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks) for documentation of individual tasks. - -## Tasks - -**Notes**: -* Weeks are only specified to give some orientation. -* The following is based on the v1.6 release cycle. Modify according to the tracked release cycle. - -Week 1: -* [ ] [Release Lead] [Finalize release schedule and team](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#finalize-release-schedule-and-team) -* [ ] [Release Lead] [Add/remove release team members](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#addremove-release-team-members) -* [ ] [Release Lead] [Prepare main branch for development of the new release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#prepare-main-branch-for-development-of-the-new-release) -* [ ] [Communications Manager] [Add docs to collect release notes for users and migration notes for provider implementers](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#add-docs-to-collect-release-notes-for-users-and-migration-notes-for-provider-implementers) -* [ ] [Communications Manager] [Update supported versions](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#update-supported-versions) - -Week 1 to 4: -* [ ] [Release Lead] [Track] [Remove previously deprecated code](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#track-remove-previously-deprecated-code) - -Week 6: -* [ ] [Release Lead] [Cut the v1.5.1 & v1.4.6 releases](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) - -Week 9: -* [ ] [Release Lead] [Cut the v1.5.2 & v1.4.7 releases](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) - -Week 11 to 12: -* [ ] [Release Lead] [Track] [Bump dependencies](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#track-bump-dependencies) - -Week 13: -* [ ] [Release Lead] [Cut the v1.6.0-beta.0 release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] [Cut the v1.5.3 & v1.4.8 releases](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] [Create a new GitHub milestone for the next release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#create-a-new-github-milestone-for-the-next-release) -* [ ] [Communications Manager] [Communicate beta to providers](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#communicate-beta-to-providerss) - -Week 14: -* [ ] [Release Lead] [Cut the v1.6.0-beta.1 release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] [Set a tentative release date for the next minor 
release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#set-a-tentative-release-date-for-the-next-minor-release) -* [ ] [Release Lead] [Assemble next release team](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#set-a-tentative-release-date-for-the-next-minor-release) -* [ ] [Release Lead] Select release lead for the next release cycle - -Week 15: - -* KubeCon idle week - -Week 16: -* [ ] [Release Lead] [Cut the v1.6.0-rc.0 release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] [Update milestone applier and GitHub Actions](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#update-milestone-applier-and-github-actions) -* [ ] [CI Manager] [Setup jobs and dashboards for the release-1.6 release branch](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/ci-signal#setup-jobs-and-dashboards-for-a-new-release-branch) -* [ ] [Communications Manager] [Ensure the book for the new release is available](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#ensure-the-book-for-the-new-release-is-available) - -Week 17: -* [ ] [Release Lead] [Cut the v1.6.0-rc.1 release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) - -Week 18: -* [ ] [Release Lead] [Cut the v1.6.0 release](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] [Cut the v1.5.4 & v1.4.9 releases](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#repeatedly-cut-a-release) -* [ ] [Release Lead] Organize release retrospective -* [ ] [Communications Manager] [Change production branch in Netlify to the new release branch](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#change-production-branch-in-netlify-to-the-new-release-branch) -* [ ] [Communications Manager] [Update clusterctl links in the quickstart](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#update-clusterctl-links-in-the-quickstart) - -Continuously: -* [Release lead] [Maintain the GitHub release milestone](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#continuously-maintain-the-github-release-milestone) -* [Release lead] [Bump the Go version](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#continuously-bump-the-go-version) -* [Communications Manager] [Communicate key dates to the community](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/communications#continuously-communicate-key-dates-to-the-community) -* [Communications Manager] Improve release process documentation -* [Communications Manager] Maintain and improve user facing documentation about releases, release policy and release calendar -* [CI Manager] [Monitor CI signal](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/ci-signal#continuously-monitor-ci-signal) -* [CI Manager] [Reduce the amount of flaky 
tests](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/ci-signal#continuously-reduce-the-amount-of-flaky-tests) - -If and when necessary: -* [ ] [Release Lead] [Track] [Bump the Cluster API apiVersion](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#optional-track-bump-the-cluster-api-apiversion) -* [ ] [Release Lead] [Track] [Bump the Kubernetes version](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#optional-track-bump-the-kubernetes-version) -* [ ] [Release Lead] [Track Release and Improvement tasks](https://github.com/kubernetes-sigs/cluster-api/tree/main/docs/release/role-handbooks/release-lead#optional-track-release-and-improvement-tasks) - -/priority critical-urgent -/kind feature diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 781d65bdc24b..000000000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,18 +0,0 @@ - - -**What this PR does / why we need it**: - -**Which issue(s) this PR fixes** *(optional, in `fixes #(, fixes #, ...)` format, will close the issue(s) when PR gets merged)*: -Fixes # - - \ No newline at end of file diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml deleted file mode 100644 index 42a8a1a9a898..000000000000 --- a/.github/dependabot.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Please see the documentation for all configuration options: -# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates -version: 2 -updates: -# GitHub Actions -- package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" - groups: - all-github-actions: - patterns: [ "*" ] - commit-message: - prefix: ":seedling:" - labels: - - "area/ci" - - "ok-to-test" - -# Go modules -- package-ecosystem: "gomod" - directories: - - "/" - - "/test" - - "/hack/tools" - schedule: - interval: "weekly" - day: "monday" - ## group all dependencies with a k8s.io prefix into a single PR. - groups: - all-go-mod-patch-and-minor: - patterns: [ "*" ] - update-types: [ "patch", "minor" ] - ignore: - # Ignore controller-runtime as its upgraded manually. - - dependency-name: "sigs.k8s.io/controller-runtime" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - # Ignore k8s and its transitives modules as they are upgraded manually together with controller-runtime. - - dependency-name: "k8s.io/*" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - - dependency-name: "github.com/prometheus/*" - update-types: [ "version-update:semver-major", "version-update:semver-minor"] - - dependency-name: "go.etcd.io/*" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - - dependency-name: "google.golang.org/grpc" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - # Note: We have to keep this 100% in sync with k8s.io, so we get exactly the behavior - # that the k8s.io CEL code expects. - - dependency-name: "github.com/google/cel-go" - # Ignore kind as its upgraded manually. - - dependency-name: "sigs.k8s.io/kind" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - # Bumping the kustomize API independently can break compatibility with client-go as they share k8s.io/kube-openapi as a dependency. 
- - dependency-name: "sigs.k8s.io/kustomize/api" - update-types: [ "version-update:semver-major", "version-update:semver-minor" ] - commit-message: - prefix: ":seedling:" - labels: - - "area/dependency" - - "ok-to-test" diff --git a/.github/workflows/pr-dependabot.yaml b/.github/workflows/pr-dependabot.yaml deleted file mode 100644 index 18e2ff424372..000000000000 --- a/.github/workflows/pr-dependabot.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: PR dependabot code generation and go modules fix - -# This action runs on other PRs opened by dependabot. It updates modules and generated code on PRs opened by dependabot. -on: - pull_request: - branches: - - dependabot/** - push: - branches: - - dependabot/** - workflow_dispatch: - -permissions: - contents: write # Allow to update the PR. - -jobs: - build: - name: Build - runs-on: ubuntu-latest - steps: - - name: Check out code into the Go module directory - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - - name: Calculate go version - id: vars - run: echo "go_version=$(make go-version)" >> $GITHUB_OUTPUT - - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # tag=v5.3.0 - with: - go-version: ${{ steps.vars.outputs.go_version }} - - uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # tag=v4.2.2 - name: Restore go cache - with: - path: | - ~/.cache/go-build - ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go- - - name: Update all modules - run: make generate-modules - - name: Update generated code - run: make generate - - uses: EndBug/add-and-commit@a94899bca583c204427a224a7af87c02f9b325d5 # tag=v9.1.4 - name: Commit changes - with: - author_name: dependabot[bot] - author_email: 49699333+dependabot[bot]@users.noreply.github.com - default_author: github_actor - message: 'Update generated code' diff --git a/.github/workflows/pr-gh-workflow-approve.yaml b/.github/workflows/pr-gh-workflow-approve.yaml deleted file mode 100644 index f493fd40032d..000000000000 --- a/.github/workflows/pr-gh-workflow-approve.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: PR approve GH Workflows - -on: - pull_request_target: - types: - - edited - - labeled - - reopened - - synchronize - -permissions: {} - -jobs: - approve: - name: Approve ok-to-test - if: contains(github.event.pull_request.labels.*.name, 'ok-to-test') - runs-on: ubuntu-latest - permissions: - actions: write - steps: - - name: Update PR - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - continue-on-error: true - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const result = await github.rest.actions.listWorkflowRunsForRepo({ - owner: context.repo.owner, - repo: context.repo.repo, - event: "pull_request", - status: "action_required", - head_sha: context.payload.pull_request.head.sha, - per_page: 100 - }); - - for (var run of result.data.workflow_runs) { - await github.rest.actions.approveWorkflowRun({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: run.id - }); - } diff --git a/.github/workflows/pr-golangci-lint.yaml b/.github/workflows/pr-golangci-lint.yaml deleted file mode 100644 index 6b0205c1b534..000000000000 --- a/.github/workflows/pr-golangci-lint.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: PR golangci-lint - -on: - pull_request: - types: [opened, edited, synchronize, reopened] - -# Remove all permissions from GITHUB_TOKEN except metadata. 
-permissions: {} - -jobs: - golangci: - name: lint - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - working-directory: - - "" - - test - - hack/tools - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - - name: Calculate go version - id: vars - run: echo "go_version=$(make go-version)" >> $GITHUB_OUTPUT - - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # tag=v5.3.0 - with: - go-version: ${{ steps.vars.outputs.go_version }} - - name: golangci-lint - uses: golangci/golangci-lint-action@4696ba8babb6127d732c3c6dde519db15edab9ea # tag=v6.5.1 - with: - version: v1.63.4 - args: --out-format=colored-line-number - working-directory: ${{matrix.working-directory}} - - name: Lint API - run: GOLANGCI_LINT_EXTRA_ARGS=--out-format=colored-line-number make lint-api diff --git a/.github/workflows/pr-md-link-check.yaml b/.github/workflows/pr-md-link-check.yaml deleted file mode 100644 index 36c64b54c11a..000000000000 --- a/.github/workflows/pr-md-link-check.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: PR check Markdown links - -on: - pull_request: - types: [opened, edited, synchronize, reopened] - paths: - - '**.md' - -# Remove all permissions from GITHUB_TOKEN except metadata. -permissions: {} - -jobs: - markdown-link-check: - name: Broken Links - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - - uses: gaurav-nelson/github-action-markdown-link-check@3c3b66f1f7d0900e37b71eca45b63ea9eedfce31 # tag=1.0.17 - with: - use-quiet-mode: 'yes' - config-file: .markdownlinkcheck.json - check-modified-files-only: 'yes' - base-branch: ${{ github.base_ref }} diff --git a/.github/workflows/pr-verify.yaml b/.github/workflows/pr-verify.yaml deleted file mode 100644 index 2f834c95839b..000000000000 --- a/.github/workflows/pr-verify.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: PR title verifier - -on: - pull_request_target: - types: [opened, edited, synchronize, reopened] - -jobs: - verify: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - - - name: Check if PR title is valid - env: - PR_TITLE: ${{ github.event.pull_request.title }} - run: | - ./hack/verify-pr-title.sh "${PR_TITLE}" - diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 47c6bb805f96..d57c23cd96e9 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,114 +1,34 @@ -name: Create Release +name: Release on: push: - branches: - - main - paths: - - 'CHANGELOG/*.md' + tags: + - 'v*' permissions: - contents: write # Allow to push a tag, create a release branch and publish a draft release. 
+ contents: write jobs: - push_release_tags: + draft_release: + name: Draft release runs-on: ubuntu-latest - outputs: - release_tag: ${{ steps.release-version.outputs.release_version }} steps: - - name: Checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - fetch-depth: 0 - - name: Get changed files - id: changed-files - uses: tj-actions/changed-files@2f7c5bfce28377bc069a65ba478de0a74aa0ca32 # tag=v46.0.1 - - name: Get release version - id: release-version - run: | - if [[ ${{ steps.changed-files.outputs.all_changed_files_count }} != 1 ]]; then - echo "1 release notes file should be changed to create a release tag, found ${{ steps.changed-files.outputs.all_changed_files_count }}" - exit 1 - fi - for changed_file in ${{ steps.changed-files.outputs.all_changed_files }}; do - export RELEASE_VERSION=$(echo "${changed_file}" | grep -oP '(?<=/)[^/]+(?=\.md)') - echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_ENV - echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT - if [[ "$RELEASE_VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$ ]]; then - echo "Valid semver: $RELEASE_VERSION" - else - echo "Invalid semver: $RELEASE_VERSION" - exit 1 - fi - done - - name: Determine the release branch to use - run: | - # Use the release branch for all v1.10 releases - if [[ ! $RELEASE_VERSION =~ ^v1\.10 ]] && [[ $RELEASE_VERSION =~ beta ]] || [[ $RELEASE_VERSION =~ alpha ]]; then - export RELEASE_BRANCH=main - echo "RELEASE_BRANCH=$RELEASE_BRANCH" >> $GITHUB_ENV - echo "This is a beta or alpha release, will use release branch $RELEASE_BRANCH" - else - export RELEASE_BRANCH=release-$(echo $RELEASE_VERSION | sed -E 's/^v([0-9]+)\.([0-9]+)\..*$/\1.\2/') - echo "RELEASE_BRANCH=$RELEASE_BRANCH" >> $GITHUB_ENV - echo "This is not a beta or alpha release, will use release branch $RELEASE_BRANCH" - fi - - name: Create or checkout release branch - run: | - if git show-ref --verify --quiet "refs/remotes/origin/$RELEASE_BRANCH"; then - echo "Branch $RELEASE_BRANCH already exists" - git checkout "$RELEASE_BRANCH" - else - git checkout -b "$RELEASE_BRANCH" - git push origin "$RELEASE_BRANCH" - echo "Created branch $RELEASE_BRANCH" - fi - - name: Validate tag does not already exist - run: | - if [[ $(git tag -l $RELEASE_VERSION) ]]; then - echo "Tag $RELEASE_VERSION already exists, exiting" - exit 1 - fi - - name: Create Release Tag - run: | - git config user.name "${GITHUB_ACTOR}" - git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" - git tag -a ${RELEASE_VERSION} -m ${RELEASE_VERSION} - git tag test/${RELEASE_VERSION} - git push origin ${RELEASE_VERSION} - git push origin test/${RELEASE_VERSION} - echo "Created tags $RELEASE_VERSION and test/${RELEASE_VERSION}" - release: - name: create draft release - runs-on: ubuntu-latest - needs: push_release_tags - steps: - - name: Set env - run: echo "RELEASE_TAG=${RELEASE_TAG}" >> $GITHUB_ENV - env: - RELEASE_TAG: ${{needs.push_release_tags.outputs.release_tag}} - - name: checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - fetch-depth: 0 - ref: ${{ env.RELEASE_TAG }} - - name: Calculate go version - run: echo "go_version=$(make go-version)" >> $GITHUB_ENV - - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # tag=v5.3.0 - with: - go-version: ${{ env.go_version }} - - name: generate release artifacts - run: | - make release - - name: get release notes - run: | - curl -L 
"https://raw.githubusercontent.com/${{ github.repository }}/main/CHANGELOG/${{ env.RELEASE_TAG }}.md" \ - -o "${{ env.RELEASE_TAG }}.md" - - name: Release - uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda # tag=v2.2.1 - with: - draft: true - files: out/* - body_path: ${{ env.RELEASE_TAG }}.md - tag_name: ${{ env.RELEASE_TAG }} + - name: Setup environment + run: echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV + - name: Checkout code + uses: actions/checkout@v4.2.2 + with: + fetch-depth: 0 + - name: Calculate Go version + run: echo "go_version=$(make go-version)" >> $GITHUB_ENV + - name: Setup Go + uses: actions/setup-go@v5.3.0 + with: + go-version: ${{ env.go_version }} + - name: Generate artifacts + run: make release + - name: Draft release + uses: softprops/action-gh-release@v2.2.1 + with: + draft: true + files: out/* diff --git a/.github/workflows/weekly-md-link-check.yaml b/.github/workflows/weekly-md-link-check.yaml deleted file mode 100644 index 0e8b000ff41b..000000000000 --- a/.github/workflows/weekly-md-link-check.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Weekly check all Markdown links - -on: - schedule: - # Cron for every Monday at 12:00 UTC. - - cron: "0 12 * * 1" - -# Remove all permissions from GITHUB_TOKEN except metadata. -permissions: {} - -jobs: - markdown-link-check: - name: Broken Links - strategy: - fail-fast: false - matrix: - branch: [ main, release-1.9, release-1.8 ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - ref: ${{ matrix.branch }} - - uses: gaurav-nelson/github-action-markdown-link-check@3c3b66f1f7d0900e37b71eca45b63ea9eedfce31 # tag=1.0.17 - with: - use-quiet-mode: 'yes' - config-file: .markdownlinkcheck.json diff --git a/.github/workflows/weekly-security-scan.yaml b/.github/workflows/weekly-security-scan.yaml deleted file mode 100644 index c680ca7f4f3f..000000000000 --- a/.github/workflows/weekly-security-scan.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: Weekly security scan - -on: - schedule: - # Cron for every Monday at 12:00 UTC. - - cron: "0 12 * * 1" - -# Remove all permissions from GITHUB_TOKEN except metadata. -permissions: {} - -jobs: - scan: - strategy: - fail-fast: false - matrix: - branch: [ main, release-1.9, release-1.8 ] - name: Trivy - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - ref: ${{ matrix.branch }} - - name: Calculate go version - id: vars - run: echo "go_version=$(make go-version)" >> $GITHUB_OUTPUT - - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # tag=v5.3.0 - with: - go-version: ${{ steps.vars.outputs.go_version }} - - name: Run verify security target - run: make verify-security diff --git a/.github/workflows/weekly-test-release.yaml b/.github/workflows/weekly-test-release.yaml deleted file mode 100644 index 44a1afe0f5b8..000000000000 --- a/.github/workflows/weekly-test-release.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Weekly release test - -# Note: This workflow does not build for releases. It attempts to build release binaries periodically to ensure the repo -# release machinery is in a good state. - -on: - schedule: - # Cron for every day at 12:00 UTC. - - cron: "0 12 * * *" - -# Remove all permissions from GITHUB_TOKEN except metadata. 
-permissions: {} - -jobs: - weekly-test-release: - name: Test release - strategy: - fail-fast: false - matrix: - branch: [ main, release-1.9, release-1.8 ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - ref: ${{ matrix.branch }} - fetch-depth: 0 - - name: Set env - run: echo "RELEASE_TAG=v9.9.9-fake" >> $GITHUB_ENV - - name: Set fake tag for release - run: | - git tag ${{ env.RELEASE_TAG }} - - name: Calculate go version - run: echo "go_version=$(make go-version)" >> $GITHUB_ENV - - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # tag=v5.3.0 - with: - go-version: ${{ env.go_version }} - - name: Test release - run: | - make release \ No newline at end of file diff --git a/Makefile b/Makefile index 7e7547e3b69a..1417fd3c7891 100644 --- a/Makefile +++ b/Makefile @@ -533,7 +533,7 @@ generate-go-conversions-test-extension: $(CONVERSION_GEN) ## Generate conversion # The tmp/sigs.k8s.io/cluster-api symlink is a workaround to make this target run outside of GOPATH .PHONY: generate-go-openapi generate-go-openapi: $(OPENAPI_GEN) ## Generate openapi go code for runtime SDK - @mkdir -p ./tmp/sigs.k8s.io; ln -s $(ROOT_DIR) ./tmp/sigs.k8s.io/; cd ./tmp; \ + @mkdir -p ./tmp/sigs.k8s.io; rm -f ./tmp/sigs.k8s.io/cluster-api; ln -s $(ROOT_DIR) ./tmp/sigs.k8s.io/cluster-api; cd ./tmp; \ for pkg in "api/v1beta1" "$(EXP_DIR)/runtime/hooks/api/v1alpha1"; do \ (cd ../ && $(MAKE) clean-generated-openapi-definitions SRC_DIRS="./$${pkg}"); \ echo "** Generating openapi schema for types in ./$${pkg} **"; \ diff --git a/README.md b/README.md index 2f87ecc9b480..b17c3d579398 100644 --- a/README.md +++ b/README.md @@ -1,65 +1,107 @@ -capi -

-[badge images: capi logo, GitHub release (latest SemVer)]

- # Cluster API -### 👋 Welcome to our project! Our [Book](https://cluster-api.sigs.k8s.io) can help you get started and provides lots of in-depth information. +This is Giant Swarm's fork. See the upstream [cluster-api README](https://github.com/kubernetes-sigs/cluster-api/blob/main/README.md) for official documentation. + +## How to work with this repo + +Currently, we try to follow the upstream `release-X.Y` branch to always get the latest stable release and fixes, but not untested commits from `main`. Our only differences against upstream should be in this `README.md` and `.circleci/`. Other changes should be opened as a PR for the upstream project first. + +We release cluster-api versions with [cluster-api-app](https://github.com/giantswarm/cluster-api-app/). To provide the YAML manifests, we use GitHub releases, as the upstream project does. The scripts in `cluster-api-app` convert them into the final manifests. + +### Repo setup + +Since we follow upstream, add their Git repo as a remote from which we merge commits: + +```sh +git clone git@github.com:giantswarm/cluster-api.git +cd cluster-api +git remote add upstream https://github.com/kubernetes-sigs/cluster-api.git +``` + +### Test and release + +If you have a non-urgent fix, create an upstream PR and wait until it gets released. We call this release `vX.Y.Z` in the instructions below, so please fill in the desired tag. + +Please follow the development workflow: + +- Ensure a stable release branch exists in our fork repo. For example, with a desired upstream release v1.4.5, the branch is `release-1.4`. If it does not exist on our side yet, copy the branch from upstream and add our changes such as `README.md` and `.circleci/` on top. +- Create a working branch for your changes. +- We want to use stable upstream release tags unless a hotfix is absolutely required ([decision](https://intranet.giantswarm.io/docs/product/pdr/010_fork_management/)). Please decide what type of change you're making: + + - Either: you want to merge and test the latest upstream tag + + ```sh + git fetch upstream + + git checkout -b my-working-branch release-X.Y + + # Create a merge commit using upstream's desired release tag (the one we want + # to upgrade to). + # This creates a commit message such as "Merge tag 'v1.4.5' into release-1.4". + git merge --no-ff vX.Y.Z + + # Since we want the combined content of our repo and the upstream Git tag, + # we need to create our own tag on the merge commit. + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + + # Push your working branch. This triggers the image build in CircleCI. + git push -#### Useful links -- [Feature proposals](./docs/proposals) -- [Quick Start](https://cluster-api.sigs.k8s.io/user/quick-start.html) + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -## ✨ What is the Cluster API? + - Or: you want to implement something else, such as working on an issue of ours that is not yet fixed upstream. Note that for testing changes to upstream, you should probably base your work on the `upstream/main` branch and try your change together with the latest commits from upstream. This also avoids merge conflicts. Maintainers can then help you cherry-pick into their release branches. The latest release branch is usually a bit behind `main`. 
-Cluster API is a Kubernetes subproject focused on providing declarative APIs and tooling to simplify provisioning, upgrading, and operating multiple Kubernetes clusters. + ```sh + git checkout -b my-working-branch release-X.Y # or based on `main` instead of `release-X.Y`, see hint above -Started by the Kubernetes Special Interest Group (SIG) Cluster Lifecycle, the Cluster API project uses Kubernetes-style APIs and patterns to automate cluster lifecycle management for platform operators. The supporting infrastructure, like virtual machines, networks, load balancers, and VPCs, as well as the Kubernetes cluster configuration are all defined in the same way that application developers operate deploying and managing their workloads. This enables consistent and repeatable cluster deployments across a wide variety of infrastructure environments. + # Make some changes and commit as usual + git commit -### ⚙️ Providers + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" -Cluster API can be extended to support any infrastructure (AWS, Azure, vSphere, etc.), bootstrap or control plane (kubeadm is built-in) provider. There is a growing list of [supported providers](https://cluster-api.sigs.k8s.io/reference/providers.html) available. + # Push your working branch. This triggers the image build in CircleCI. + git push - + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -## 🤗 Community, discussion, contribution, and support +- Check that the [CircleCI pipeline](https://app.circleci.com/pipelines/github/giantswarm/cluster-api) succeeds for the desired Git tag in order to produce images. If the tag build fails, fix it. +- Check that the [GitHub release action](https://github.com/giantswarm/cluster-api/actions) for the `vX.Y.Z-gs-...` tag succeeds. +- Edit [the draft GitHub release](https://github.com/giantswarm/cluster-api/releases) and turn it from draft to released. This makes the release's manifest files available on the internet, as used in [cluster-api-app](https://github.com/giantswarm/cluster-api-app). +- Test the changes in the app -Cluster API is developed in the open, and is constantly being improved by our users, contributors, and maintainers. It is because of you that we are able to automate cluster lifecycle management for the community. Join us! + - Replace `.image.tag` in [cluster-api-app's `values.yaml`](https://github.com/giantswarm/cluster-api-app/blob/master/helm/cluster-api/values.yaml) with the new tag `vX.Y.Z-gs-...`. + - Run `cd cluster-api-app && make generate` to update the manifests + - Commit and push your working branch for `cluster-api-app` to trigger the CircleCI pipeline + - Install and test the app thoroughly on a management cluster. Continue with the next step only once you're confident. +- Open a PR for the `cluster-api` fork (your working branch) -If you have questions or want to get the latest project news, you can connect with us in the following ways: + - If you merged an upstream release tag, we should target our `release-X.Y` branch with the PR. + - On the other hand, if you implemented something else which is not in upstream yet, we should target `upstream/main` so that it first lands in the upstream project, where it is officially approved, tested, and released. Afterwards, you would repeat this whole procedure and merge the release that includes your fix. 
For a quick in-house hotfix, you can alternatively open a PR directly against our `release-X.Y` branch. +- Also open a PR for the `cluster-api-app` change +- Once merged, manually bump the version in the respective collection to deploy it for one provider (e.g. [capa-app-collection](https://github.com/giantswarm/capa-app-collection/)) -- Chat with us on the Kubernetes [Slack](http://slack.k8s.io/) in the [#cluster-api][#cluster-api slack] channel -- Subscribe to the [SIG Cluster Lifecycle](https://groups.google.com/a/kubernetes.io/g/sig-cluster-lifecycle) Google Group for access to documents and calendars -- Join our Cluster API working group sessions where we share the latest project news, demos, answer questions, and triage issues - - Weekly on Wednesdays @ 10:00 PT on [Zoom][zoomMeeting] - - Previous meetings: \[ [notes][notes] | [recordings][recordings] \] +### Keep fork customizations up to date -Pull Requests and feedback on issues are very welcome! -See the [issue tracker] if you're unsure where to start, especially the [Good first issue] and [Help wanted] tags, and -also feel free to reach out to discuss. +Only `README.md`, `.github/` and `.circleci/` should differ between upstream and our fork, so the diff of everything else should be empty, or, at worst, contain hotfixes that are not in upstream yet: -See also our [contributor guide](CONTRIBUTING.md) and the Kubernetes [community page] for more details on how to get involved. +```sh +git fetch upstream +git diff `# the upstream tag we merged recently` vX.Y.Z..origin/release-X.Y `# our release branch` -- ':!.circleci/' ':!README.md' +``` -### Code of conduct +And we should also keep our `main` and `release-X.Y` branches in sync, so this diff should be empty: -Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md). +```sh +git diff main..release-X.Y -- .circleci/ README.md .github/ +``` -[community page]: https://kubernetes.io/community -[notes]: https://cluster-api.sigs.k8s.io/agenda -[recordings]: https://www.youtube.com/playlist?list=PL69nYSiGNLP29D0nYgAGWt1ZFqS9Z7lw4 -[zoomMeeting]: https://zoom.us/j/861487554 -[implementerNotes]: https://docs.google.com/document/d/1IZ2-AZhe4r3CYiJuttyciS7bGZTTx4iMppcA8_Pr3xE/edit -[providerZoomMeetingTues]: https://zoom.us/j/140808484 -[providerZoomMeetingWed]: https://zoom.us/j/424743530 -[issue tracker]: https://github.com/kubernetes-sigs/cluster-api/issues -[#cluster-api slack]: https://kubernetes.slack.com/archives/C8TSNPY4T -[Good first issue]: https://github.com/kubernetes-sigs/cluster-api/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22 -[Help wanted]: https://github.com/kubernetes-sigs/cluster-api/issues?utf8=%E2%9C%93&q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22+ +If this shows any output, please align the `main` branch with the release branches. - +We changed the upstream GitHub Actions to work with our repository (e.g., removing references to non-synced branches). +When updating to a new release, please also update branch references in the actions (e.g., bump the release version in `.github/workflows/{test-release-weekly.yml, lint-docs-weekly.yml, weekly-security-scan.yaml}`).
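The hunks below add an optional `strategy` field to the MachinePool CRD and its `v1beta1` Go types. As a hedged illustration (not part of the patch) of how the new field could be populated, assuming the `MachinePoolStrategy` and `RemediationStrategy` types introduced in the following hunks; the cluster name and values are hypothetical:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
)

func main() {
	// "20%" is scaled against the desired replicas; a plain integer would be used as-is.
	maxInFlight := intstr.FromString("20%")

	spec := expv1.MachinePoolSpec{
		ClusterName: "my-cluster", // hypothetical
		Replicas:    ptr.To[int32](5),
		Strategy: &expv1.MachinePoolStrategy{
			Remediation: &clusterv1.RemediationStrategy{
				MaxInFlight: &maxInFlight,
			},
		},
	}

	fmt.Printf("remediate at most %s of %d replicas at a time\n",
		spec.Strategy.Remediation.MaxInFlight.String(), *spec.Replicas)
}
```

Leaving `strategy` unset keeps remediation bounded only by the replica count, and providers without "MachinePool Machines" support do no remediation at all, per the field documentation below.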
diff --git a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml index c2949543da52..cf8b4e49f572 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml @@ -1092,6 +1092,39 @@ spec: This is a pointer to distinguish between explicit zero and not specified. format: int32 type: integer + strategy: + description: strategy defines how to replace existing machines with + new ones. + properties: + remediation: + description: |- + remediation controls the strategy of remediating unhealthy machines + as marked by a MachineHealthCheck. This only applies to infrastructure + providers supporting "MachinePool Machines". For other providers, + no remediation is done. + properties: + maxInFlight: + anyOf: + - type: integer + - type: string + description: |- + maxInFlight determines how many in flight remediations should happen at the same time. + + Remediation only happens on the MachineSet with the most current revision, while + older MachineSets (usually present during rollout operations) aren't allowed to remediate. + + Note: In general (independent of remediations), unhealthy machines are always + prioritized during scale down operations over healthy ones. + + MaxInFlight can be set to a fixed number or a percentage. + Example: when this is set to 20%, the MachineSet controller deletes at most 20% of + the desired replicas. + + If not set, remediation is limited to all machines (bounded by replicas) + under the active MachineSet's management. + x-kubernetes-int-or-string: true + type: object + type: object template: description: template describes the machines that will be created. properties: diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index 029e0cbbd9c7..34ec2d791d80 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -71,12 +71,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, controllerOwnerRef := *metav1.NewControllerRef(controlPlane.KCP, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind)) clusterName := util.ObjectKey(controlPlane.Cluster) - configSecret, err := secret.GetFromNamespacedName(ctx, r.SecretCachingClient, clusterName, secret.Kubeconfig) + configSecret, err := secret.GetFromNamespacedName(ctx, r.Client, clusterName, secret.Kubeconfig) switch { case apierrors.IsNotFound(err): createErr := kubeconfig.CreateSecretWithOwner( ctx, - r.SecretCachingClient, + r.Client, clusterName, endpoint.String(), controllerOwnerRef, diff --git a/exp/api/v1beta1/machinepool_types.go b/exp/api/v1beta1/machinepool_types.go index be97236182d4..e61f8ab0d5eb 100644 --- a/exp/api/v1beta1/machinepool_types.go +++ b/exp/api/v1beta1/machinepool_types.go @@ -69,10 +69,29 @@ type MachinePoolSpec struct { // +kubebuilder:validation:items:MinLength=1 // +kubebuilder:validation:items:MaxLength=256 FailureDomains []string `json:"failureDomains,omitempty"` + + // strategy defines how to replace existing machines with new ones. + // +optional + Strategy *MachinePoolStrategy `json:"strategy,omitempty"` } // ANCHOR_END: MachinePoolSpec +// ANCHOR: MachinePoolStrategy + +// MachinePoolStrategy describes how to replace existing machines +// with new ones. +type MachinePoolStrategy struct { + // remediation controls the strategy of remediating unhealthy machines + // as marked by a MachineHealthCheck. 
This only applies to infrastructure + // providers supporting "MachinePool Machines". For other providers, + // no remediation is done. + // +optional + Remediation *clusterv1.RemediationStrategy `json:"remediation,omitempty"` +} + +// ANCHOR_END: MachinePoolStrategy + // ANCHOR: MachinePoolStatus // MachinePoolStatus defines the observed state of MachinePool. diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go index 06bd6b0e9a9b..e1739ebe96c0 100644 --- a/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -111,6 +111,11 @@ func (in *MachinePoolSpec) DeepCopyInto(out *MachinePoolSpec) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(MachinePoolStrategy) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachinePoolSpec. @@ -165,6 +170,26 @@ func (in *MachinePoolStatus) DeepCopy() *MachinePoolStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachinePoolStrategy) DeepCopyInto(out *MachinePoolStrategy) { + *out = *in + if in.Remediation != nil { + in, out := &in.Remediation, &out.Remediation + *out = new(apiv1beta1.RemediationStrategy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachinePoolStrategy. +func (in *MachinePoolStrategy) DeepCopy() *MachinePoolStrategy { + if in == nil { + return nil + } + out := new(MachinePoolStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MachinePoolV1Beta2Status) DeepCopyInto(out *MachinePoolV1Beta2Status) { *out = *in diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go index db989f773f3b..c66b53c02dd1 100644 --- a/exp/internal/controllers/machinepool_controller_phases.go +++ b/exp/internal/controllers/machinepool_controller_phases.go @@ -20,6 +20,8 @@ import ( "context" "fmt" "reflect" + "slices" + "sort" "time" "github.com/pkg/errors" @@ -28,6 +30,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -44,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" utilconversion "sigs.k8s.io/cluster-api/util/conversion" "sigs.k8s.io/cluster-api/util/labels" @@ -280,7 +284,10 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * // Get the nodeRefsMap from the cluster. 
s.nodeRefMap, getNodeRefsErr = r.getNodeRefMap(ctx, clusterClient) - err = r.reconcileMachines(ctx, s, infraConfig) + res := ctrl.Result{} + + reconcileMachinesRes, err := r.reconcileMachines(ctx, s, infraConfig) + res = util.LowestNonZeroResult(res, reconcileMachinesRes) if err != nil || getNodeRefsErr != nil { return ctrl.Result{}, kerrors.NewAggregate([]error{errors.Wrapf(err, "failed to reconcile Machines for MachinePool %s", klog.KObj(mp)), errors.Wrapf(getNodeRefsErr, "failed to get nodeRefs for MachinePool %s", klog.KObj(mp))}) @@ -288,7 +295,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * if !mp.Status.InfrastructureReady { log.Info("Infrastructure provider is not yet ready", infraConfig.GetKind(), klog.KObj(infraConfig)) - return ctrl.Result{}, nil + return res, nil } var providerIDList []string @@ -307,7 +314,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * if len(providerIDList) == 0 && mp.Status.Replicas != 0 { log.Info("Retrieved empty spec.providerIDList from infrastructure provider but status.replicas is not zero.", "replicas", mp.Status.Replicas) - return ctrl.Result{}, nil + return res, nil } if !reflect.DeepEqual(mp.Spec.ProviderIDList, providerIDList) { @@ -317,7 +324,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * mp.Status.UnavailableReplicas = mp.Status.Replicas } - return ctrl.Result{}, nil + return res, nil } // reconcileMachines reconciles Machines associated with a MachinePool. @@ -327,7 +334,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * // infrastructure is created accordingly. // Note: When supported by the cloud provider implementation of the MachinePool, machines will provide a means to interact // with the corresponding infrastructure (e.g. delete a specific machine in case MachineHealthCheck detects it is unhealthy). 
-func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) error { +func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) mp := s.machinePool @@ -335,10 +342,10 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, if err := util.UnstructuredUnmarshalField(infraMachinePool, &infraMachineKind, "status", "infrastructureMachineKind"); err != nil { if errors.Is(err, util.ErrUnstructuredFieldNotFound) { log.V(4).Info("MachinePool Machines not supported, no infraMachineKind found") - return nil + return ctrl.Result{}, nil } - return errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp)) + return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp)) } infraMachineSelector := metav1.LabelSelector{ @@ -355,7 +362,7 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, infraMachineList.SetAPIVersion(infraMachinePool.GetAPIVersion()) infraMachineList.SetKind(infraMachineKind + "List") if err := r.Client.List(ctx, &infraMachineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil { - return errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + return ctrl.Result{}, errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) } // Add watcher for infraMachine, if there isn't one already; this will allow this controller to reconcile @@ -366,21 +373,26 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, // Add watcher for infraMachine, if there isn't one already. if err := r.externalTracker.Watch(log, sampleInfraMachine, handler.EnqueueRequestsFromMapFunc(r.infraMachineToMachinePoolMapper), predicates.ResourceIsChanged(r.Client.Scheme(), *r.externalTracker.PredicateLogger)); err != nil { - return err + return ctrl.Result{}, err } // Get the list of machines managed by this controller, and align it with the infra machines managed by // the InfraMachinePool controller. machineList := &clusterv1.MachineList{} if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil { - return err + return ctrl.Result{}, err } if err := r.createOrUpdateMachines(ctx, s, machineList.Items, infraMachineList.Items); err != nil { - return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + return ctrl.Result{}, errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) } - return nil + res, err := r.reconcileUnhealthyMachines(ctx, s, machineList.Items) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile unhealthy machines for MachinePool %s", klog.KObj(mp)) + } + + return res, nil } // createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef. 
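Before the next hunk, a note on the arithmetic it introduces: maxInFlight is an IntOrString, and the controller resolves it against spec.replicas (defaulted to 1 via ptr.Deref) using intstr.GetScaledValueFromIntOrPercent with rounding up. A minimal standalone sketch of that call; the 25% value and the replica count of 10 are illustrative, not taken from the patch:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// Percentages are scaled against the replica count; roundUp=true matches
	// the controller's call, so 25% of 10 replicas becomes 2.5, rounded up to 3.
	pct := intstr.FromString("25%")
	maxInFlight, err := intstr.GetScaledValueFromIntOrPercent(&pct, 10, true)
	if err != nil {
		panic(err)
	}
	fmt.Println(maxInFlight) // 3

	// A plain integer passes through unscaled, regardless of replicas.
	fixed := intstr.FromInt32(3)
	maxInFlight, _ = intstr.GetScaledValueFromIntOrPercent(&fixed, 10, true)
	fmt.Println(maxInFlight) // 3
}

Because of the round-up, a nonzero percentage never resolves to zero allowed remediations as long as there is at least one replica.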
@@ -575,3 +587,121 @@ func (r *MachinePoolReconciler) getNodeRefMap(ctx context.Context, c client.Clie return nodeRefsMap, nil } + +func (r *MachinePoolReconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope, machines []clusterv1.Machine) (ctrl.Result, error) { + if len(machines) == 0 { + return ctrl.Result{}, nil + } + + log := ctrl.LoggerFrom(ctx) + mp := s.machinePool + + machinesWithHealthCheck := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool { + return !conditions.Has(&machine, clusterv1.MachineHealthCheckSucceededCondition) + }) + if len(machinesWithHealthCheck) == 0 { + // This means there is no MachineHealthCheck selecting any machines + // of this machine pool. In this case, do not requeue so often, + // but still check regularly in case a MachineHealthCheck is + // deployed or activated later. This long interval shouldn't be a problem + // at cluster creation, since newly created nodes trigger MachinePool + // reconciliation anyway as the infrastructure provider + // creates the InfraMachines. + log.V(4).Info("Skipping reconciliation of unhealthy MachinePool machines because there are no health-checked machines") + return ctrl.Result{RequeueAfter: 10 * time.Minute}, nil + } + + unhealthyMachines := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool { + return !collections.IsUnhealthyAndOwnerRemediated(&machine) + }) + log.V(4).Info("Reconciling unhealthy MachinePool machines", "unhealthyMachines", len(unhealthyMachines)) + + // Calculate how many in-flight remediations are allowed. + // By default, we allow all machines to be remediated at the same time. + maxInFlight := len(unhealthyMachines) + if mp.Spec.Strategy != nil && mp.Spec.Strategy.Remediation != nil { + if mp.Spec.Strategy.Remediation.MaxInFlight != nil { + var err error + replicas := int(ptr.Deref(mp.Spec.Replicas, 1)) + maxInFlight, err = intstr.GetScaledValueFromIntOrPercent(mp.Spec.Strategy.Remediation.MaxInFlight, replicas, true) + if err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to calculate maxInFlight to remediate machines") + } + log = log.WithValues("maxInFlight", maxInFlight, "replicas", replicas) + } + } + + machinesToRemediate := make([]*clusterv1.Machine, 0, len(unhealthyMachines)) + inFlight := 0 + for _, m := range unhealthyMachines { + if !m.DeletionTimestamp.IsZero() { + if conditions.IsTrue(&m, clusterv1.MachineOwnerRemediatedCondition) { + // Machine has been remediated by this controller and is still in flight. + inFlight++ + } + continue + } + if conditions.IsFalse(&m, clusterv1.MachineOwnerRemediatedCondition) { + machinesToRemediate = append(machinesToRemediate, &m) + } + } + log = log.WithValues("inFlight", inFlight) + + if len(machinesToRemediate) == 0 { + // There's a MachineHealthCheck monitoring the machines, but currently + // no action needs to be taken. A machine could require remediation at any + // time, so use a short interval until the next reconciliation. + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + if inFlight >= maxInFlight { + log.V(3).Info("Remediation strategy is set, and maximum in flight has been reached", "machinesToBeRemediated", len(machinesToRemediate)) + + // Check again soon whether the already-remediating (= deleting) machines are gone + // so that more machines can be remediated. + return ctrl.Result{RequeueAfter: 15 * time.Second}, nil + } + + // Sort the machines from newest to oldest. 
+ // We are trying to remediate machines failing to come up first because + // there is a chance that they are not hosting any workloads (minimize disruption). + sort.SliceStable(machinesToRemediate, func(i, j int) bool { + return machinesToRemediate[i].CreationTimestamp.After(machinesToRemediate[j].CreationTimestamp.Time) + }) + + haveMoreMachinesToRemediate := false + if len(machinesToRemediate) > (maxInFlight - inFlight) { + haveMoreMachinesToRemediate = true + log.V(5).Info("Remediation strategy is set, limiting in-flight operations", "machinesToBeRemediated", len(machinesToRemediate)) + machinesToRemediate = machinesToRemediate[:(maxInFlight - inFlight)] + } + + // Remediate unhealthy machines by deleting them. + var errs []error + for _, m := range machinesToRemediate { + log.Info("Deleting unhealthy Machine", "Machine", klog.KObj(m)) + patch := client.MergeFrom(m.DeepCopy()) + if err := r.Client.Delete(ctx, m); err != nil { + if apierrors.IsNotFound(err) { + continue + } + errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m))) + continue + } + conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition) + if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) { + errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m))) + } + } + + if len(errs) > 0 { + return ctrl.Result{}, errors.Wrap(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines") + } + + if haveMoreMachinesToRemediate { + // More machines need remediation, so reconcile again sooner. + return ctrl.Result{RequeueAfter: 15 * time.Second}, nil + } + + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil +} diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go index 2b8b6557bd18..7f0fc5630549 100644 --- a/exp/internal/controllers/machinepool_controller_phases_test.go +++ b/exp/internal/controllers/machinepool_controller_phases_test.go @@ -42,8 +42,10 @@ import ( externalfake "sigs.k8s.io/cluster-api/controllers/external/fake" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/util/ssa" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/cluster-api/util/labels/format" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/test/builder" ) @@ -1436,7 +1438,7 @@ func TestReconcileMachinePoolMachines(t *testing.T) { scope := &scope{ machinePool: &machinePool, } - err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + _, err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) @@ -1507,7 +1509,7 @@ func TestReconcileMachinePoolMachines(t *testing.T) { machinePool: &machinePool, } - err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + _, err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) @@ -1566,9 +1568,12 @@ func TestReconcileMachinePoolMachines(t *testing.T) { machinePool: &machinePool, } - err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + res, err := r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) + // Regular reconciliation makes no 
sense if the infra provider + // doesn't support MachinePool machines. + g.Expect(res.RequeueAfter).To(BeZero()) machineList := &clusterv1.MachineList{} labels := map[string]string{ @@ -1578,6 +1583,110 @@ func TestReconcileMachinePoolMachines(t *testing.T) { g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels(labels))).To(Succeed()) g.Expect(machineList.Items).To(BeEmpty()) }) + + t.Run("Should delete unhealthy machines", func(*testing.T) { + machinePool := getMachinePool(3, "machinepool-test-4", clusterName, ns.Name) + g.Expect(env.CreateAndWait(ctx, &machinePool)).To(Succeed()) + + infraMachines := getInfraMachines(3, machinePool.Name, clusterName, ns.Name) + for i := range infraMachines { + g.Expect(env.CreateAndWait(ctx, &infraMachines[i])).To(Succeed()) + } + + machines := getMachines(3, machinePool.Name, clusterName, ns.Name) + for i := range machines { + g.Expect(env.CreateAndWait(ctx, &machines[i])).To(Succeed()) + } + + // machines[0] isn't changed here (no conditions = considered healthy). + + // machines[1] is marked as unhealthy by conditions. + patchHelper, err := patch.NewHelper(&machines[1], env) + g.Expect(err).ShouldNot(HaveOccurred()) + unhealthyMachineName := machines[1].Name + conditions.MarkFalse(&machines[1], clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "") + conditions.MarkFalse(&machines[1], clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") + g.Expect(patchHelper.Patch(ctx, &machines[1], patch.WithStatusObservedGeneration{}, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededCondition, + clusterv1.MachineOwnerRemediatedCondition, + }})).To(Succeed()) + + // machines[2] is marked as healthy by conditions. + patchHelper, err = patch.NewHelper(&machines[2], env) + g.Expect(err).ShouldNot(HaveOccurred()) + conditions.MarkTrue(&machines[2], clusterv1.MachineHealthCheckSucceededCondition) + g.Expect(patchHelper.Patch(ctx, &machines[2], patch.WithStatusObservedGeneration{}, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededCondition, + clusterv1.MachineOwnerRemediatedCondition, + }})).To(Succeed()) + + infraConfig := map[string]interface{}{ + "kind": builder.GenericInfrastructureMachinePoolKind, + "apiVersion": builder.InfrastructureGroupVersion.String(), + "metadata": map[string]interface{}{ + "name": "infra-config4", + "namespace": ns.Name, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + "infrastructureMachineKind": builder.GenericInfrastructureMachineKind, + }, + } + g.Expect(env.CreateAndWait(ctx, &unstructured.Unstructured{Object: infraConfig})).To(Succeed()) + + r := &MachinePoolReconciler{ + Client: env, + ssaCache: 
ssa.NewCache(testController), + externalTracker: external.ObjectTracker{ + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: env.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), + }, + } + scope := &scope{ + machinePool: &machinePool, + } + res, err := r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + r.reconcilePhase(&machinePool) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.RequeueAfter).To(BeNumerically(">", 0)) + + machineList := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + clusterv1.MachinePoolNameLabel: machinePool.Name, + } + g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels(labels))).To(Succeed()) + + // The unhealthy machine should have been remediated (= deleted). + g.Expect(machineList.Items).To(HaveLen(2)) + + for i := range machineList.Items { + machine := &machineList.Items[i] + + // Healthy machines should remain. + g.Expect(machine.Name).ToNot(Equal(unhealthyMachineName)) + + _, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef) + g.Expect(err).ToNot(HaveOccurred()) + } + }) }) } diff --git a/internal/apis/core/exp/v1alpha3/conversion.go b/internal/apis/core/exp/v1alpha3/conversion.go index e2a80b95218b..31a0e14c9e0c 100644 --- a/internal/apis/core/exp/v1alpha3/conversion.go +++ b/internal/apis/core/exp/v1alpha3/conversion.go @@ -27,6 +27,17 @@ import ( utilconversion "sigs.k8s.io/cluster-api/util/conversion" ) +// Convert_v1alpha3_MachineDeploymentStrategy_To_v1beta1_MachinePoolStrategy is a manual conversion function. MachinePoolStrategy only carries remediation settings, which have no v1alpha3 equivalent, so nothing is copied. +func Convert_v1alpha3_MachineDeploymentStrategy_To_v1beta1_MachinePoolStrategy(in *clusterv1alpha3.MachineDeploymentStrategy, out *expv1.MachinePoolStrategy, _ apimachineryconversion.Scope) error { + out.Remediation = nil + return nil +} + +// Convert_v1beta1_MachinePoolStrategy_To_v1alpha3_MachineDeploymentStrategy is a manual conversion function. The remediation settings are dropped because they have no v1alpha3 equivalent. +func Convert_v1beta1_MachinePoolStrategy_To_v1alpha3_MachineDeploymentStrategy(in *expv1.MachinePoolStrategy, out *clusterv1alpha3.MachineDeploymentStrategy, _ apimachineryconversion.Scope) error { + return nil +} + // Convert_v1alpha3_MachinePoolSpec_To_v1beta1_MachinePoolSpec is an autogenerated conversion function. func Convert_v1alpha3_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in *MachinePoolSpec, out *expv1.MachinePoolSpec, s apimachineryconversion.Scope) error { return autoConvert_v1alpha3_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in, out, s) diff --git a/internal/apis/core/exp/v1alpha3/conversion_test.go b/internal/apis/core/exp/v1alpha3/conversion_test.go index ec17db16642c..3db58314bdaf 100644 --- a/internal/apis/core/exp/v1alpha3/conversion_test.go +++ b/internal/apis/core/exp/v1alpha3/conversion_test.go @@ -28,15 +28,17 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" clusterv1alpha3 "sigs.k8s.io/cluster-api/internal/apis/core/v1alpha3" utilconversion "sigs.k8s.io/cluster-api/util/conversion" + "sigs.k8s.io/controller-runtime/pkg/conversion" ) // Test is disabled when the race detector is enabled (via "//go:build !race" above) because otherwise the fuzz tests would just time out. 
func TestFuzzyConversion(t *testing.T) { t.Run("for MachinePool", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ - Hub: &expv1.MachinePool{}, - Spoke: &MachinePool{}, - FuzzerFuncs: []fuzzer.FuzzerFuncs{fuzzFuncs}, + Hub: &expv1.MachinePool{}, + HubAfterMutation: machinePoolHubAfterMutation, + Spoke: &MachinePool{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{fuzzFuncs}, })) } @@ -45,6 +47,7 @@ func fuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { BootstrapFuzzer, MachinePoolSpecFuzzer, ObjectMetaFuzzer, + hubMachinePoolSpec, } } @@ -69,7 +72,20 @@ func ObjectMetaFuzzer(in *clusterv1alpha3.ObjectMeta, c fuzz.Continue) { func MachinePoolSpecFuzzer(in *MachinePoolSpec, c fuzz.Continue) { c.Fuzz(in) - // These fields have been removed in v1beta1 - // data is going to be lost, so we're forcing zero values here. + // Strategy is dropped during conversion to v1beta1, so zero it to make the round-trip succeed. + in.Strategy = nil } + +// machinePoolHubAfterMutation drops the hub's Strategy, which has no v1alpha3 counterpart and cannot round-trip. +func machinePoolHubAfterMutation(c conversion.Hub) { + mp := c.(*expv1.MachinePool) + + mp.Spec.Strategy = nil +} + +// hubMachinePoolSpec zeroes Strategy on fuzzed hub specs for the same reason. +func hubMachinePoolSpec(in *expv1.MachinePoolSpec, c fuzz.Continue) { + c.Fuzz(in) + in.Strategy = nil } diff --git a/internal/apis/core/exp/v1alpha3/zz_generated.conversion.go b/internal/apis/core/exp/v1alpha3/zz_generated.conversion.go index 6f65b4b2f037..068fc7920e7d 100644 --- a/internal/apis/core/exp/v1alpha3/zz_generated.conversion.go +++ b/internal/apis/core/exp/v1alpha3/zz_generated.conversion.go @@ -60,6 +60,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*corev1alpha3.MachineDeploymentStrategy)(nil), (*v1beta1.MachinePoolStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha3_MachineDeploymentStrategy_To_v1beta1_MachinePoolStrategy(a.(*corev1alpha3.MachineDeploymentStrategy), b.(*v1beta1.MachinePoolStrategy), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*MachinePoolSpec)(nil), (*v1beta1.MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha3_MachinePoolSpec_To_v1beta1_MachinePoolSpec(a.(*MachinePoolSpec), b.(*v1beta1.MachinePoolSpec), scope) }); err != nil { @@ -80,6 +85,11 @@ }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta1.MachinePoolStrategy)(nil), (*corev1alpha3.MachineDeploymentStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_MachinePoolStrategy_To_v1alpha3_MachineDeploymentStrategy(a.(*v1beta1.MachinePoolStrategy), b.(*corev1alpha3.MachineDeploymentStrategy), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta1.MachinePool)(nil), (*MachinePool)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_MachinePool_To_v1alpha3_MachinePool(a.(*v1beta1.MachinePool), b.(*MachinePool), scope) }); err != nil { @@ -163,7 +173,15 @@ func autoConvert_v1alpha3_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in *Machine if err := Convert_v1alpha3_MachineTemplateSpec_To_v1beta1_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { return err } - // WARNING: in.Strategy requires manual conversion: does not exist in peer-type + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(v1beta1.MachinePoolStrategy) + if err := Convert_v1alpha3_MachineDeploymentStrategy_To_v1beta1_MachinePoolStrategy(*in, *out, s); err != nil { + return err + } + } else { + out.Strategy = nil + } out.MinReadySeconds = 
(*int32)(unsafe.Pointer(in.MinReadySeconds)) out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) @@ -179,6 +197,15 @@ func autoConvert_v1beta1_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in *v1beta1 out.MinReadySeconds = (*int32)(unsafe.Pointer(in.MinReadySeconds)) out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(corev1alpha3.MachineDeploymentStrategy) + if err := Convert_v1beta1_MachinePoolStrategy_To_v1alpha3_MachineDeploymentStrategy(*in, *out, s); err != nil { + return err + } + } else { + out.Strategy = nil + } return nil } diff --git a/internal/apis/core/exp/v1alpha4/conversion.go b/internal/apis/core/exp/v1alpha4/conversion.go index 7333658a0416..4236695b8470 100644 --- a/internal/apis/core/exp/v1alpha4/conversion.go +++ b/internal/apis/core/exp/v1alpha4/conversion.go @@ -79,3 +79,8 @@ func Convert_v1beta1_MachinePoolStatus_To_v1alpha4_MachinePoolStatus(in *expv1.M // V1Beta2 was added in v1beta1 return autoConvert_v1beta1_MachinePoolStatus_To_v1alpha4_MachinePoolStatus(in, out, s) } + +// Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec is a manual conversion function that drops Strategy, which does not exist in v1alpha4. +func Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *expv1.MachinePoolSpec, out *MachinePoolSpec, s apimachineryconversion.Scope) error { + return autoConvert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in, out, s) +} diff --git a/internal/apis/core/exp/v1alpha4/conversion_test.go b/internal/apis/core/exp/v1alpha4/conversion_test.go index bff86035f730..d71dfc93abda 100644 --- a/internal/apis/core/exp/v1alpha4/conversion_test.go +++ b/internal/apis/core/exp/v1alpha4/conversion_test.go @@ -21,6 +21,10 @@ package v1alpha4 import ( "testing" + fuzz "github.com/google/gofuzz" + "k8s.io/apimachinery/pkg/api/apitesting/fuzzer" + runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" utilconversion "sigs.k8s.io/cluster-api/util/conversion" ) @@ -29,7 +33,21 @@ import ( func TestFuzzyConversion(t *testing.T) { t.Run("for MachinePool", utilconversion.FuzzTestFunc(utilconversion.FuzzTestFuncInput{ - Hub: &expv1.MachinePool{}, - Spoke: &MachinePool{}, + Hub: &expv1.MachinePool{}, + Spoke: &MachinePool{}, + FuzzerFuncs: []fuzzer.FuzzerFuncs{fuzzFuncs}, })) } + +func fuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { + return []interface{}{ + hubMachinePoolSpec, + } +} + +func hubMachinePoolSpec(in *expv1.MachinePoolSpec, c fuzz.Continue) { + c.Fuzz(in) + + // Strategy has no v1alpha4 equivalent and is dropped during conversion. + in.Strategy = nil +} diff --git a/internal/apis/core/exp/v1alpha4/zz_generated.conversion.go b/internal/apis/core/exp/v1alpha4/zz_generated.conversion.go index c3a04594f280..755063fcf4a7 100644 --- a/internal/apis/core/exp/v1alpha4/zz_generated.conversion.go +++ b/internal/apis/core/exp/v1alpha4/zz_generated.conversion.go @@ -65,11 +65,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta1.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(a.(*v1beta1.MachinePoolSpec), b.(*MachinePoolSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*MachinePoolStatus)(nil), (*v1beta1.MachinePoolStatus)(nil), func(a, b interface{}, scope 
conversion.Scope) error { return Convert_v1alpha4_MachinePoolStatus_To_v1beta1_MachinePoolStatus(a.(*MachinePoolStatus), b.(*v1beta1.MachinePoolStatus), scope) }); err != nil { @@ -80,6 +75,11 @@ }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta1.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(a.(*v1beta1.MachinePoolSpec), b.(*MachinePoolSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta1.MachinePoolStatus)(nil), (*MachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_MachinePoolStatus_To_v1alpha4_MachinePoolStatus(a.(*v1beta1.MachinePoolStatus), b.(*MachinePoolStatus), scope) }); err != nil { @@ -193,14 +193,10 @@ func autoConvert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *v1beta1 out.MinReadySeconds = (*int32)(unsafe.Pointer(in.MinReadySeconds)) out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) + // WARNING: in.Strategy requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec is an autogenerated conversion function. -func Convert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *v1beta1.MachinePoolSpec, out *MachinePoolSpec, s conversion.Scope) error { - return autoConvert_v1beta1_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in, out, s) -} - func autoConvert_v1alpha4_MachinePoolStatus_To_v1beta1_MachinePoolStatus(in *MachinePoolStatus, out *v1beta1.MachinePoolStatus, s conversion.Scope) error { out.NodeRefs = *(*[]v1.ObjectReference)(unsafe.Pointer(&in.NodeRefs)) out.Replicas = in.Replicas diff --git a/util/patch/patch.go b/util/patch/patch.go index 73e60f253d39..3bdb6d1edd4c 100644 --- a/util/patch/patch.go +++ b/util/patch/patch.go @@ -200,7 +200,8 @@ func (h *Helper) patch(ctx context.Context, obj client.Object) error { if err != nil { return err } - return h.client.Patch(ctx, afterObject, client.MergeFrom(beforeObject)) + // Use optimistic locking so that a concurrent change to the object makes the patch fail with a conflict instead of overwriting it. + return h.client.Patch(ctx, afterObject, client.MergeFromWithOptions(beforeObject, client.MergeFromWithOptimisticLock{})) } // patchStatus issues a patch if the status has changed. @@ -212,7 +212,8 @@ func (h *Helper) patchStatus(ctx context.Context, obj client.Object) error { if err != nil { return err } - return h.client.Status().Patch(ctx, afterObject, client.MergeFrom(beforeObject)) + // Use optimistic locking here as well, for the same reason. + return h.client.Status().Patch(ctx, afterObject, client.MergeFromWithOptions(beforeObject, client.MergeFromWithOptimisticLock{})) } // patchStatusConditions issues a patch if there are any changes to the conditions slice under
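One consequence of the util/patch change above deserves a note: with MergeFromWithOptimisticLock, every patch issued by the helper carries the object's resourceVersion, so a write that races with another client now fails with a 409 Conflict instead of silently overwriting the other update. A sketch of the behavior callers should expect; updateLabel, cm, and c are illustrative names, not part of the patch:

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// updateLabel patches a ConfigMap with optimistic locking, mirroring what
// util/patch now does internally for both regular and status patches.
func updateLabel(ctx context.Context, c client.Client, cm *corev1.ConfigMap) error {
	before := cm.DeepCopy()
	if cm.Labels == nil {
		cm.Labels = map[string]string{}
	}
	cm.Labels["touched"] = "true"

	// The patch carries before's resourceVersion because of the optimistic lock option.
	err := c.Patch(ctx, cm, client.MergeFromWithOptions(before, client.MergeFromWithOptimisticLock{}))
	if apierrors.IsConflict(err) {
		// The object changed since it was read; re-read and retry instead
		// of clobbering the concurrent update.
		return fmt.Errorf("configmap changed since it was read, re-read and retry: %w", err)
	}
	return err
}

The likely trade-off is more conflict-driven retries for callers that patch frequently contended objects, which the controllers' requeue loops are positioned to absorb.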