Skip to content

Commit 66c3cd9

Browse files
committed
feat: trying to migrate to cilium without fucking up
1 parent 29dc203 commit 66c3cd9

File tree

41 files changed

+400
-374
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+400
-374
lines changed

.github/renovate.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
"kubernetes/.+\\.ya?ml$"
4141
],
4242
"matchStrings": [
43-
"datasource=(?<datasource>\\S+) depName=(?<depName>\\S+)( versioning=(?<versioning>\\S+))?\n.*?\"(?<currentValue>.*)\"\n"
43+
"datasource=(?<datasource>\\S+) depName=(?<depName>\\S+)( registryUrl=(?<registryUrl>\\S+))?\n.*?\"(?<currentValue>.*)\"\n"
4444
],
4545
"datasourceTemplate": "{{#if datasource}}{{{datasource}}}{{else}}github-releases{{/if}}",
4646
"versioningTemplate": "{{#if versioning}}{{{versioning}}}{{else}}semver{{/if}}"

.github/renovate/groups.json

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,11 @@
1212
"separateMinorPatch": true
1313
},
1414
{
15-
"description": "Immich images",
16-
"groupName": "Immich",
17-
"matchPackagePatterns": ["immich"],
18-
"matchDatasources": ["docker"],
19-
"group": {
20-
"commitMessageTopic": "{{{groupName}}} group"
21-
},
22-
"separateMinorPatch": true
23-
},
24-
{
25-
"description": "Rook-Ceph image and chart",
26-
"groupName": "Rook-Ceph",
27-
"matchPackagePatterns": ["rook.ceph"],
28-
"matchDatasources": ["docker", "helm"],
15+
"description": "Flux Group",
16+
"groupName": "Flux",
17+
"matchPackagePatterns": ["flux"],
18+
"matchDatasources": ["docker", "github-tags"],
19+
"versioning": "semver",
2920
"group": {
3021
"commitMessageTopic": "{{{groupName}}} group"
3122
},

.taskfiles/ClusterTasks.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ tasks:
2626
reconcile:
2727
desc: Force update Flux to pull in changes from your Git repository
2828
cmds:
29-
- flux reconcile -n flux-system source git home-kubernetes
30-
- flux reconcile -n flux-system kustomization cluster
29+
- flux reconcile -n flux-system kustomization cluster --with-source
3130

3231
hr-restart:
3332
desc: Restart all failed Helm Releases

ansible/inventory/group_vars/kubernetes/k3s.yml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,9 @@ k3s_registration_address: "{{ kubevip_address }}"
2828

2929
# (list) A list of URLs to deploy on the primary control plane. Read notes below.
3030
k3s_server_manifests_urls:
31-
# Kube-vip
31+
# Kube-vip RBAC
3232
- url: https://raw.githubusercontent.com/kube-vip/kube-vip/main/docs/manifests/rbac.yaml
3333
filename: custom-kube-vip-rbac.yaml
34-
# Tigera Operator
35-
- url: https://raw.githubusercontent.com/projectcalico/calico/v3.25.1/manifests/tigera-operator.yaml
36-
filename: custom-calico-tigera-operator.yaml
3734
# Prometheus Operator
3835
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
3936
filename: custom-prometheus-alertmanagerconfigs.yaml
@@ -56,8 +53,12 @@ k3s_server_manifests_urls:
5653
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
5754
filename: custom-prometheus-prometheusagents.yaml
5855

59-
# (list) A flat list of templates to deploy on the primary control plane
56+
# (list) A flat list of templates to deploy on the primary control plane nodes
6057
# /var/lib/rancher/k3s/server/manifests
6158
k3s_server_manifests_templates:
62-
- custom-calico-installation.yaml.j2
63-
- custom-kube-vip-daemonset.yaml.j2
59+
- cilium-helmchart.yaml.j2
60+
61+
# (list) A flat list of templates to deploy as static pods on all the control plane nodes
62+
# /var/lib/rancher/k3s/agent/pod-manifests
63+
k3s_server_pod_manifests_templates:
64+
- kube-vip-static-pod.yaml.j2

ansible/inventory/group_vars/kubernetes/kube-vip.yml

Lines changed: 0 additions & 3 deletions
This file was deleted.

ansible/inventory/group_vars/kubernetes/os.yml

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,8 @@
11
---
2-
# (string) Timezone for the servers
3-
# timezone: "America/New_York"
42

53
# (list) Additional ssh public keys to add to the nodes
64
# ssh_authorized_keys:
75

8-
fedora:
9-
packages:
10-
- dnf-plugin-system-upgrade
11-
- dnf-utils
12-
- hdparm
13-
- htop
14-
- ipvsadm
15-
- lm_sensors
16-
- nano
17-
- nvme-cli
18-
- socat
19-
- python3-kubernetes
20-
- python3-libselinux
21-
- python3-pyyaml
22-
236
ubuntu:
247
packages:
258
- hdparm
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
timezone: "America/Los_Angeles"
3+
kubevip_address: "10.69.69.2"
4+
cluster_cidr: "10.98.0.0/16"
5+
service_cidr: "10.99.0.0/16"

ansible/inventory/group_vars/master/k3s.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ k3s_server:
2626
- traefik
2727
disable-network-policy: true
2828
disable-cloud-controller: true
29+
disable-kube-proxy: true
2930
write-kubeconfig-mode: "644"
3031
# Network CIDR to use for pod IPs
3132
cluster-cidr: "10.42.0.0/16"
@@ -34,9 +35,6 @@ k3s_server:
3435
kube-controller-manager-arg:
3536
# Required to monitor kube-controller-manager with kube-prometheus-stack
3637
- "bind-address=0.0.0.0"
37-
kube-proxy-arg:
38-
# Required to monitor kube-proxy with kube-prometheus-stack
39-
- "metrics-bind-address=0.0.0.0"
4038
kube-scheduler-arg:
4139
# Required to monitor kube-scheduler with kube-prometheus-stack
4240
- "bind-address=0.0.0.0"

ansible/playbooks/cluster-installation.yml

Lines changed: 53 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -45,53 +45,67 @@
4545
regexp: "https://127.0.0.1:6443"
4646
replace: "https://{{ k3s_registration_address }}:6443"
4747

48-
- name: Resource Readiness Check
48+
# Unmanaging and removing the Cilium HelmChart is required for
49+
# flux to take over managing the lifecycle of Cilium
50+
51+
- name: Post installation of custom manifests tasks
4952
run_once: true
50-
kubernetes.core.k8s_info:
51-
kubeconfig: /etc/rancher/k3s/k3s.yaml
52-
kind: "{{ item.kind }}"
53-
name: "{{ item.name }}"
54-
namespace: "{{ item.namespace | default('') }}"
55-
wait: true
56-
wait_sleep: 10
57-
wait_timeout: 360
58-
loop:
59-
- kind: Deployment
60-
name: tigera-operator
61-
namespace: tigera-operator
62-
- kind: DaemonSet
63-
name: kube-vip
64-
namespace: kube-system
65-
- kind: Installation
66-
name: default
67-
- kind: CustomResourceDefinition
68-
name: alertmanagerconfigs.monitoring.coreos.com
69-
- kind: CustomResourceDefinition
70-
name: alertmanagers.monitoring.coreos.com
71-
- kind: CustomResourceDefinition
72-
name: podmonitors.monitoring.coreos.com
73-
- kind: CustomResourceDefinition
74-
name: probes.monitoring.coreos.com
75-
- kind: CustomResourceDefinition
76-
name: prometheuses.monitoring.coreos.com
77-
- kind: CustomResourceDefinition
78-
name: prometheusrules.monitoring.coreos.com
79-
- kind: CustomResourceDefinition
80-
name: servicemonitors.monitoring.coreos.com
81-
- kind: CustomResourceDefinition
82-
name: thanosrulers.monitoring.coreos.com
83-
- kind: CustomResourceDefinition
84-
name: scrapeconfigs.monitoring.coreos.com
85-
- kind: CustomResourceDefinition
86-
name: prometheusagents.monitoring.coreos.com
8753
when:
8854
- k3s_server_manifests_templates | length > 0
8955
or k3s_server_manifests_urls | length > 0
9056
- k3s_control_node is defined
9157
- k3s_control_node
58+
block:
59+
- name: Wait for custom manifests to rollout
60+
kubernetes.core.k8s_info:
61+
kubeconfig: /etc/rancher/k3s/k3s.yaml
62+
kind: "{{ item.kind }}"
63+
name: "{{ item.name }}"
64+
namespace: "{{ item.namespace | default('') }}"
65+
wait: true
66+
wait_sleep: 10
67+
wait_timeout: 360
68+
loop:
69+
- name: cilium
70+
kind: HelmChart
71+
namespace: kube-system
72+
- name: podmonitors.monitoring.coreos.com
73+
kind: CustomResourceDefinition
74+
- name: prometheusrules.monitoring.coreos.com
75+
kind: CustomResourceDefinition
76+
- name: servicemonitors.monitoring.coreos.com
77+
kind: CustomResourceDefinition
78+
- name: Wait for Cilium to rollout
79+
kubernetes.core.k8s_info:
80+
kubeconfig: /etc/rancher/k3s/k3s.yaml
81+
kind: Job
82+
name: helm-install-cilium
83+
namespace: kube-system
84+
wait: true
85+
wait_condition:
86+
type: Complete
87+
status: true
88+
wait_timeout: 360
89+
- name: Patch the Cilium HelmChart to unmanage it
90+
kubernetes.core.k8s_json_patch:
91+
kubeconfig: /etc/rancher/k3s/k3s.yaml
92+
name: cilium
93+
kind: HelmChart
94+
namespace: kube-system
95+
patch:
96+
- op: add
97+
path: /metadata/annotations/helmcharts.helm.cattle.io~1unmanaged
98+
value: "true"
99+
- name: Remove the Cilium HelmChart CR
100+
kubernetes.core.k8s:
101+
kubeconfig: /etc/rancher/k3s/k3s.yaml
102+
name: cilium
103+
kind: HelmChart
104+
namespace: kube-system
105+
state: absent
92106

93107
# NOTE
94-
# Cleaning up the manifests from the /var/lib/rancher/k3s/server/manifests directory
108+
# Cleaning up certain manifests from the /var/lib/rancher/k3s/server/manifests directory
95109
# is needed because k3s has an awesome "feature" to always re-deploy them when the k3s
96110
# service is restarted. Removing them does not uninstall the manifests from your cluster.
97111

ansible/playbooks/cluster-nuke.yml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
- name: nuke
1010
prompt: |-
1111
Are you sure you want to nuke this cluster?
12-
Type YES I WANT TO DESTROY THIS CLUSTER to proceed
12+
Type 'YES I WANT TO DESTROY THIS CLUSTER' to proceed
1313
default: "n"
1414
private: false
1515
pre_tasks:
@@ -22,6 +22,25 @@
2222
ansible.builtin.pause:
2323
seconds: 5
2424
tasks:
25+
- name: Uninstall Cilium
26+
when:
27+
- k3s_control_node is defined
28+
- k3s_control_node
29+
ansible.builtin.shell: |
30+
cilium uninstall --wait
31+
environment:
32+
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
33+
34+
- name: Prevent k3s from starting on reboot
35+
ansible.builtin.systemd:
36+
name: k3s
37+
enabled: false
38+
39+
- name: Reboot
40+
ansible.builtin.reboot:
41+
msg: Rebooting nodes
42+
reboot_timeout: 3600
43+
2544
- name: Uninstall k3s
2645
ansible.builtin.include_role:
2746
name: xanmanning.k3s

0 commit comments

Comments
 (0)