diff --git a/fleet/lib/rook-ceph-cluster/overlays/lukay/values.yaml b/fleet/lib/rook-ceph-cluster/overlays/lukay/values.yaml index b44401186..1bc586840 100644 --- a/fleet/lib/rook-ceph-cluster/overlays/lukay/values.yaml +++ b/fleet/lib/rook-ceph-cluster/overlays/lukay/values.yaml @@ -1,68 +1,73 @@ --- cephClusterSpec: - # XXX allow upgrade even if not healthy because lukay05 was removed - continueUpgradeAfterChecksEvenIfNotHealthy: true mon: count: 3 + cephConfig: + global: + osd_pool_default_pg_autoscale_mode: warn + rgw_override_bucket_index_max_shards: "401" + rgw_enable_usage_log: "false" + mgr: + mgr/balancer/upmap_max_deviation: "1" + osd: + osd_max_pg_per_osd_hard_ratio: "10" + osd_op_queue: wpq + osd_scrub_auto_repair: "true" storage: useAllNodes: false useAllDevices: false config: - osdsPerDevice: "3" + osdsPerDevice: "1" + encryptedDevice: "true" nodes: - name: lukay01 devices: - - name: sda - - name: sdb - - name: sdc - - name: sdd - - name: sde - - name: sdf - - name: sdg - - name: sdh - - name: sdi - - name: sdj + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702912N + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702901H + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702902M + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702906Y + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702936A + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R704221R + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702953P + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804983Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702951K + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R702385N - name: lukay02 devices: - - name: sda - - name: sdb - - name: sdc - - name: sdd - - name: sde - - name: sdf - - name: sdg - - name: sdh - - name: sdi - - name: sdj + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705147R + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705139Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705153V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705479L + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705487X + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705492F + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705485Y + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705109K + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705141T + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R805000T - name: lukay03 devices: - - name: sda - - name: sdb - - name: sdc - - name: sdd - - name: sde - - name: sdf - - name: sdg - - name: sdh - - name: sdi - - name: sdj - - name: lukay04 + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R704207V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804245D + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R802783P + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R802782B + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804846V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R802794M + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0RA02831V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0RA02160A + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R801805Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804287J + - name: lukay05 devices: - - name: sda - - name: sdb - - name: sdc - - name: sdd - - name: sde - - name: sdf - - name: sdg - - name: sdh - - name: sdi - - name: sdj - resources: - osd: - requests: - cpu: 100m - memory: 4Gi + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R703848Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R704195Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R704225B + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R705482V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804292R + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804293V + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804996Z + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R804998H + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R805002E + - name: /dev/disk/by-id/scsi-SATA_Samsung_SSD_870_S5VUNJ0R805003M cephBlockPools: - name: replicapool @@ -74,12 +79,19 @@ cephBlockPools: quotas: maxSize: 2Ti enableRBDStats: true + parameters: + nodelete: "true" + nosizechange: "true" + pg_autoscale_mode: "off" + pg_num: "32" storageClass: name: rook-ceph-block enabled: true isDefault: true reclaimPolicy: Delete allowVolumeExpansion: true + mountOptions: + - discard parameters: clusterID: rook-ceph pool: replicapool diff --git a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephnfs-backup.yaml b/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephnfs-backup.yaml deleted file mode 100644 index 8e6465ccc..000000000 --- a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephnfs-backup.yaml +++ /dev/null @@ -1,71 +0,0 @@ ---- -apiVersion: ceph.rook.io/v1 -kind: CephFilesystem -metadata: - name: backup - namespace: rook-ceph -spec: - metadataPool: - failureDomain: host - replicated: - size: 3 - dataPools: - - failureDomain: host - replicated: - size: 3 - metadataServer: - activeCount: 3 - activeStandby: true - resources: - limits: - cpu: "4" - memory: 4Gi - requests: - cpu: "4" - memory: 4Gi - preserveFilesystemOnDelete: false ---- -apiVersion: ceph.rook.io/v1 -kind: CephNFS -metadata: - name: backup - namespace: rook-ceph -spec: - rados: - pool: backup-data0 - server: - active: 1 - resources: - limits: - cpu: "3" - memory: 8Gi - requests: - cpu: "3" - memory: 8Gi ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app: rook-ceph-nfs - ceph_daemon_type: nfs - ceph_nfs: backup - instance: a - rook_cluster: rook-ceph - name: rook-ceph-nfs-backup - namespace: rook-ceph - annotations: - metallb.universe.tf/loadBalancerIPs: 139.229.160.212 -spec: - ports: - - name: nfs - port: 2049 - protocol: TCP - targetPort: 2049 - selector: - app: rook-ceph-nfs - ceph_daemon_type: nfs - ceph_nfs: backup - instance: a - rook_cluster: rook-ceph - type: LoadBalancer diff --git a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-it.yaml b/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-backups.yaml similarity index 74% rename from fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-it.yaml rename to fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-backups.yaml index af3e3c63f..d356b7e4e 100644 --- a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-it.yaml +++ b/fleet/lib/rook-ceph-conf/charts/lukay/templates/cephobjectstore-backups.yaml @@ -2,32 +2,28 @@ apiVersion: ceph.rook.io/v1 kind: CephObjectRealm metadata: - name: it - namespace: rook-ceph + name: backups spec: defaultRealm: true --- apiVersion: ceph.rook.io/v1 kind: CephObjectZoneGroup metadata: - name: it - namespace: rook-ceph + name: backups spec: - realm: it + realm: backups --- apiVersion: ceph.rook.io/v1 kind: CephObjectZone metadata: - name: it - namespace: rook-ceph + name: backups spec: - zoneGroup: it + zoneGroup: backups --- apiVersion: ceph.rook.io/v1 kind: CephObjectStore metadata: - name: it - namespace: rook-ceph + name: backups spec: metadataPool: failureDomain: host @@ -49,13 +45,12 @@ spec: # securePort: 443 instances: 3 zone: - name: it + name: backups --- apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: rook-ceph-rgw-it - namespace: rook-ceph + name: rook-ceph-rgw-backups annotations: cert-manager.io/cluster-issuer: letsencrypt nginx.ingress.kubernetes.io/proxy-body-size: 1024m @@ -63,16 +58,16 @@ spec: ingressClassName: nginx tls: - hosts: - - it-s3.ls.lsst.org - secretName: rook-ceph-rgw-it-ingress-tls + - s3-backups.ls.lsst.org + secretName: rook-ceph-rgw-backups-ingress-tls rules: - - host: it-s3.ls.lsst.org + - host: s3-backups.ls.lsst.org http: paths: - path: / pathType: Prefix backend: service: - name: rook-ceph-rgw-it + name: rook-ceph-rgw-backups port: number: 80 diff --git a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cm-cephcli.yaml b/fleet/lib/rook-ceph-conf/charts/lukay/templates/cm-cephcli.yaml index 89370488b..c3856854a 100644 --- a/fleet/lib/rook-ceph-conf/charts/lukay/templates/cm-cephcli.yaml +++ b/fleet/lib/rook-ceph-conf/charts/lukay/templates/cm-cephcli.yaml @@ -10,10 +10,6 @@ data: ceph orch set backend "" ceph mgr module disable rook - waitfornfs backup - ceph nfs export rm backup /backup - ceph nfs export create cephfs backup /backup backup - ceph mgr module enable rook ceph orch set backend rook ceph device monitoring on diff --git a/lukay/README.md b/lukay/README.md deleted file mode 100644 index 2085b1c00..000000000 --- a/lukay/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# Lukay cluster deployment - -```bash -ssh lukay01.cp.lsst.org -sudo -iu rke -git clone https://github.com/lsst-it/k8s-cookbook -cd k8s-cookbook/lukay/ - -(cd rke; rke up) -export KUBECONFIG=/home/rke/k8s-cookbook/luan/rke/kube_config_cluster.yml - -(cd metallb; ./metallb.sh) - -(cd cert-manager; ./cert-manager.sh) - -(cd ingress; ./ingress-nginx-helm.sh) - -(cd multus; ./multus.sh) - -(cd rook-ceph; ./rook-ceph.sh) - -``` - -Import lukay cluster into rancher via this url: - -https://rancher.cp.lsst.org/g/clusters/add/launch/import?importProvider=other diff --git a/lukay/external-secrets/README.md b/lukay/external-secrets/README.md deleted file mode 120000 index ac2d2bd3e..000000000 --- a/lukay/external-secrets/README.md +++ /dev/null @@ -1 +0,0 @@ -../../template/external-secrets/README.md \ No newline at end of file diff --git a/lukay/external-secrets/external-secrets.sh b/lukay/external-secrets/external-secrets.sh deleted file mode 120000 index 481549d0f..000000000 --- a/lukay/external-secrets/external-secrets.sh +++ /dev/null @@ -1 +0,0 @@ -../../template/external-secrets/external-secrets.sh \ No newline at end of file diff --git a/lukay/external-secrets/fetch-credentials.sh b/lukay/external-secrets/fetch-credentials.sh deleted file mode 120000 index bff691441..000000000 --- a/lukay/external-secrets/fetch-credentials.sh +++ /dev/null @@ -1 +0,0 @@ -../../template/external-secrets/fetch-credentials.sh \ No newline at end of file diff --git a/lukay/rke/.gitignore b/lukay/rke/.gitignore deleted file mode 100644 index d534642ee..000000000 --- a/lukay/rke/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -cluster.rkestate -kube_config_cluster.yml -*.tar.gz diff --git a/lukay/rke/Makefile b/lukay/rke/Makefile deleted file mode 120000 index bb99fbe76..000000000 --- a/lukay/rke/Makefile +++ /dev/null @@ -1 +0,0 @@ -../.././template/rke/Makefile \ No newline at end of file diff --git a/lukay/rke/cluster.yml b/lukay/rke/cluster.yml deleted file mode 100644 index 650d3ea53..000000000 --- a/lukay/rke/cluster.yml +++ /dev/null @@ -1,51 +0,0 @@ ---- -# If you intened to deploy Kubernetes in an air-gapped environment, -# please consult the documentation on how to configure custom RKE images. -nodes: -- address: lukay01.cp.lsst.org - hostname_override: lukay01 - user: rke - role: - - controlplane - - worker - - etcd - labels: - role: storage-node -- address: lukay02.cp.lsst.org - hostname_override: lukay02 - user: rke - role: - - controlplane - - worker - - etcd - labels: - role: storage-node -- address: lukay03.cp.lsst.org - hostname_override: lukay03 - user: rke - role: - - controlplane - - worker - - etcd - labels: - role: storage-node -- address: lukay04.cp.lsst.org - hostname_override: lukay04 - user: rke - role: - - controlplane - - worker - - etcd - labels: - role: storage-node -services: - kubelet: - extra_args: - node-status-max-images: "-1" -network: - plugin: canal -ssh_key_path: ~/.ssh/id_rsa -ignore_docker_version: true -kubernetes_version: v1.32.2-rancher1-1 -ingress: - provider: none diff --git a/lukay/rook-ceph/scripts/rook-ceph-node-cleanup.sh b/lukay/rook-ceph/scripts/rook-ceph-node-cleanup.sh deleted file mode 100755 index d01c910ab..000000000 --- a/lukay/rook-ceph/scripts/rook-ceph-node-cleanup.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -set -xe - -SSH_USER='hreinking_b' -DEV=(a b c d e f g h i j) -SSH=(3) -for f in "${SSH[@]}" -do - for i in "${DEV[@]}" - do - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org 'sudo rm -rf /var/lib/rook' - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org 'ls /dev/mapper/ceph-* | xargs -I% -- echo /sbin/dmsetup remove %' - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org 'sudo rm -rf /dev/ceph-*' - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org "sudo sgdisk --zap-all /dev/sd${i}" - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org "sudo dd if=/dev/zero of=/dev/sd${i} bs=1M count=100 oflag=direct,dsync" - ssh -l $SSH_USER lukay0"${f}".cp.lsst.org "sudo blockdev --rereadpt /dev/sd${i}" - done - # ssh -l $SSH_USER $f 'sudo reboot' -done \ No newline at end of file diff --git a/lukay/external-secrets/.gitignore b/rke2/lukay/external-secrets/.gitignore similarity index 100% rename from lukay/external-secrets/.gitignore rename to rke2/lukay/external-secrets/.gitignore diff --git a/rke2/lukay/external-secrets/README.md b/rke2/lukay/external-secrets/README.md new file mode 120000 index 000000000..da3703d40 --- /dev/null +++ b/rke2/lukay/external-secrets/README.md @@ -0,0 +1 @@ +../../../template/external-secrets/README.md \ No newline at end of file diff --git a/rke2/lukay/external-secrets/external-secrets.sh b/rke2/lukay/external-secrets/external-secrets.sh new file mode 120000 index 000000000..8449c7f89 --- /dev/null +++ b/rke2/lukay/external-secrets/external-secrets.sh @@ -0,0 +1 @@ +../../../template/external-secrets/external-secrets.sh \ No newline at end of file diff --git a/rke2/lukay/external-secrets/fetch-credentials.sh b/rke2/lukay/external-secrets/fetch-credentials.sh new file mode 120000 index 000000000..b72b7d149 --- /dev/null +++ b/rke2/lukay/external-secrets/fetch-credentials.sh @@ -0,0 +1 @@ +../../../template/external-secrets/fetch-credentials.sh \ No newline at end of file diff --git a/lukay/external-secrets/onepass_item.sh b/rke2/lukay/external-secrets/onepass_item.sh similarity index 100% rename from lukay/external-secrets/onepass_item.sh rename to rke2/lukay/external-secrets/onepass_item.sh