Skip to content

Commit 934024a

Browse files
committed
pt1
1 parent f9745f8 commit 934024a

File tree

1 file changed

+2
-157
lines changed

1 file changed

+2
-157
lines changed

.github/workflows/build-notebooks-TEMPLATE.yaml

Lines changed: 2 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -398,164 +398,9 @@ jobs:
398398
ln -s ../rocm-tensorflow runtimes/rocm/tensorflow
399399
ln -s ../rocm-pytorch runtimes/rocm/pytorch
400400
401-
# https://cri-o.io/
402-
- name: Install cri-o
403-
id: install-crio
401+
- name: Provision K8s cluster
404402
if: ${{ steps.have-tests.outputs.tests == 'true' }}
405-
run: |
406-
set -Eeuxo pipefail
407-
408-
# the Microsoft repo's kubelet does not provide /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
409-
# [Service]
410-
# EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
411-
# ExecStart=/usr/bin/kubelet $KUBELET_KUBEADM_ARGS
412-
sudo ls /etc/apt/sources.list.d/
413-
sudo rm /etc/apt/sources.list.d/microsoft-prod.list
414-
415-
sudo apt-get update
416-
sudo apt-get install -y software-properties-common curl
417-
418-
# https://github.com/cri-o/packaging?tab=readme-ov-file#distributions-using-deb-packages
419-
420-
curl -fsSL https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/Release.key | \
421-
sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
422-
423-
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/ /" | \
424-
sudo tee /etc/apt/sources.list.d/kubernetes.list
425-
426-
curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/Release.key | \
427-
sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg
428-
429-
echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/ /" | \
430-
sudo tee /etc/apt/sources.list.d/cri-o.list
431-
432-
sudo apt-get update
433-
434-
# [ERROR FileExisting-conntrack]: conntrack not found in system path
435-
# see man apt-patterns for the ~name=version* syntax
436-
437-
# The following packages will be DOWNGRADED:
438-
# kubectl
439-
# E: Packages were downgraded and -y was used without --allow-downgrades.
440-
441-
sudo apt-get install -y --allow-downgrades \
442-
"cri-o=${CRIO_VERSION}.*" \
443-
"kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \
444-
conntrack
445-
446-
# make use of /etc/cni/net.d/11-crio-ipv4-bridge.conflist so we don't
447-
# need a pod network and just use the default bridge
448-
sudo rm -rf /etc/cni/net.d/*
449-
# cat /etc/cni/net.d/11-crio-ipv4-bridge.conflist
450-
# https://github.com/containerd/containerd/blob/main/script%2Fsetup%2Finstall-cni
451-
# https://www.cni.dev/plugins/current/main/bridge/
452-
sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/11-crio-ipv4-bridge.conflist
453-
454-
sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/
455-
456-
sudo systemctl daemon-reload
457-
sudo systemctl start crio.service
458-
env:
459-
# TODO(jdanek): install also "cri-tools=${CRIO_VERSION}.*" when updating to 1.33
460-
CRIO_VERSION: 1.32
461-
# This has to be kept in sync with the packages above, otherwise
462-
# [ERROR KubeletVersion]: the kubelet version is higher than the control plane version.
463-
# This is not a supported version skew and may lead to a malfunctional cluster.
464-
# Kubelet version: "1.33.0" Control plane version: "1.30.12"
465-
KUBERNETES_VERSION: 1.33
466-
# Also update version in kubeadm.yaml
467-
468-
- run: sudo crictl info
469-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
470-
471-
- name: Show crio debug data (on failure)
472-
if: ${{ failure() && steps.have-tests.outputs.tests == 'true' }}
473-
run: |
474-
set -Eeuxo pipefail
475-
476-
sudo systemctl status crio.service || true
477-
sudo journalctl -xeu crio.service
478-
479-
# do this early, it's a good check that cri-o is not completely broken
480-
- name: "Show crio images information"
481-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
482-
run: sudo crictl images
483-
484-
- name: Install Kubernetes cluster
485-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
486-
run: |
487-
set -Eeuxo pipefail
488-
489-
sudo swapoff -a
490-
sudo modprobe br_netfilter
491-
sudo sysctl -w net.ipv4.ip_forward=1
492-
493-
# Was getting strange DNS resolution errors from pods that don't seem to want to go away sometimes:
494-
# Resolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Name or service not known.
495-
# wget: unable to resolve host address ‘raw.githubusercontent.com’
496-
# Here's what helped:
497-
# https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues
498-
# https://github.com/kubernetes/kubernetes/blob/e4c1f980b76fecece30c2f77885a7117192170a6/CHANGELOG/CHANGELOG-1.30.md?plain=1#L1454
499-
# https://github.com/canonical/microk8s/issues/68#issuecomment-404923563
500-
sudo ufw allow in on cni0
501-
sudo ufw allow out on cni0
502-
sudo ufw default allow routed
503-
sudo iptables -P FORWARD ACCEPT
504-
sudo iptables -t nat -A POSTROUTING -s 10.85.0.0/16 -o eth0 -j MASQUERADE
505-
506-
sudo kubeadm reset -f --cri-socket=unix:///var/run/crio/crio.sock
507-
508-
# https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm
509-
sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml
510-
511-
mkdir -p $HOME/.kube
512-
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
513-
sudo chown $(id -u):$(id -g) $HOME/.kube/config
514-
515-
- name: Show kubelet debug data (on failure)
516-
if: ${{ failure() && steps.have-tests.outputs.tests == 'true' && steps.install-crio.outcome == 'success' }}
517-
run: |
518-
set -Eeuxo pipefail
519-
520-
# [kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
521-
sudo cat /var/lib/kubelet/kubeadm-flags.env || true
522-
# [kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
523-
sudo cat /var/lib/kubelet/config.yaml || true
524-
525-
sudo systemctl cat kubelet.service || true
526-
527-
sudo cat /etc/systemd/system/kubelet.service.d/10-kubeadm.conf || true
528-
529-
sudo systemctl status kubelet || true
530-
sudo journalctl -xeu kubelet
531-
532-
# Here is one example how you may list all running Kubernetes containers by using crictl:
533-
sudo crictl --runtime-endpoint unix:///var/run/crio/crio.sock ps -a | grep kube | grep -v pause
534-
# Once you have found the failing container, you can inspect its logs with:
535-
# crictl --runtime-endpoint unix:///var/run/crio/crio.sock logs CONTAINERID
536-
537-
- name: Show nodes status and wait for readiness
538-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
539-
run: |
540-
kubectl describe nodes
541-
kubectl wait --for=condition=Ready nodes --all --timeout=100s || (kubectl describe nodes && false)
542-
543-
- name: Wait for pods to be running
544-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
545-
run: |
546-
set -Eeuxo pipefail
547-
kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s
548-
kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s
549-
550-
- name: "Install local-path provisioner"
551-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
552-
run: |
553-
set -Eeuxo pipefail
554-
kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml
555-
kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s
556-
# https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/
557-
kubectl get storageclass
558-
kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
403+
uses: ./.github/actions/provision-k8s
559404

560405
- name: "Run image tests"
561406
# skip on s390x because we are unable to install requirements-elyra.txt that's installed by runtime image tests

0 commit comments

Comments
 (0)