Steps to configure homelab kubernetes cluster - specific to my set up, but could be easily replicated within other environments
Bare Metal install of Ubuntu Server 18.04 LTS on both Master and Worker nodes
sudo apt install libnss3-tools
wget https://github.com/FiloSottile/mkcert/releases/download/v1.4.0/mkcert-v1.4.0-linux-amd64
mv mkcert-v1.4.0-linux-amd64 mkcert
chmod +x mkcert
sudo mv mkcert /usr/local/bin/mkcert

Create x509 Certificates
# install local CA
mkcert -install
mkcert '*.home.swinney.io'
mkdir -p cert && mv *-key.pem cert/key.pem && mv *.pem cert/cert.pem

Install Go
wget https://dl.google.com/go/go1.13.linux-amd64.tar.gz
tar -xvf go1.13.linux-amd64.tar.gz
sudo mv go /usr/local
rm -f go1.13.linux-amd64.tar.gz
mkdir ~/go-projects

Set GOROOT and GOPATH in .profile
# set PATH for GoLang
if [ -d "$HOME/go-projects" ] ; then
GOROOT="/usr/local/go"
GOPATH="$HOME/go-projects"
PATH="$GOPATH/bin:$GOROOT/bin:$PATH"
fi

Install GCC
sudo apt update
sudo apt install -y gcc

Install CFSSL
sudo apt install golang-cfssl

Install Istioctl
Multi node deployment, with Pods deployable on Master node, suits my homelab environment. Deep Learning Worker node is marked for ML/DL deployments only. No other Pods will run on it.
sudo apt update && sudo apt install -y apt-transport-https curl ca-certificates software-properties-common nfs-common
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo add-apt-repository \
  "deb https://apt.kubernetes.io/ kubernetes-xenial main"
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
  "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
sudo apt update
sudo swapoff -a
sudo sed -i '/ swap / s/^/#/' /etc/fstab

The swap mount point in /etc/fstab should now be commented out.
sudo apt-get install docker-ce=18.06.2~ce~3-0~ubuntu
sudo apt-mark hold docker-ce
sudo tee /etc/docker/daemon.json > /dev/null <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2"
}
EOF
sudo systemctl daemon-reload && sudo systemctl restart docker
sudo apt install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
sudo kubeadm init --config kubeinit.conf
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
kubectl create secret -n kube-system generic weave-passwd --from-literal=weave-passwd=$(hexdump -n 16 -e '4/4 "%08x" 1 "\n"' /dev/random)
kubectl apply -n kube-system -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')&password-secret=weave-passwd"
kubectl taint nodes --all node-role.kubernetes.io/master-
kubectl create -f helm/helm-rbac.yaml
kubectl create serviceaccount --namespace kube-system tiller
kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
sudo snap install helm --classic
helm init --upgrade
kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
helm install --name=metallb --namespace=metallb-system -f metallb/metallb-values.yaml stable/metallb
kubectl create -f nfs-client/deploy/rbac.yaml
kubectl apply -f nfs-client/deploy/deployment.yaml
kubectl apply -f nfs-client/deploy/class.yaml
kubectl patch deployment nfs-client-provisioner -p '{"spec":{"template":{"spec":{"serviceAccount":"nfs-client-provisioner"}}}}'
kubectl patch storageclass managed-nfs-storage -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
kubectl create -f 1.8-metricserver
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta1/aio/deploy/recommended.yaml

Change spec.type to LoadBalancer:
kubectl --namespace kubernetes-dashboard edit service kubernetes-dashboard
kubectl apply -f dashboard/dashboard-admin.yaml
kubectl -n kubernetes-dashboard describe secret $(kubectl -n kubernetes-dashboard get secret | grep admin-user | awk '{print $1}')

Example token output:
eyJhbGciOiJSUzI1NiIsImtpZCI6IiJ9.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi11c2VyLXRva2VuLXNtbTQyIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZXJ2aWNlLWFjY291bnQubmFtZSI6ImFkbWluLXVzZXIiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC51aWQiOiI3YjlkMzk4Ni1kYTQyLTQwMTUtOWI4ZC1mYjgzNzgxM2I1YTciLCJzdWIiOiJzeXN0ZW06c2VydmljZWFjY291bnQ6a3ViZXJuZXRlcy1kYXNoYm9hcmQ6YWRtaW4tdXNlciJ9.jjGAVDeJJBIXe7jzSbmC_azlT5MAnH3yemX81m9Bv9W_I5u2Nm9aezTPZyRnO46UN7Eb2piWH5fUeNCiVZylPQt-FI4L4BGLEl5RWJInckollrSRw2bhEBkdtmEdHWjqsKXNQLV2qbuTin6ZE4lpuMa0PbkCkX-wtdpf0ejnq_PIIEdkOAvrYOKzIO6LHAEkCtK4nFObwEGPUH1yDoIbGCbdlg_xbEx-6Uv7Xz8YfbZ3DBDljcL_tyk8LwmaUWmNryTNclWBXNPOKnqrfkx1DEdj6RXTrG9TIbaIJ8YW324PmYPkPt_MDGQNxDDwpWAgH7BsogOcb7XWRGuix16_pQ

kubectl --namespace kubernetes-dashboard get service kubernetes-dashboard

Connect via https://
helm install stable/heapster --name heapster --set rbac.create=true
wget https://github.com/bitnami-labs/sealed-secrets/releases/download/v0.8.3/kubeseal-linux-amd64 -O kubeseal
sudo install -m 755 kubeseal /usr/local/bin/kubeseal
rm -f kubeseal
kubectl apply -f https://github.com/bitnami-labs/sealed-secrets/releases/download/v0.8.3/controller.yaml
kubeseal --fetch-cert > certs/kubecert.pem

Example Usage:
echo -n <SECRET> | kubectl create secret generic <SECRET-NAME> --dry-run --from-file=<VALUE>=/dev/stdin -o yaml > <SECRET-FILENAME>.yaml
kubeseal --cert certs/kubecert.pem --format yaml < <SECRET-FILENAME>.yaml > <SEALEDSECRET-FILENAME>.yaml

Install Traefik for LoadBalancing/Ingress - We'll use node labels to prevent pods being deployed on the Deep Learning Worker
kubectl apply -f traefik/traefik-service-acc.yaml
kubectl apply -f traefik/traefik-cr.yaml
kubectl apply -f traefik/traefik-crb.yaml
kubectl apply -f traefik/traefik-deployment.yaml
kubectl apply -f traefik/traefik-svc.yaml
kubectl apply -f traefik/traefik-webui-svc.yaml
kubectl apply -f traefik/traefik-webui-ingress.yaml
kubectl apply -f namespaces
kubectl apply -f monitoring

A lot of this could be done during the initial Master/Worker build
TBC

REBOOT!
sudo apt update && sudo apt install -y apt-transport-https curl ca-certificates software-properties-common nfs-common
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo add-apt-repository \
  "deb https://apt.kubernetes.io/ kubernetes-xenial main"
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
  "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt update
sudo swapoff -a

Check /etc/fstab and comment out the swap mount point

sudo apt-get install docker-ce=18.06.2~ce~3-0~ubuntu
sudo apt-mark hold docker-ce
sudo apt-get install nvidia-docker2
sudo tee /etc/docker/daemon.json > /dev/null <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2",
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
}
}
EOF
sudo mkdir -p /etc/systemd/system/docker.service.d
sudo systemctl daemon-reload && sudo systemctl restart docker
sudo apt install -y kubelet kubeadm kubectl

REBOOT!
Create token for joining cluster and list it out
kubeadm token create && kubeadm token list

List token-ca-cert-hash
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
openssl dgst -sha256 -hex | sed 's/^.* //'

On Worker Node
sudo kubeadm join --token <token> <master-ip>:<master-port> --discovery-token-ca-cert-hash sha256:<hash>

Example:
sudo kubeadm join --token zo9ju3.ezaz85c7oha3x9jp kube:6443 --discovery-token-ca-cert-hash sha256:6877abd8a6f646680ad1fd8ef0373d128890715decfbb1724d75431dd8bbdd80

Confirm on master node that the worker node has joined
kubectl get nodes -o wide
kubectl label nodes deeplab.home.swinney.io workload=mldl

Confirm label has been applied
kubectl get nodes --show-labels

On Master Node, install Nvidia DevicePlugin DaemonSet
kubectl create -f deviceplugins/nvidia-device-plugin.yml
kubectl apply -f deployments/pihole
opsys=linux
curl -s https://api.github.com/repos/kubeflow/kubeflow/releases/latest |\
grep browser_download |\
grep $opsys |\
cut -d '"' -f 4 |\
xargs curl -O -L && \
tar -zvxf kfctl_*_${opsys}.tar.gz
# Add kfctl to PATH, to make the kfctl binary easier to use.
export PATH=$PATH:"<path to kfctl>"
export KFAPP="<your choice of application directory name>"
# Installs Istio by default. Comment out Istio components in the config file to skip Istio installation. See https://github.com/kubeflow/kubeflow/pull/3663
export CONFIG="https://raw.githubusercontent.com/kubeflow/kubeflow/v0.6-branch/bootstrap/config/kfctl_k8s_istio.0.6.2.yaml"
kfctl init ${KFAPP} --config=${CONFIG} -V
cd ${KFAPP}
kfctl generate all -V
kfctl apply all -V