|
| 1 | +#!/bin/bash |
| 2 | + |
# Print a status message to stdout (logging to file is done by log()).
# Arguments: $1 - message text
# Outputs:   the message followed by a newline on stdout
# printf is used instead of echo so that a message such as "-n" or "-e" is
# printed literally rather than being consumed as an echo option.
broadcast() {
  printf '%s\n' "$1"
}
| 7 | + |
# Append a message as a single line to the script log file.
# Arguments: $1 - message text
# Globals:   LOG_FILE (read, optional) - overrides the log path; when it is
#            unset or empty the default /var/log/script.log is used.
# NOTE(review): brace-style parameter expansion is deliberately avoided here
# because this script appears to be rendered as a Terraform template, where
# dollar-brace sequences would be interpolated - confirm.
log() {
  local log_file=$LOG_FILE
  [ -n "$log_file" ] || log_file=/var/log/script.log
  # printf (not echo) so messages like "-n" are written verbatim.
  printf '%s\n' "$1" >> "$log_file"
}
| 11 | + |
# Main script execution starts here.
#
# Provisions the instance: installs Docker and the NVIDIA container toolkit,
# installs Python tooling, grows the filesystem, starts a Holoscan Jupyter
# container and opens port 8888 in the firewall.
#
# NOTE(review): PUB_KEY and nvidia_api_key are not assigned anywhere in this
# script; they are presumably substituted by Terraform template rendering
# before it runs - confirm. For that reason the code added below avoids
# brace-style shell expansions.
#
# Steps are best-effort: a failing step is reported through notify but does
# not abort the remaining provisioning.
echo "Running cloudinit.sh script"

# Send one status message to both stdout and the log file.
# Arguments: $1 - message text
notify() {
  broadcast "$1"
  log "$1"
}

# Report the outcome of a step instead of claiming success unconditionally.
# Arguments: $1 - exit status of the step
#            $2 - message to emit when the status is zero
#            $3 - message to emit otherwise
report_step() {
  if [ "$1" -eq 0 ]; then
    notify "$2"
  else
    notify "$3"
  fi
}

# Add public key to OPC user.
# The key is passed as a single quoted argument and appended via tee, so it
# is never re-parsed as shell code by an inner "sh -c".
echo "Adding public key to OPC authorized_keys"
printf '%s\n' "${PUB_KEY}" | sudo -u opc tee -a /home/opc/.ssh/authorized_keys >/dev/null

# Install essential packages including git
echo "Installing necessary packages..."
dnf install -y dnf-utils zip unzip gcc git
dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
dnf remove -y runc

# Install Docker
echo "Installing Docker..."
dnf install -y docker-ce --nobest
systemctl enable docker.service

# Get API key from Terraform variable
api_key="${nvidia_api_key}"

# Install NVIDIA container toolkit for Docker
notify "Installing NVIDIA container toolkit for Docker..."
# Sourced in a subshell so the os-release variables do not leak into this script.
distribution=$(. /etc/os-release; echo "$ID$VERSION_ID")
curl -s -L "https://nvidia.github.io/libnvidia-container/$distribution/nvidia-container-toolkit.repo" \
  | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo >/dev/null
sudo yum install -y nvidia-container-toolkit >/dev/null
toolkit_status=$?
sudo systemctl restart docker
report_step "$toolkit_status" \
  "NVIDIA container toolkit installed successfully." \
  "ERROR: NVIDIA container toolkit installation failed."

# Generate CDI configuration for Docker
notify "Configuring CDI for Docker..."
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml >/dev/null
report_step "$?" \
  "CDI configured successfully for Docker." \
  "ERROR: CDI configuration for Docker failed."

# Setup NVIDIA driver persistence across reboots
notify "Enabling NVIDIA persistence daemon..."
nvidia-persistenced
sudo systemctl enable nvidia-persistenced
report_step "$?" \
  "NVIDIA persistence daemon enabled." \
  "ERROR: failed to enable NVIDIA persistence daemon."

# Configure Docker to use NVIDIA runtime
notify "Configuring Docker to use NVIDIA runtime..."
# Quoted delimiter: the JSON is written literally, with no shell expansion.
sudo tee /etc/docker/daemon.json > /dev/null <<'EOF'
{
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "runtimeArgs": []
    }
  }
}
EOF
sudo systemctl restart docker
report_step "$?" \
  "Docker configured to use NVIDIA runtime." \
  "ERROR: Docker failed to restart with the NVIDIA runtime configuration."

# Start Docker and add OPC user to Docker group
echo "Starting Docker service..."
systemctl start docker.service
usermod -aG docker opc

# Install Python packages
echo "Installing Python packages..."
python3 -m pip install --upgrade pip wheel oci
python3 -m pip install --upgrade setuptools
python3 -m pip install oci-cli langchain six

# Grow filesystem
echo "Expanding filesystem..."
/usr/libexec/oci-growfs -y

# Optional firewall configuration
# broadcast "Configuring firewall..."
# log "Configuring firewall..."
# sudo firewall-cmd --zone=public --add-port=8888/tcp --permanent
# sudo firewall-cmd --reload
# broadcast "Firewall configuration complete."
# log "Firewall configuration complete."

# Holoscan installation
notify "Logging in to nvcr.io..."
# '$oauthtoken' is a literal user name, hence the single quotes.
# The key is fed on stdin so it never appears in the process argument list.
printf '%s' "$api_key" | docker login nvcr.io --username '$oauthtoken' --password-stdin >/dev/null
report_step "$?" \
  "Logged in to nvcr.io successfully." \
  "ERROR: login to nvcr.io failed."

notify "Pulling Holoscan image from nvcr.io..."
docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu >/dev/null
report_step "$?" \
  "Holoscan image pulled successfully." \
  "ERROR: failed to pull Holoscan image."

notify "Starting Holoscan Jupyter container..."

# SECURITY NOTE(review): Jupyter is started as root with an empty token and
# password, on the host network, with the host Docker socket mounted, and
# port 8888 is opened in the firewall below. Anyone who can reach that port
# can run code in the container and control Docker on the host. Confirm that
# network access is restricted elsewhere, or set a token.
# NOTE(review): the examples volume is mounted from /home/user, while the
# rest of this script works with the opc user - confirm the intended path.
docker run -d \
  --gpus all \
  --net host \
  --ipc=host \
  --cap-add=CAP_SYS_PTRACE \
  --ulimit memlock=-1 \
  --ulimit stack=67108864 \
  -v /home/user/holoscan_examples:/examples \
  -v /var/run/docker.sock:/var/run/docker.sock \
  --name holoscan_jupyter \
  nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu /bin/bash -c \
  "apt-get update && apt-get install -y python3-pip git && \
  pip3 install jupyter && \
  mkdir -p /workspace/holoscan_jupyter_notebooks && \
  jupyter notebook --ip=0.0.0.0 --port=8888 --allow-root --no-browser --NotebookApp.token='' --NotebookApp.password='' --notebook-dir=/workspace"
# A zero status only means the container was created and started in the
# background; the in-container setup commands may still fail later.
report_step "$?" \
  "Holoscan Jupyter container started successfully." \
  "ERROR: failed to start Holoscan Jupyter container."

# Stop and configure firewall
# firewall-offline-cmd edits the configuration while firewalld is stopped;
# the rule takes effect when the service is started again.
echo "Configuring firewall..."
systemctl stop firewalld
firewall-offline-cmd --zone=public --add-port=8888/tcp
systemctl start firewalld

notify "Cloudinit.sh script completed."
0 commit comments