% chpc.tex (intro-programming-gpu): use of GPUs at the CHPC
\section{Use of GPUs at the CHPC}

\subsection{GPUs available at CHPC}

\subsubsection{Regular env.: lp/kp/np/grn clusters}
\begin{frame}
        \frametitle{GPU devices on lp/kp/np/grn}
\begin{table}[H]
   \begin{center}
     \begin{tabular}{c|c}
             \texttt{GPU device type} & \texttt{compute capability} \\
        \hline
	  \href{https://www.nvidia.com/en-us/geforce/graphics-cards/geforce-gtx-titan-x/specifications/}{\small{\texttt{NVIDIA GeForce GTX TITAN X}}} & \small{5.2} \\
	  \href{https://images.nvidia.com/content/tesla/pdf/nvidia-tesla-p100-PCIe-datasheet.pdf}{\small{\texttt{Tesla P100-PCIE-16GB}}} & \small{6.0} \\
	  \href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/documents/nvidia-p40-datasheet.pdf}{\small{\texttt{Tesla P40}}}& \small{6.1} \\
	  \href{https://www.nvidia.com/en-us/geforce/10-series/\#1080-ti-spec}{\small{\texttt{NVIDIA GeForce GTX 1080 Ti}}}    &  \small{6.1}  \\
	  \href{https://www.gpuzoo.com/GPU-NVIDIA/Titan\_V.html}{\small{\texttt{NVIDIA Titan V}}} & \small{7.0} \\
	  \href{https://images.nvidia.com/content/technologies/volta/pdf/tesla-volta-v100-datasheet-letter-fnl-web.pdf}{\small{\texttt{NVIDIA Tesla V100-PCIE-16GB}}} & \small{7.0} \\
	  \href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/tesla-t4/t4-tensor-core-product-brief.pdf}{\small{\texttt{Tesla T4}}} & \small{7.5} \\
	  \href{https://www.techpowerup.com/gpu-specs/geforce-rtx-2080-ti.c3305}{\small{\texttt{NVIDIA GeForce RTX 2080 Ti}}} & \small{7.5} \\
	  \href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-PCIe-40GB}}} & \small{8.0} \\
	  \href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-SXM4-80GB}}} & \small{8.0} \\
	  \href{https://www.nvidia.com/en-us/design-visualization/a800/}{\small{\texttt{NVIDIA A800 40GB Active}}} & \small{8.0}  \\
        \hline
    \end{tabular}
   \end{center}
   \caption{GPU devices on lp/kp/np/grn (10/31/2025)}
\end{table}
\end{frame}

\begin{frame}
	\frametitle{GPU devices on lp/kp/np/grn (cont.)}
\begin{table}[H]
   \begin{center}
     \begin{tabular}{c|c}
             \texttt{GPU device type} & \texttt{compute capability} \\
      \hline
        \href{https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3090-3090ti/}{\small{\texttt{NVIDIA GeForce RTX 3090}}} & \small{8.6} \\
        \href{https://images.nvidia.com/content/Solutions/data-center/a40/nvidia-a40-datasheet.pdf}{\small{\texttt{NVIDIA A40}}}     & \small{8.6} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/gtcs22/design-visualization/quadro-product-literature/proviz-nvidia-rtx-a5500-datasheet-2130578-r3-us-web.pdf}{\small{\texttt{NVIDIA RTX A5500}}} & \small{8.6} \\
	     \href{https://www.nvidia.com/en-us/design-visualization/rtx-a6000/}{\small{\texttt{NVIDIA RTX A6000}}} & \small{8.6} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/rtx-6000/proviz-print-rtx6000-datasheet-web-2504660.pdf}{\small{\texttt{NVIDIA RTX 6000 Ada Generation}}}&  \small{8.9} \\
	     \href{https://www.nvidia.com/en-us/data-center/l40/}{\small{\texttt{NVIDIA L40}}} & \small{8.9} \\
	     \href{https://resources.nvidia.com/en-us-l40s/l40s-datasheet-28413}{\small{\texttt{NVIDIA L40S}}} & \small{8.9} \\
	     \href{https://www.nvidia.com/en-us/data-center/h100/}{\small{\texttt{NVIDIA H100 [NVL]}}}/\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/h100/PB-11773-001\_v01.pdf}{\small{\texttt{Deep Dive}}} &  \small{9.0} \\
	     \href{https://www.nvidia.com/en-us/data-center/h200/}{\small{\texttt{NVIDIA H200 [NVL]}}} &  \small{9.0} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/products/workstations/professional-desktop-gpus/rtx-pro-6000-max-q/workstation-datasheet-blackwell-rtx-pro-6000-max-q-nvidia-3519233.pdf}{\small{\texttt{NVIDIA RTX PRO 6000 Blackwell Max-Q}}} & \small{12.0} \\
        \hline
     \end{tabular}
   \end{center}
   \caption{GPU devices on lp/kp/np/grn, cont.\ (10/31/2025)}
\end{table}
\end{frame}

\subsubsection{Protected env.: redwood cluster}
\begin{frame}
	\frametitle{GPU devices on redwood}
\begin{table}[H]
   \begin{center}
     \begin{tabular}{c|c}
       \multirow{2}{*}{\texttt{GPU device type}} & \texttt{compute} \\
	                                               & \texttt{capability} \\
       \hline
       \href{https://www.nvidia.com/en-us/geforce/10-series/\#1080-ti-spec}{\small{\texttt{NVIDIA GeForce GTX 1080 Ti}}}    &  \small{6.1}  \\
       \href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-SXM4-40GB}}} &  \small{8.0} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100 80GB PCIe}}} &  \small{8.0} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/data-center/products/a30-gpu/pdf/a30-datasheet.pdf}{\small{\texttt{NVIDIA A30}}}   &  \small{8.0} \\
	     \href{https://images.nvidia.com/content/Solutions/data-center/a40/nvidia-a40-datasheet.pdf}{\small{\texttt{NVIDIA A40}}} &  \small{8.6} \\
	     \href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/rtx-6000/proviz-print-rtx6000-datasheet-web-2504660.pdf}{\small{\texttt{NVIDIA RTX 6000 Ada Generation}}}&  \small{8.9} \\
	     \href{https://www.nvidia.com/en-us/data-center/h100/}{\small{\texttt{NVIDIA H100 NVL}}}/\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/h100/PB-11773-001\_v01.pdf}{\small{\texttt{Deep Dive}}} &  \small{9.0} \\
	     \href{https://www.nvidia.com/en-us/data-center/h200/}{\small{\texttt{NVIDIA H200 [NVL]}}} &  \small{9.0} \\
        \hline
     \end{tabular}
   \end{center}
   \caption{GPU devices on redwood (10/31/2025)}
\end{table}
\end{frame}

\subsection{How to access the GPUs at CHPC}
\begin{frame}
   \frametitle{Accessing GPUs at CHPC}
      \begin{itemize}
	 \item \href{https://www.chpc.utah.edu/presentations/images-and-pdfs/usinggpuss24f.pdf}{Using GPUs at the CHPC (Presentation by Martin \v{C}uma)}
	 \item \textbf{\textcolor{orange}{Note:}}
            \begin{itemize}
		    \item When a GPU job is launched, the job runs in its own \href{https://en.wikipedia.org/wiki/Cgroups}{cgroup}
		     (which limits and accounts for the job's resources).
	     \item When a \$USER has several GPU jobs running on the \textbf{same} node,
	 	   the \$USER will land in \textbf{one} cgroup belonging to one of his/her jobs
                   when the \$USER \texttt{ssh}es into the node where these jobs run.\\
	  	   Therefore, the \$USER \textbf{cannot} check the status of his/her other jobs
                   directly with tools like \texttt{nvidia-smi}.
	       \item $\Rightarrow$ Instead, run \\\texttt{srun --pty --overlap --jobid \$JOBID /usr/bin/nvidia-smi} \\
		     where \texttt{\$JOBID} is the ID of the job to inspect.
            \end{itemize}
      \end{itemize}
\end{frame}