% chpc.tex: GPU devices available at the CHPC and how to access them.
\section{Use of GPUs at the CHPC}
\subsection{GPUs available at CHPC}
\subsubsection{Regular env.: lp/kp/np/grn clusters}
\begin{frame}
\frametitle{GPU devices on lp/kp/np/grn}
\begin{table}[H]
\begin{center}
\begin{tabular}{c|c}
\texttt{GPU device type} & \texttt{compute capability} \\
\hline
\href{https://www.nvidia.com/en-us/geforce/graphics-cards/geforce-gtx-titan-x/specifications/}{\small{\texttt{NVIDIA GeForce GTX TITAN X}}} & \small{5.2} \\
\href{https://images.nvidia.com/content/tesla/pdf/nvidia-tesla-p100-PCIe-datasheet.pdf}{\small{\texttt{Tesla P100-PCIE-16GB}}} & \small{6.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/documents/nvidia-p40-datasheet.pdf}{\small{\texttt{Tesla P40}}}& \small{6.1} \\
\href{https://www.nvidia.com/en-us/geforce/10-series/\#1080-ti-spec}{\small{\texttt{NVIDIA GeForce GTX 1080 Ti}}} & \small{6.1} \\
\href{https://www.gpuzoo.com/GPU-NVIDIA/Titan\_V.html}{\small{\texttt{NVIDIA Titan V}}} & \small{7.0} \\
\href{https://images.nvidia.com/content/technologies/volta/pdf/tesla-volta-v100-datasheet-letter-fnl-web.pdf}{\small{\texttt{NVIDIA Tesla V100-PCIE-16GB}}} & \small{7.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/tesla-t4/t4-tensor-core-product-brief.pdf}{\small{\texttt{Tesla T4}}} & \small{7.5} \\
\href{https://www.techpowerup.com/gpu-specs/geforce-rtx-2080-ti.c3305}{\small{\texttt{NVIDIA GeForce RTX 2080 Ti}}} & \small{7.5} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-PCIe-40GB}}} & \small{8.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-SXM4-80GB}}} & \small{8.0} \\
\href{https://www.nvidia.com/en-us/design-visualization/a800/}{\small{\texttt{NVIDIA A800 40GB Active}}} & \small{8.0} \\
\hline
\end{tabular}
\end{center}
\caption{GPU devices on lp/kp/np/grn (10/31/2025)}
\end{table}
\end{frame}
\begin{frame}
\frametitle{GPU devices on lp/kp/np/grn (cont.)}
\begin{table}[H]
\begin{center}
\begin{tabular}{c|c}
\texttt{GPU device type} & \texttt{compute capability} \\
\hline
\href{https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3090-3090ti/}{\small{\texttt{NVIDIA GeForce RTX 3090}}} & \small{8.6} \\
\href{https://images.nvidia.com/content/Solutions/data-center/a40/nvidia-a40-datasheet.pdf}{\small{\texttt{NVIDIA A40}}} & \small{8.6} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/gtcs22/design-visualization/quadro-product-literature/proviz-nvidia-rtx-a5500-datasheet-2130578-r3-us-web.pdf}{\small{\texttt{NVIDIA RTX A5500}}} & \small{8.6} \\
\href{https://www.nvidia.com/en-us/design-visualization/rtx-a6000/}{\small{\texttt{NVIDIA RTX A6000}}} & \small{8.6} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/rtx-6000/proviz-print-rtx6000-datasheet-web-2504660.pdf}{\small{\texttt{NVIDIA RTX 6000 Ada Generation}}}& \small{8.9} \\
\href{https://www.nvidia.com/en-us/data-center/l40/}{\small{\texttt{NVIDIA L40}}} & \small{8.9} \\
\href{https://resources.nvidia.com/en-us-l40s/l40s-datasheet-28413}{\small{\texttt{NVIDIA L40S}}} & \small{8.9} \\
\href{https://www.nvidia.com/en-us/data-center/h100/}{\small{\texttt{NVIDIA H100 [NVL]}}}/\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/h100/PB-11773-001\_v01.pdf}{\small{\texttt{Deep Dive}}} & \small{9.0} \\
\href{https://www.nvidia.com/en-us/data-center/h200/}{\small{\texttt{NVIDIA H200 [NVL]}}} & \small{9.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/products/workstations/professional-desktop-gpus/rtx-pro-6000-max-q/workstation-datasheet-blackwell-rtx-pro-6000-max-q-nvidia-3519233.pdf}{\small{\texttt{NVIDIA RTX PRO 6000 Blackwell Max-Q}}} & \small{12.0} \\
\hline
\end{tabular}
\end{center}
\caption{GPU devices on lp/kp/np/grn, cont.\ (10/31/2025)}
\end{table}
\end{frame}
\subsubsection{Protected env.: redwood cluster}
\begin{frame}
\frametitle{GPU devices on redwood}
\begin{table}[H]
\begin{center}
\begin{tabular}{c|c}
\multirow{2}{*}{\texttt{GPU device type}} & \texttt{compute} \\
& \texttt{capability} \\
\hline
\href{https://www.nvidia.com/en-us/geforce/10-series/\#1080-ti-spec}{\small{\texttt{NVIDIA GeForce GTX 1080 Ti}}} & \small{6.1} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100-SXM4-40GB}}} & \small{8.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf}{\small{\texttt{NVIDIA A100 80GB PCIe}}} & \small{8.0} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/data-center/products/a30-gpu/pdf/a30-datasheet.pdf}{\small{\texttt{NVIDIA A30}}} & \small{8.0} \\
\href{https://images.nvidia.com/content/Solutions/data-center/a40/nvidia-a40-datasheet.pdf}{\small{\texttt{NVIDIA A40}}} & \small{8.6} \\
\href{https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/rtx-6000/proviz-print-rtx6000-datasheet-web-2504660.pdf}{\small{\texttt{NVIDIA RTX 6000 Ada Generation}}}& \small{8.9} \\
\href{https://www.nvidia.com/en-us/data-center/h100/}{\small{\texttt{NVIDIA H100 NVL}}}/\href{https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/h100/PB-11773-001\_v01.pdf}{\small{\texttt{Deep Dive}}} & \small{9.0} \\
\href{https://www.nvidia.com/en-us/data-center/h200/}{\small{\texttt{NVIDIA H200 [NVL]}}} & \small{9.0} \\
\hline
\end{tabular}
\end{center}
\caption{GPU devices on redwood (10/31/2025)}
\end{table}
\end{frame}
\subsection{How to access the GPUs at CHPC}
\begin{frame}
\frametitle{Accessing GPUs at CHPC}
\begin{itemize}
\item \href{https://www.chpc.utah.edu/presentations/images-and-pdfs/usinggpuss24f.pdf}{Using GPUs at the CHPC (Presentation by Martin \v{C}uma)}
\item \textbf{\textcolor{orange}{Note:}}
\begin{itemize}
\item When a GPU job is launched, the job runs in its own \href{https://en.wikipedia.org/wiki/Cgroups}{cgroup},
which limits and accounts for the resources of that job.
\item When a \$USER has several GPU jobs running on the \textbf{same} node,
the \$USER will land in \textbf{one} cgroup, belonging to just one of their jobs,
when the \$USER \texttt{ssh}es into the node where these jobs run.\\
Therefore, the \$USER \textbf{cannot} verify the status of their other jobs
using tools like \texttt{nvidia-smi} directly.
\item $\Rightarrow$ Instead, use \\\texttt{srun -{}-pty -{}-overlap -{}-jobid \$JOBID /usr/bin/nvidia-smi} \\
where \texttt{\$JOBID} is the ID of the job to inspect.
\end{itemize}
\end{itemize}
\end{frame}