Skip to content

Commit fdb3d19

Browse files
committed
readme
1 parent 6a2150f commit fdb3d19

File tree

5 files changed

+182
-0
lines changed

5 files changed

+182
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# LLM Scheduler API
22

3+
## Prerequisite
4+
The Slurm engine must be installed before using this API. Reference: [Slurm Installation](./docs/slurm_install.md)
35
## Installation
46

57
1. Install dependency:

docs/slurm_install.md

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Slurm Installation from Source Code
2+
## Prerequisite
3+
```
4+
apt-get install git gcc make ruby ruby-dev libpam0g-dev libmariadb-dev mariadb-server build-essential libssl-dev -y
5+
apt install libcgroup-dev libpam-cgroup libdbus-1-dev
6+
```
7+
## Steps
8+
1. install mysql
9+
```
10+
apt install mysql-server
11+
```
12+
13+
2. build and install libjwt
14+
dependency: apt install openssl libjansson-dev -y
15+
```
16+
git clone --depth 1 --single-branch -b v1.12.0 https://gitee.com/mirrors_benmcollins/libjwt libjwt
17+
cd libjwt
18+
autoreconf --force --install
19+
./configure --prefix=/usr/local
20+
make -j
21+
make install
22+
```
23+
3. install json-c http-parser
24+
```
25+
git clone --depth 1 --single-branch -b json-c-0.15-20200726 https://github.com/json-c/json-c.git json-c
26+
mkdir json-c-build
27+
cd json-c-build
28+
cmake ../json-c
29+
make
30+
sudo make install
31+
cd ..
32+
git clone --depth 1 --single-branch -b v2.9.4 https://gitee.com/xzgan/http-parser http_parser
33+
cd http_parser
34+
make
35+
sudo make install
36+
```
37+
38+
4. install munge
39+
reference: https://www.cnblogs.com/liwanliangblog/p/9194032.html
40+
```
41+
git clone -b munge-0.5.15 https://gitee.com/xzgan/munge
42+
cd munge
43+
./bootstrap
44+
./configure --prefix=/usr/local/munge --sysconfdir=/usr/local/munge/etc --localstatedir=/usr/local/munge/local --with-runstatedir=/usr/local/var/run --libdir=/usr/local/munge/lib64
45+
make -j 4
46+
make install
47+
export PATH=/usr/local/munge/sbin/:$PATH
48+
useradd -s /sbin/nologin -u 601 munge
49+
sudo -u munge mkdir -p /usr/local/var/run/munge
50+
chmod g-w /usr/local/var/run/munge
51+
chown -R munge.munge /usr/local/munge/
52+
chmod 700 /usr/local/munge/etc/
53+
chmod 711 /usr/local/munge/local/
54+
chmod 755 /usr/local/var/run/munge/
55+
chmod 711 /usr/local/munge/lib
56+
57+
#create key
58+
sudo -u munge /usr/local/munge/sbin/mungekey --verbose
59+
chmod 600 /usr/local/munge/etc/munge/munge.key
60+
#create service
61+
ln -s /usr/local/munge/lib/systemd/system/munge.service /usr/lib/systemd/system/munge.service
62+
systemctl daemon-reload
63+
systemctl start munge
64+
systemctl status munge
65+
```
66+
67+
5. Install slurm
68+
wget https://download.schedmd.com/slurm/slurm-23.02.6.tar.bz2 && tar -xjf slurm-23.02.6.tar.bz2 && cd slurm-23.02.6
69+
Configure Slurm
70+
```
71+
./configure --sysconfdir=/etc/slurm/ --libdir=/usr/local/lib --with-munge=/usr/local/munge --with-jwt=/usr/local/ --with-http-parser=/usr/local/ --enable-slurmrestd
72+
CORES=$(grep processor /proc/cpuinfo | wc -l)
73+
make -j $CORES
74+
make install
75+
```
76+
77+
5. check
78+
/usr/local/sbin/slurmd
79+
plugin so:
80+
/usr/local/lib/slurm
81+
82+
6. slurm auth
83+
```
84+
mkdir -p /var/spool/slurm
85+
chown slurm: /var/spool/slurm
86+
mkdir -p /var/log/slurm
87+
chown slurm: /var/log/slurm
88+
mkdir -p /var/spool/slurm/ctld
89+
chown slurm: /var/spool/slurm/ctld
90+
mkdir -p /var/spool/slurm/ctld/
91+
cp $llm-scheduler-api/slurm/jwt_hs256.key /var/spool/slurm/ctld/
92+
```
93+
94+
7. Refer to the sample files in $llm-scheduler-api/slurm/conf and configure /etc/slurm/slurm.conf, /etc/slurm/slurmdbd.conf and /etc/slurm/slurmrestd.conf accordingly
95+
6. Start the Slurm daemons on every node
96+
```
97+
slurmdbd
98+
slurmd
99+
slurmctld
100+
```
101+
102+
7. config ldap for all nodes: https://computingforgeeks.com/how-to-configure-ubuntu-as-ldap-client/
103+
104+
8. Start slurmrestd on the head node and set the Slurm API address in .env
105+
```
106+
slurmrestd -f /etc/slurm/slurmrestd.conf -a rest_auth/jwt 0.0.0.0:3000 -vvvv >> slurm.log 2>&1 &
107+
```

slurm/conf/slurm.conf

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
ClusterName=cluster-opencsg
2+
SlurmctldHost=m1
3+
SlurmctldDebug=debug5
4+
SlurmdDebug=debug5
5+
GresTypes=gpu
6+
MpiDefault=none
7+
#ProctrackType=proctrack/cgroup
8+
ProctrackType=proctrack/linuxproc
9+
SlurmctldPidFile=/var/run/slurmctld.pid
10+
SlurmctldPort=6817
11+
SlurmdPidFile=/var/run/slurmd.pid
12+
SlurmdPort=6818
13+
SlurmdSpoolDir=/var/spool/slurm
14+
SlurmUser=slurm
15+
StateSaveLocation=/var/spool/slurm/ctld
16+
SwitchType=switch/none
17+
PreemptType=preempt/qos
18+
PreemptMode=SUSPEND,GANG
19+
SelectType=select/cons_tres
20+
#SelectTypeParameters=CR_CORE_MEMORY
21+
TaskPlugin=task/affinity,task/cgroup
22+
TaskPluginParam=verbose
23+
MinJobAge=172800
24+
TmpFS=/home/users
25+
AccountingStorageEnforce=associations,limits
26+
AccountingStorageHost=m1
27+
AccountingStoragePort=6819
28+
AccountingStorageType=accounting_storage/slurmdbd
29+
AccountingStoreFlags=job_comment
30+
SlurmctldLogFile=/var/log/slurm/slurmctld.log
31+
SlurmdLogFile=/var/log/slurm/slurmd.log
32+
AuthAltTypes=auth/jwt
33+
AuthAltParameters=jwt_key=/var/spool/slurm/ctld/jwt_hs256.key
34+
NodeName=m1 RealMemory=2048 CPUs=4 Boards=1 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 State=UNKNOWN
35+
NodeName=m2 CPUs=4 Boards=1 SocketsPerBoard=1 CoresPerSocket=2 Weight=2 ThreadsPerCore=2 RealMemory=15084 State=UNKNOWN
36+
NodeName=m3 CPUs=16 Boards=1 SocketsPerBoard=1 CoresPerSocket=8 Weight=3 ThreadsPerCore=2 RealMemory=15081 State=UNKNOWN
37+
PartitionName=compute Nodes=ALL Default=NO MaxTime=INFINITE State=UP AllowGroups=test
38+
PartitionName=normal Nodes=ALL Default=No MaxTime=1800 State=UP PriorityTier=20
39+
PartitionName=low Nodes=ALL Default=Yes MaxTime=1800 State=UP PriorityTier=30

slurm/conf/slurmdbd.conf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
AuthType=auth/munge
2+
AuthInfo=socket=/usr/local/var/run/munge/munge.socket.2
3+
AuthAltTypes=auth/jwt
4+
AuthAltParameters=jwt_key=/var/spool/slurm/ctld/jwt_hs256.key
5+
6+
# slurmDBD info
7+
DbdAddr=localhost
8+
#DbdHost=localhost
9+
DbdHost=m1
10+
#DbdPort=7031
11+
SlurmUser=slurm
12+
#MessageTimeout=300
13+
DebugLevel=debug5
14+
#DefaultQOS=normal,standby
15+
DefaultQOS=default
16+
17+
LogFile=/var/log/slurm/slurmdbd.log
18+
PidFile=/var/run/slurmdbd.pid
19+
#PluginDir=/usr/lib/slurm
20+
#PrivateData=accounts,users,usage,jobs
21+
#TrackWCKey=yes
22+
#
23+
# Database info
24+
StorageType=accounting_storage/mysql
25+
StorageHost=localhost
26+
#StorageHost=localhost
27+
28+
StoragePort=3306
29+
#StoragePort=1234
30+
StoragePass=123
31+
StorageUser=slurm
32+
StorageLoc=slurm_acct_db

slurm/conf/slurmrestd.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
include /etc/slurm/slurm.conf
2+
AuthType=auth/jwt

0 commit comments

Comments
 (0)