Skip to content

Commit dc3cc4a

Browse files
author
Sunil Thaha
committed
chore(compose): new kepler-metal to compare models against
Signed-off-by: Sunil Thaha <[email protected]>
1 parent 927aed4 commit dc3cc4a

File tree

16 files changed

+978
-780
lines changed

16 files changed

+978
-780
lines changed

manifests/compose/dev/compose.yaml

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ include:
55
- ./overrides.yaml
66

77
services:
8-
kepler:
8+
kepler-models:
99
image: quay.io/sustainable_computing_io/kepler:latest
1010
ports:
1111
- 19100:9100
1212
privileged: true
1313
pid: host
1414
networks:
15-
- kepler-network
15+
- kepler-models-network
1616
- model-server-network
1717
volumes:
1818
- type: bind
@@ -22,19 +22,16 @@ services:
2222
source: /sys
2323
target: /sys
2424
- type: bind
25-
source: ./kepler/etc/kepler
25+
source: ./kepler/models/etc/kepler
2626
target: /etc/kepler
2727

2828
# NOTE: use the models from the local repo
2929
- type: bind
30-
source: ./kepler/var/lib/kepler/data/model_weight/
30+
source: ./kepler/common/var/lib/kepler/data
3131
target: /var/lib/kepler/data
32-
- type: bind
33-
source: ./kepler/var/lib/kepler/data/cpus.yaml
34-
target: /var/lib/kepler/data/cpus.yaml
3532

3633
# NOTE: for estimator - kepler communication
37-
- kepler-tmp:/tmp
34+
- estimator-tmp:/tmp
3835

3936
healthcheck:
4037
test: curl -f http://localhost:9100/metrics || exit 1
@@ -71,6 +68,52 @@ services:
7168
-disable-power-meter \
7269
-v="8"
7370
71+
kepler-metal:
72+
image: quay.io/sustainable_computing_io/kepler:latest
73+
ports:
74+
- 19200:9100
75+
privileged: true
76+
pid: host
77+
networks:
78+
- kepler-metal-network
79+
volumes:
80+
- type: bind
81+
source: /proc
82+
target: /proc
83+
- type: bind
84+
source: /sys
85+
target: /sys
86+
- type: bind
87+
source: ./kepler/metal/etc/kepler
88+
target: /etc/kepler
89+
90+
- type: bind
91+
source: ./kepler/common/var/lib/kepler/data
92+
target: /var/lib/kepler/data
93+
94+
healthcheck:
95+
test: curl -f http://localhost:9100/metrics || exit 1
96+
interval: ${HEALTHCHECK_INTERVAL:-50s}
97+
timeout: ${HEALTHCHECK_TIMEOUT:-30s}
98+
retries: ${HEALTHCHECK_RETRIES:-3}
99+
start_period: ${HEALTHCHECK_START_PERIOD:-1m}
100+
101+
cap_add:
102+
- ALL
103+
104+
entrypoint:
105+
- /usr/bin/bash
106+
- -c
107+
108+
command:
109+
- |
110+
echo "starting kepler metal";
111+
set -x;
112+
/usr/bin/kepler \
113+
-address="0.0.0.0:9100" \
114+
-v="8"
115+
116+
74117
estimator:
75118
command: [estimator, -l, debug]
76119
build: &build
@@ -79,13 +122,13 @@ services:
79122

80123
volumes:
81124
- type: bind
82-
source: ./kepler/etc/kepler
125+
source: ./kepler/models/etc/kepler
83126
target: /etc/kepler
84127

85-
- kepler-tmp:/tmp
128+
- estimator-tmp:/tmp
86129
- estimator-mnt:/mnt
87130
networks:
88-
- kepler-network
131+
- kepler-models-network
89132
- model-server-network
90133

91134
model-server:
@@ -96,20 +139,21 @@ services:
96139
<<: *build
97140
volumes:
98141
- type: bind
99-
source: ./kepler/etc/kepler
142+
source: ./kepler/models/etc/kepler
100143
target: /etc/kepler
101144
- model-server-mnt:/mnt
102145
networks:
103146
- model-server-network
104147

105148
volumes:
106149
# for kepler - estimator sock
107-
kepler-tmp:
150+
estimator-tmp:
108151

109152
# for downloading models
110153
estimator-mnt:
111154
model-server-mnt:
112155

113156
networks:
114-
kepler-network:
157+
kepler-models-network:
158+
kepler-metal-network:
115159
model-server-network:

0 commit comments

Comments
 (0)