11name : model-dev
2+ include :
3+ - path :
4+ - ../monitoring/compose.yaml
5+ - ./overrides.yaml
6+
27services :
3- kepler :
8+ kepler-models :
49 image : quay.io/sustainable_computing_io/kepler:latest
510 ports :
6- - 9100:9100
11+ - 19100:9100
712 privileged : true
813 pid : host
914 networks :
10- - kepler-network
15+ - kepler-models-network
1116 - model-server-network
1217 volumes :
1318 - type : bind
@@ -17,19 +22,16 @@ services:
1722 source : /sys
1823 target : /sys
1924 - type : bind
20- source : ./kepler/etc/kepler
25+ source : ./kepler/models/etc/kepler
2126 target : /etc/kepler
2227
2328 # NOTE: use the models from the local repo
2429 - type : bind
25- source : ./kepler/var/lib/kepler/data/model_weight/
30+ source : ./kepler/common/var/lib/kepler/data
2631 target : /var/lib/kepler/data
27- - type : bind
28- source : ./kepler/var/lib/kepler/data/cpus.yaml
29- target : /var/lib/kepler/data/cpus.yaml
3032
3133 # NOTE: for estimator - kepler communication
32- - kepler-tmp:/tmp
34+ - estimator-tmp:/tmp
3335
3436 healthcheck :
3537 test : curl -f http://localhost:9100/metrics || exit 1
@@ -66,6 +68,52 @@ services:
6668 -disable-power-meter \
6769 -v="8"
6870
71+ kepler-metal :
72+ image : quay.io/sustainable_computing_io/kepler:latest
73+ ports :
74+ - 19200:9100
75+ privileged : true
76+ pid : host
77+ networks :
78+ - kepler-metal-network
79+ volumes :
80+ - type : bind
81+ source : /proc
82+ target : /proc
83+ - type : bind
84+ source : /sys
85+ target : /sys
86+ - type : bind
87+ source : ./kepler/metal/etc/kepler
88+ target : /etc/kepler
89+
90+ - type : bind
91+ source : ./kepler/common/var/lib/kepler/data
92+ target : /var/lib/kepler/data
93+
94+ healthcheck :
95+ test : curl -f http://localhost:9100/metrics || exit 1
96+ interval : ${HEALTHCHECK_INTERVAL:-50s}
97+ timeout : ${HEALTHCHECK_TIMEOUT:-30s}
98+ retries : ${HEALTHCHECK_RETRIES:-3}
99+ start_period : ${HEALTHCHECK_START_PERIOD:-1m}
100+
101+ cap_add :
102+ - ALL
103+
104+ entrypoint :
105+ - /usr/bin/bash
106+ - -c
107+
108+ command :
109+ - |
110+ echo "starting kepler metal";
111+ set -x;
112+ /usr/bin/kepler \
113+ -address="0.0.0.0:9100" \
114+ -v="8"
115+
116+
69117 estimator :
70118 command : [estimator, -l, debug]
71119 build : &build
@@ -74,37 +122,38 @@ services:
74122
75123 volumes :
76124 - type : bind
77- source : ./kepler/etc/kepler
125+ source : ./kepler/models/etc/kepler
78126 target : /etc/kepler
79127
80- - kepler-tmp:/tmp
128+ - estimator-tmp:/tmp
81129 - estimator-mnt:/mnt
82130 networks :
83- - kepler-network
131+ - kepler-models-network
84132 - model-server-network
85133
86134 model-server :
87135 ports :
88- - 8100:8100
136+ - 18100:8100
89137 command : [model-server, -l, debug]
90138 build :
91139 << : *build
92140 volumes :
93141 - type : bind
94- source : ./kepler/etc/kepler
142+ source : ./kepler/models/etc/kepler
95143 target : /etc/kepler
96144 - model-server-mnt:/mnt
97145 networks :
98146 - model-server-network
99147
100148volumes :
101149 # for kepler - estimator sock
102- kepler-tmp :
150+ estimator-tmp :
103151
104152 # for downloading models
105153 estimator-mnt :
106154 model-server-mnt :
107155
108156networks :
109- kepler-network :
157+ kepler-models-network :
158+ kepler-metal-network :
110159 model-server-network :
0 commit comments