Skip to content

Commit 5c15b50

Browse files
authored
fix: helm chart bug, gpu info map, add log for node discovery (#145)
* fix: Update RBAC permissions for configmaps/namespaces and split secrets into separate rule
* fix: add tsdb values
* fix: gpu info
* fix: gpu info and add log for node discovery
1 parent fb7056f commit 5c15b50

File tree

9 files changed

+168
-53
lines changed

9 files changed

+168
-53
lines changed

charts/tensor-fusion/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.2.14
18+
version: 1.2.16
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/tensor-fusion/templates/gpu-public-gpu-info.yaml

Lines changed: 75 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ data:
1414
# - https://en.wikipedia.org/wiki/List_of_Nvidia_graphics_processing_units
1515
# - https://en.wikipedia.org/wiki/GeForce_RTX_30_series#
1616
# - https://en.wikipedia.org/wiki/GeForce_RTX_50_series
17+
# - https://github.com/exo-explore/exo/blob/main/exo/topology/device_capabilities.py
1718
1819
# Field Definition:
1920
# - 'model' is `GPUModel_BoardSlotType` to identify the GPU
@@ -31,26 +32,38 @@ data:
3132
fp16TFlops: 65
3233
3334
# Ampere Architecture Series
34-
- model: A100_SXM4
35-
fullModelName: "NVIDIA A100 80GB SXM"
35+
- model: A100_SXM
36+
fullModelName: "NVIDIA A100-SXM4-80GB"
3637
vendor: NVIDIA
3738
costPerHour: 1.89
3839
fp16TFlops: 312
3940
4041
- model: A100_PCIe
41-
fullModelName: "NVIDIA A100 80GB PCIE"
42+
fullModelName: "NVIDIA A100 80GB PCIe"
4243
vendor: NVIDIA
4344
costPerHour: 1.64
4445
fp16TFlops: 312
4546
46-
- model: A800_SXM4
47-
fullModelName: "NVIDIA A800 80GB SXM"
47+
- model: A100_40G_SXM
48+
fullModelName: "NVIDIA A100-SXM4-40GB"
49+
vendor: NVIDIA
50+
costPerHour: 1.4
51+
fp16TFlops: 312
52+
53+
- model: A100_40G_PCIe
54+
fullModelName: "NVIDIA A100 40GB PCIe"
55+
vendor: NVIDIA
56+
costPerHour: 1.2
57+
fp16TFlops: 312
58+
59+
- model: A800_SXM
60+
fullModelName: "NVIDIA A800-SXM4-80GB"
4861
vendor: NVIDIA
4962
costPerHour: 1.89
5063
fp16TFlops: 312
5164
5265
- model: A800_PCIe
53-
fullModelName: "NVIDIA A800 80GB PCIE"
66+
fullModelName: "NVIDIA A800 80GB PCIe"
5467
vendor: NVIDIA
5568
costPerHour: 1.64
5669
fp16TFlops: 312
@@ -69,7 +82,7 @@ data:
6982
fp16TFlops: 63
7083
7184
- model: A40
72-
fullModelName: "NVIDIA A40 48GB PCIE"
85+
fullModelName: "NVIDIA A40 48GB PCIe"
7386
vendor: NVIDIA
7487
costPerHour: 0.44
7588
fp16TFlops: 149.7
@@ -81,51 +94,51 @@ data:
8194
fp16TFlops: 38.71
8295
8396
- model: A6000Ada
84-
fullModelName: "NVIDIA A6000 Ada"
97+
fullModelName: "NVIDIA RTX 6000 Ada Generation"
8598
vendor: NVIDIA
8699
costPerHour: 0.95
87100
fp16TFlops: 323
88101
89102
# NVIDIA RTX Series
90103
# RTX 20 Series
91104
- model: RTX2060
92-
fullModelName: "NVIDIA GEFORCE RTX 2060"
105+
fullModelName: "NVIDIA GeForce RTX 2060"
93106
vendor: NVIDIA
94107
costPerHour: 0.05
95108
fp16TFlops: 12.9
96109
97110
- model: RTX2060S
98-
fullModelName: "NVIDIA GEFORCE RTX 2060 SUPER"
111+
fullModelName: "NVIDIA GeForce RTX 2060 SUPER"
99112
vendor: NVIDIA
100113
costPerHour: 0.06
101114
fp16TFlops: 14.4
102115
103116
- model: RTX2070
104-
fullModelName: "NVIDIA GEFORCE RTX 2070"
117+
fullModelName: "NVIDIA GeForce RTX 2070"
105118
vendor: NVIDIA
106119
costPerHour: 0.06
107120
fp16TFlops: 14.93
108121
109122
- model: RTX2070S
110-
fullModelName: "NVIDIA GEFORCE RTX 2070 SUPER"
123+
fullModelName: "NVIDIA GeForce RTX 2070 SUPER"
111124
vendor: NVIDIA
112125
costPerHour: 0.07
113126
fp16TFlops: 18.12
114127
115128
- model: RTX2080
116-
fullModelName: "NVIDIA GEFORCE RTX 2080"
129+
fullModelName: "NVIDIA GeForce RTX 2080"
117130
vendor: NVIDIA
118131
costPerHour: 0.08
119132
fp16TFlops: 20.14
120133
121134
- model: RTX2080S
122-
fullModelName: "NVIDIA GEFORCE RTX 2080 SUPER"
135+
fullModelName: "NVIDIA GeForce RTX 2080 SUPER"
123136
vendor: NVIDIA
124137
costPerHour: 0.09
125138
fp16TFlops: 22.30
126139
127140
- model: RTX2080Ti
128-
fullModelName: "NVIDIA GEFORCE RTX 2080 Ti"
141+
fullModelName: "NVIDIA GeForce RTX 2080 Ti"
129142
vendor: NVIDIA
130143
costPerHour: 0.1
131144
fp16TFlops: 26.9
@@ -138,61 +151,61 @@ data:
138151
139152
# RTX 30 Series
140153
- model: RTX3050
141-
fullModelName: "NVIDIA GEFORCE RTX 3050"
154+
fullModelName: "NVIDIA GeForce RTX 3050"
142155
vendor: NVIDIA
143156
costPerHour: 0.05
144157
fp16TFlops: 18.22
145158
146159
- model: RTX3060
147-
fullModelName: "NVIDIA GEFORCE RTX 3060"
160+
fullModelName: "NVIDIA GeForce RTX 3060"
148161
vendor: NVIDIA
149162
costPerHour: 0.07
150163
fp16TFlops: 26.0
151164
152165
- model: RTX3060Ti
153-
fullModelName: "NVIDIA GEFORCE RTX 3060 Ti"
166+
fullModelName: "NVIDIA GeForce RTX 3060 Ti"
154167
vendor: NVIDIA
155168
costPerHour: 0.08
156169
fp16TFlops: 32.4
157170
158171
- model: RTX3070
159-
fullModelName: "NVIDIA GEFORCE RTX 3070"
172+
fullModelName: "NVIDIA GeForce RTX 3070"
160173
vendor: NVIDIA
161174
costPerHour: 0.08
162175
fp16TFlops: 40.6
163176
164177
- model: RTX3070Ti
165-
fullModelName: "NVIDIA GEFORCE RTX 3070 Ti"
178+
fullModelName: "NVIDIA GeForce RTX 3070 Ti"
166179
vendor: NVIDIA
167180
costPerHour: 0.09
168181
fp16TFlops: 43.6
169182
170183
- model: RTX3080
171-
fullModelName: "NVIDIA GEFORCE RTX 3080 (10 GB)"
184+
fullModelName: "NVIDIA GeForce RTX 3080 (10 GB)"
172185
vendor: NVIDIA
173186
costPerHour: 0.13
174187
fp16TFlops: 59.6
175188
176189
- model: RTX3080_12GB
177-
fullModelName: "NVIDIA GEFORCE RTX 3080 (12 GB)"
190+
fullModelName: "NVIDIA GeForce RTX 3080 (12 GB)"
178191
vendor: NVIDIA
179192
costPerHour: 0.14
180193
fp16TFlops: 61.2
181194
182195
- model: RTX3080Ti
183-
fullModelName: "NVIDIA GEFORCE RTX 3080 Ti"
196+
fullModelName: "NVIDIA GeForce RTX 3080 Ti"
184197
vendor: NVIDIA
185198
costPerHour: 0.14
186199
fp16TFlops: 68.2
187200
188201
- model: RTX3090
189-
fullModelName: "NVIDIA GEFORCE RTX 3090"
202+
fullModelName: "NVIDIA GeForce RTX 3090"
190203
vendor: NVIDIA
191204
costPerHour: 0.2
192205
fp16TFlops: 71.2
193206
194207
- model: RTX3090Ti
195-
fullModelName: "NVIDIA GEFORCE RTX 3090 Ti"
208+
fullModelName: "NVIDIA GeForce RTX 3090 Ti"
196209
vendor: NVIDIA
197210
costPerHour: 0.21
198211
fp16TFlops: 80.0
@@ -218,68 +231,68 @@ data:
218231
219232
# RTX 40 Series
220233
- model: RTX4060
221-
fullModelName: "NVIDIA GEFORCE RTX 4060"
234+
fullModelName: "NVIDIA GeForce RTX 4060"
222235
vendor: NVIDIA
223236
costPerHour: 0.1
224237
fp16TFlops: 44.0
225238
226239
- model: RTX4060Ti
227-
fullModelName: "NVIDIA GEFORCE RTX 4060 Ti"
240+
fullModelName: "NVIDIA GeForce RTX 4060 Ti"
228241
vendor: NVIDIA
229242
costPerHour: 0.11
230243
fp16TFlops: 44.0
231244
232245
- model: RTX4060Ti_16GB
233-
fullModelName: "NVIDIA GEFORCE RTX 4060 Ti 16GB"
246+
fullModelName: "NVIDIA GeForce RTX 4060 Ti 16GB"
234247
vendor: NVIDIA
235248
costPerHour: 0.12
236249
fp16TFlops: 44.0
237250
238251
- model: RTX4070
239-
fullModelName: "NVIDIA GEFORCE RTX 4070"
252+
fullModelName: "NVIDIA GeForce RTX 4070"
240253
vendor: NVIDIA
241254
costPerHour: 0.15
242255
fp16TFlops: 58.0
243256
244257
- model: RTX4070S
245-
fullModelName: "NVIDIA GEFORCE RTX 4070 SUPER"
258+
fullModelName: "NVIDIA GeForce RTX 4070 SUPER"
246259
vendor: NVIDIA
247260
costPerHour: 0.18
248261
fp16TFlops: 60.0
249262
250263
- model: RTX4070Ti
251-
fullModelName: "NVIDIA GEFORCE RTX 4070 Ti"
264+
fullModelName: "NVIDIA GeForce RTX 4070 Ti"
252265
vendor: NVIDIA
253266
costPerHour: 0.2
254267
fp16TFlops: 78.86
255268
256269
- model: RTX4070TiS
257-
fullModelName: "NVIDIA GEFORCE RTX 4070 Ti SUPER"
270+
fullModelName: "NVIDIA GeForce RTX 4070 Ti SUPER"
258271
vendor: NVIDIA
259272
costPerHour: 0.22
260273
fp16TFlops: 80.0
261274
262275
- model: RTX4080
263-
fullModelName: "NVIDIA GEFORCE RTX 4080"
276+
fullModelName: "NVIDIA GeForce RTX 4080"
264277
vendor: NVIDIA
265278
costPerHour: 0.23
266279
fp16TFlops: 97.48
267280
268281
- model: RTX4080S
269-
fullModelName: "NVIDIA GEFORCE RTX 4080 SUPER"
282+
fullModelName: "NVIDIA GeForce RTX 4080 SUPER"
270283
vendor: NVIDIA
271284
costPerHour: 0.25
272285
fp16TFlops: 104.0
273286
274287
- model: RTX4090
275-
fullModelName: "NVIDIA GEFORCE RTX 4090"
288+
fullModelName: "NVIDIA GeForce RTX 4090"
276289
vendor: NVIDIA
277290
costPerHour: 0.3
278291
fp16TFlops: 165.16
279292
280293
# Hopper Architecture Series
281-
- model: H100_SXM4
282-
fullModelName: "NVIDIA H100 SXM4"
294+
- model: H100_SXM
295+
fullModelName: "NVIDIA H100 80GB HBM3"
283296
vendor: NVIDIA
284297
costPerHour: 2.99
285298
fp16TFlops: 989
@@ -289,10 +302,28 @@ data:
289302
vendor: NVIDIA
290303
costPerHour: 2.39
291304
fp16TFlops: 835
305+
306+
- model: H800_SXM
307+
fullModelName: "NVIDIA H800 80GB HBM3"
308+
vendor: NVIDIA
309+
costPerHour: 2.99
310+
fp16TFlops: 989
311+
312+
- model: H800_PCIe
313+
fullModelName: "NVIDIA H800 PCIe"
314+
vendor: NVIDIA
315+
costPerHour: 2.39
316+
fp16TFlops: 835
292317
318+
- model: H100_NVL
319+
fullModelName: "NVIDIA H100 NVL"
320+
vendor: NVIDIA
321+
costPerHour: 2.99
322+
fp16TFlops: 989
323+
293324
# Blackwell Architecture Series
294-
- model: B200_SXM4
295-
fullModelName: "NVIDIA B200 SXM4"
325+
- model: B200_SXM
326+
fullModelName: "NVIDIA B200 SXM"
296327
vendor: NVIDIA
297328
costPerHour: 10.99 # price unknown; available on request
298329
fp16TFlops: 2250
@@ -323,32 +354,32 @@ data:
323354
fp16TFlops: 27.8
324355
325356
- model: RTX4000AdaGen
326-
fullModelName: "NVIDIA RTX 4000 ADA GENERATION"
357+
fullModelName: "NVIDIA RTX 4000 Ada Generation"
327358
vendor: NVIDIA
328359
costPerHour: 0.28
329360
fp16TFlops: 26.7
330361
331362
# NVIDIA GTX Series
332363
- model: GTX1050Ti
333-
fullModelName: "NVIDIA GEFORCE GTX 1050 Ti"
364+
fullModelName: "NVIDIA GeForce GTX 1050 Ti"
334365
vendor: NVIDIA
335366
costPerHour: 0.03
336367
fp16TFlops: 4.0
337368
338369
- model: GTX1070
339-
fullModelName: "NVIDIA GEFORCE GTX 1070"
370+
fullModelName: "NVIDIA GeForce GTX 1070"
340371
vendor: NVIDIA
341372
costPerHour: 0.04
342373
fp16TFlops: 0.101
343374
344375
- model: GTX1080
345-
fullModelName: "NVIDIA GEFORCE GTX 1080"
376+
fullModelName: "NVIDIA GeForce GTX 1080"
346377
vendor: NVIDIA
347378
costPerHour: 0.05
348379
fp16TFlops: 0.138
349380
350381
- model: GTX1080Ti
351-
fullModelName: "NVIDIA GEFORCE GTX 1080 Ti"
382+
fullModelName: "NVIDIA GeForce GTX 1080 Ti"
352383
vendor: NVIDIA
353384
costPerHour: 0.06
354385
fp16TFlops: 0.177

charts/tensor-fusion/templates/greptime-standalone.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ spec:
8080
defaultMode: 420
8181
containers:
8282
- name: standalone
83-
image: docker.io/greptime/greptimedb:latest
83+
image: {{ .Values.greptime.image.repository }}:{{ .Values.greptime.image.tag }}
8484
args:
8585
- standalone
8686
- start
@@ -107,7 +107,7 @@ spec:
107107
- name: postgres
108108
containerPort: 4003
109109
protocol: TCP
110-
resources: {}
110+
resources: {{ .Values.greptime.resources | toJson }}
111111
volumeMounts:
112112
- name: datanode
113113
mountPath: /data/greptimedb

charts/tensor-fusion/templates/rbac.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ rules:
99
resources:
1010
- configmaps
1111
- namespaces
12+
verbs:
13+
- create
14+
- get
15+
- list
16+
- patch
17+
- update
18+
- watch
19+
- apiGroups:
20+
- ""
21+
resources:
1222
- secrets
1323
verbs:
1424
- get

0 commit comments

Comments
 (0)