Skip to content

Commit a0486e4

Browse files
authored
feat: add built-in component manifest, support json monitoring format and dynamic tags (#261)
* fix: add qos to annotation, optimize helm and logs, remove sh cmd manipulate
* fix: add built-in component manifest, simplify tf cluster
* fix: support json and otel metrics protocol, default precision to ms
1 parent 6462881 commit a0486e4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+1827
-569
lines changed

.vscode/launch.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
"program": "${workspaceFolder}/cmd/main.go",
7373
},
7474
{
75-
"name": "Run Unit Tests",
75+
"name": "Run Unit Tests - Controller",
7676
"type": "go",
7777
"request": "launch",
7878
"mode": "test",
@@ -82,6 +82,18 @@
8282
},
8383
"program": "${workspaceFolder}/internal/controller",
8484
"console": "integratedTerminal"
85+
},
86+
{
87+
"name": "Run Unit Tests - Webhook",
88+
"type": "go",
89+
"request": "launch",
90+
"mode": "test",
91+
"env": {
92+
"DEBUG_MODE": "true",
93+
"GO_TESTING": "true"
94+
},
95+
"program": "${workspaceFolder}/internal/webhook/v1",
96+
"console": "integratedTerminal"
8597
}
8698
]
8799
}

.vscode/settings.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
"cloudnative",
2626
"cloudprovider",
2727
"clusterissuers",
28+
"clusterrole",
29+
"clusterrolebinding",
2830
"componentconfig",
31+
"configmap",
2932
"configz",
3033
"controllerutil",
3134
"corev",
@@ -52,6 +55,7 @@
5255
"gosec",
5356
"gpuallocator",
5457
"gpunode",
58+
"gpunodeclaims",
5559
"gpunodeclasses",
5660
"gpunodes",
5761
"gpupool",
@@ -66,6 +70,7 @@
6670
"healthz",
6771
"iface",
6872
"imageutils",
73+
"influxdata",
6974
"jsonpatch",
7075
"karpenter",
7176
"klog",
@@ -76,10 +81,14 @@
7681
"kubescheduler",
7782
"kubeschedulerconfig",
7883
"kustomization",
84+
"libcuda",
85+
"libnvidia",
86+
"lineprotocol",
7987
"metav",
8088
"metricsserver",
8189
"Milli",
8290
"mito",
91+
"mutatingwebhookconfiguration",
8392
"ngpu",
8493
"nindent",
8594
"noderesources",
@@ -99,6 +108,7 @@
99108
"readyz",
100109
"replicaset",
101110
"replicasets",
111+
"rolebinding",
102112
"runbook",
103113
"runpod",
104114
"samber",
@@ -110,6 +120,7 @@
110120
"serviceaccount",
111121
"shirou",
112122
"shortuuid",
123+
"statefulset",
113124
"statefulsets",
114125
"strategicpatch",
115126
"strategicpatches",

api/v1/gpupool_types.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,21 +296,38 @@ type ComponentConfig struct {
296296
Client *ClientConfig `json:"client,omitempty"`
297297
}
298298
type NodeDiscoveryConfig struct {
299+
Image string `json:"image,omitempty"`
300+
299301
// +optional
300302
PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"`
301303
}
302304

303305
type HypervisorConfig struct {
306+
Image string `json:"image,omitempty"`
307+
308+
VectorImage string `json:"vectorImage,omitempty"`
309+
310+
// +kubebuilder:default=8000
311+
// +kubebuilder:validation:Minimum=0
312+
// +kubebuilder:validation:Maximum=65535
313+
// +optional
314+
PortNumber *int32 `json:"portNumber,omitempty"`
315+
304316
// +optional
305317
PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"`
306318
}
307319

308320
type WorkerConfig struct {
321+
Image string `json:"image,omitempty"`
309322
// +optional
310323
PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"`
311324
}
312325

313326
type ClientConfig struct {
327+
RemoteModeImage string `json:"remoteModeImage,omitempty"`
328+
329+
EmbeddedModeImage string `json:"embeddedModeImage,omitempty"`
330+
314331
OperatorEndpoint string `json:"operatorEndpoint,omitempty"`
315332

316333
// +optional

api/v1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/tensor-fusion/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.4.4
18+
version: 1.4.5
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ spec:
191191
properties:
192192
client:
193193
properties:
194+
embeddedModeImage:
195+
type: string
194196
operatorEndpoint:
195197
type: string
196198
patchEmbeddedWorkerToPod:
@@ -205,21 +207,37 @@ spec:
205207
patchToPod:
206208
type: object
207209
x-kubernetes-preserve-unknown-fields: true
210+
remoteModeImage:
211+
type: string
208212
type: object
209213
hypervisor:
210214
properties:
215+
image:
216+
type: string
211217
podTemplate:
212218
type: object
213219
x-kubernetes-preserve-unknown-fields: true
220+
portNumber:
221+
default: 8000
222+
format: int32
223+
maximum: 65535
224+
minimum: 0
225+
type: integer
226+
vectorImage:
227+
type: string
214228
type: object
215229
nodeDiscovery:
216230
properties:
231+
image:
232+
type: string
217233
podTemplate:
218234
type: object
219235
x-kubernetes-preserve-unknown-fields: true
220236
type: object
221237
worker:
222238
properties:
239+
image:
240+
type: string
223241
podTemplate:
224242
type: object
225243
x-kubernetes-preserve-unknown-fields: true

charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ spec:
256256
properties:
257257
client:
258258
properties:
259+
embeddedModeImage:
260+
type: string
259261
operatorEndpoint:
260262
type: string
261263
patchEmbeddedWorkerToPod:
@@ -270,21 +272,37 @@ spec:
270272
patchToPod:
271273
type: object
272274
x-kubernetes-preserve-unknown-fields: true
275+
remoteModeImage:
276+
type: string
273277
type: object
274278
hypervisor:
275279
properties:
280+
image:
281+
type: string
276282
podTemplate:
277283
type: object
278284
x-kubernetes-preserve-unknown-fields: true
285+
portNumber:
286+
default: 8000
287+
format: int32
288+
maximum: 65535
289+
minimum: 0
290+
type: integer
291+
vectorImage:
292+
type: string
279293
type: object
280294
nodeDiscovery:
281295
properties:
296+
image:
297+
type: string
282298
podTemplate:
283299
type: object
284300
x-kubernetes-preserve-unknown-fields: true
285301
type: object
286302
worker:
287303
properties:
304+
image:
305+
type: string
288306
podTemplate:
289307
type: object
290308
x-kubernetes-preserve-unknown-fields: true

charts/tensor-fusion/templates/alert-manager.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: v1
33
kind: ConfigMap
44
metadata:
5-
name: alert-manager-config
5+
name: {{ .Release.Name }}-alert-manager-config
66
namespace: {{ include "tensor-fusion.namespace" . }}
77
labels:
88
tensor-fusion.ai/component: alert-manager
@@ -14,7 +14,7 @@ data:
1414
apiVersion: apps/v1
1515
kind: StatefulSet
1616
metadata:
17-
name: alert-manager
17+
name: {{ .Release.Name }}-alert-manager
1818
namespace: {{ include "tensor-fusion.namespace" . }}
1919
labels:
2020
tensor-fusion.ai/component: alert-manager
@@ -35,7 +35,7 @@ spec:
3535
volumes:
3636
- name: config
3737
configMap:
38-
name: alert-manager-config
38+
name: {{ .Release.Name }}-alert-manager-config
3939
defaultMode: 420
4040
- name: storage
4141
hostPath:

charts/tensor-fusion/templates/controller-deployment.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ spec:
6464
- name: TSDB_MYSQL_PASSWORD
6565
valueFrom:
6666
secretKeyRef:
67-
name: tf-greptimedb-secret
67+
name: {{ .Release.Name }}-greptimedb-secret
6868
key: password
6969
{{- else }}
7070
- name: TSDB_MYSQL_USER
@@ -113,7 +113,7 @@ spec:
113113
- name: TSDB_MYSQL_PASSWORD
114114
valueFrom:
115115
secretKeyRef:
116-
name: tf-greptimedb-secret
116+
name: {{ .Release.Name }}-greptimedb-secret
117117
key: password
118118
{{- else }}
119119
- name: TSDB_MYSQL_USER
@@ -157,15 +157,15 @@ spec:
157157
path: tls.key
158158
- name: vector-config
159159
configMap:
160-
name: {{ include "tensor-fusion.fullname" . }}-vector-config
160+
name: tensor-fusion-sys-vector-config
161161
defaultMode: 420
162162
- name: cloud-vendor-credentials
163163
secret:
164164
secretName: tf-cloud-vendor-credentials
165165
defaultMode: 420
166166
- configMap:
167167
defaultMode: 420
168-
name: {{ .Release.Name }}-public-gpu-info
168+
name: tensor-fusion-sys-public-gpu-info
169169
name: gpu-info
170170
- configMap:
171171
defaultMode: 420

charts/tensor-fusion/templates/gpu-public-gpu-info.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: v1
22
kind: ConfigMap
33
metadata:
4-
name: {{ .Release.Name }}-public-gpu-info
4+
name: tensor-fusion-sys-public-gpu-info
55
namespace: {{ include "tensor-fusion.namespace" . }}
66
labels:
77
tensor-fusion.ai/component: gpu-info

0 commit comments

Comments
 (0)