Skip to content

Commit f8ad0f8

Browse files
authored
chore: initial Helm chart for Tensor Fusion (#27)
1 parent d712213 commit f8ad0f8

26 files changed

+2654
-1
lines changed

charts/tensor-fusion/.helmignore

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

charts/tensor-fusion/Chart.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: tensor-fusion
description: A Helm chart for deploying Tensor Fusion on Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
# NOTE(review): "1.16.0" looks like the value scaffolded by `helm create`; confirm it
# matches the Tensor Fusion release this chart actually deploys.
appVersion: "1.16.0"
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
---
# CustomResourceDefinition for the cluster-scoped GPUNodeClass resource
# (group tensor-fusion.ai, version v1).
# The controller-gen annotation below indicates this manifest is generated;
# prefer changing the source API types and regenerating over hand-editing.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.4
  name: gpunodeclasses.tensor-fusion.ai
spec:
  group: tensor-fusion.ai
  names:
    kind: GPUNodeClass
    listKind: GPUNodeClassList
    plural: gpunodeclasses
    singular: gpunodeclass
  scope: Cluster
  versions:
  - name: v1
    schema:
      openAPIV3Schema:
        description: GPUNodeClass is the Schema for the gpunodeclasses API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: GPUNodeClassSpec defines the desired state of GPUNodeClass.
            # NOTE(review): no spec field is marked required and none carries
            # enum/pattern validation, so any string is accepted for every field.
            # Field names (ebs, instanceProfile, httpTokens, ...) resemble cloud
            # instance launch settings -- presumably AWS EC2; confirm.
            properties:
              blockDeviceMappings:
                items:
                  properties:
                    deviceName:
                      type: string
                    ebs:
                      properties:
                        deleteOnTermination:
                          type: boolean
                        encrypted:
                          type: boolean
                        volumeSize:
                          type: string
                        volumeType:
                          type: string
                      type: object
                  type: object
                type: array
              instanceProfile:
                type: string
              metadataOptions:
                properties:
                  httpEndpoint:
                    type: string
                  httpProtocolIPv6:
                    type: string
                  httpPutResponseHopLimit:
                    type: integer
                  httpTokens:
                    type: string
                type: object
              osImageFamily:
                type: string
              osImageSelectorTerms:
                items:
                  properties:
                    name:
                      type: string
                    owner:
                      type: string
                  type: object
                type: array
              securityGroupSelectorTerms:
                items:
                  properties:
                    id:
                      type: string
                  type: object
                type: array
              subnetSelectorTerms:
                items:
                  properties:
                    id:
                      type: string
                  type: object
                type: array
              tags:
                additionalProperties:
                  type: string
                type: object
              userData:
                type: string
            type: object
          status:
            description: GPUNodeClassStatus defines the observed state of GPUNodeClass.
            # The status schema declares no properties.
            type: object
        type: object
    served: true
    storage: true
    # Enables the /status subresource for this version.
    subresources:
      status: {}
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
---
# CustomResourceDefinition for the cluster-scoped GPUNode resource
# (group tensor-fusion.ai, version v1).
# The controller-gen annotation below indicates this manifest is generated;
# prefer changing the source API types and regenerating over hand-editing.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.4
  name: gpunodes.tensor-fusion.ai
spec:
  group: tensor-fusion.ai
  names:
    kind: GPUNode
    listKind: GPUNodeList
    plural: gpunodes
    singular: gpunode
  scope: Cluster
  versions:
  - name: v1
    schema:
      openAPIV3Schema:
        description: GPUNode is the Schema for the gpunodes API.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: GPUNodeSpec defines the desired state of GPUNode.
            properties:
              gpuCardIndices:
                description: |-
                  if not all GPU cards should be used, specify the GPU card indices, default to empty,
                  onboard all GPU cards to the pool
                items:
                  type: integer
                type: array
              manageMode:
                type: string
            type: object
          status:
            description: GPUNodeStatus defines the observed state of GPUNode.
            properties:
              availableTFlops:
                format: int32
                type: integer
              availableVRAM:
                type: string
              conditions:
                items:
                  description: Condition contains details for one aspect of the current
                    state of this API Resource.
                  properties:
                    lastTransitionTime:
                      description: |-
                        lastTransitionTime is the last time the condition transitioned from one status to another.
                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
                      format: date-time
                      type: string
                    message:
                      description: |-
                        message is a human readable message indicating details about the transition.
                        This may be an empty string.
                      maxLength: 32768
                      type: string
                    observedGeneration:
                      description: |-
                        observedGeneration represents the .metadata.generation that the condition was set based upon.
                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
                        with respect to the current state of the instance.
                      format: int64
                      minimum: 0
                      type: integer
                    reason:
                      description: |-
                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
                        Producers of specific condition types may define expected values and meanings for this field,
                        and whether the values are considered a guaranteed API.
                        The value should be a CamelCase string.
                        This field may not be empty.
                      maxLength: 1024
                      minLength: 1
                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
                      type: string
                    status:
                      description: status of the condition, one of True, False, Unknown.
                      # "True"/"False" are quoted so they stay strings, not YAML booleans.
                      enum:
                      - "True"
                      - "False"
                      - Unknown
                      type: string
                    type:
                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
                      maxLength: 316
                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
                      type: string
                  required:
                  - lastTransitionTime
                  - message
                  - reason
                  - status
                  - type
                  type: object
                type: array
              hypervisorStatus:
                properties:
                  hypervisorState:
                    type: string
                  hypervisorVersion:
                    type: string
                  lastHeartbeatTime:
                    format: date-time
                    type: string
                type: object
              loadedModels:
                items:
                  type: string
                type: array
              managedGPUResourceIDs:
                items:
                  type: string
                type: array
              managedGPUs:
                format: int32
                type: integer
              nodeInfo:
                properties:
                  architecture:
                    type: string
                  gpuCount:
                    format: int32
                    type: integer
                  gpuDriverVersion:
                    type: string
                  gpuModel:
                    type: string
                  hostname:
                    type: string
                  ip:
                    type: string
                  # NOTE(review): "kernalVersion" looks like a misspelling of
                  # "kernelVersion". Renaming a served field is an API-breaking
                  # change, so fix it in the source API types (with a migration
                  # plan) rather than in this manifest.
                  kernalVersion:
                    type: string
                  operatingSystem:
                    type: string
                  osImage:
                    type: string
                type: object
              # NOTE(review): this description refers to TensorFusionCluster although
              # the field belongs to GPUNode status -- presumably the phase type is
              # shared between the two resources; confirm in the API types.
              phase:
                description: TensorFusionClusterPhase represents the phase of the
                  TensorFusionCluster resource.
                type: string
              totalGPUs:
                format: int32
                type: integer
              totalTFlops:
                format: int32
                type: integer
              totalVRAM:
                type: string
            type: object
        type: object
    served: true
    storage: true
    # Enables the /status subresource for this version.
    subresources:
      status: {}

0 commit comments

Comments
 (0)