Skip to content

Commit fb3399a

Browse files
Generic worker (#802)
* feat: 🎸 use primitive parameters, add release, add tests * style: 💄 fix style * feat: 🎸 use primitive parameters, add release, add tests * style: 💄 fix style * feat: 🎸 use primitive parameters, add release, add tests * style: 💄 fix style * style: 💄 fix style * feat: 🎸 process any type of jobs with the same worker A worker can still be restricted to one or various types of jobs. See the charts for an example. * refactor: 💡 raise an error, instead of using an assert * refactor: 💡 add debug * fix: 🐛 fix check * test: 💍 fix test * test: 💍 don't persist the mongodb volume between tests * Apply suggestions from code review Co-authored-by: Andrea Francis Soria Jimenez <[email protected]> * refactor: 💡 remove is True/is False (more pythonic) * refactor: 💡 remove unneeded line --------- Co-authored-by: Andrea Francis Soria Jimenez <[email protected]>
1 parent 69ce0fa commit fb3399a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+556
-266
lines changed

chart/env/dev.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,3 +239,13 @@ sizes:
239239
limits:
240240
cpu: 1
241241
memory: "4Gi"
242+
243+
# Generic worker (dev): a single replica of the worker that is not tied to
# one job type.
genericWorker:
  replicas: 1
  resources:
    # Guaranteed share
    requests:
      cpu: 100m
      memory: "512Mi"
    # Hard cap
    limits:
      cpu: 1
      memory: "4Gi"

chart/env/prod.yaml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ firstRows:
231231

232232
nodeSelector:
233233
role-datasets-server: "true"
234-
replicas: 90
234+
replicas: 80
235235
resources:
236236
requests:
237237
cpu: 1
@@ -253,7 +253,7 @@ parquetAndDatasetInfo:
253253
maxJobsPerNamespace: 4
254254
nodeSelector:
255255
role-datasets-server: "true"
256-
replicas: 30
256+
replicas: 20
257257
resources:
258258
requests:
259259
cpu: 1
@@ -309,3 +309,18 @@ sizes:
309309
limits:
310310
cpu: 2
311311
memory: "1Gi"
312+
313+
# Generic worker (prod): the worker that is not tied to one job type.
genericWorker:
  replicas: 20
  nodeSelector:
    role-datasets-server: "true"
  queue:
    # Maximum number of jobs running at the same time for the same namespace
    maxJobsPerNamespace: 4
  resources:
    # Guaranteed share
    requests:
      cpu: 1
      memory: "8Gi"
    # Hard cap
    limits:
      cpu: 2
      memory: "30Gi"

chart/templates/_helpers.tpl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,11 @@ app.kubernetes.io/component: "{{ include "name" . }}-worker-dataset-info"
166166
app.kubernetes.io/component: "{{ include "name" . }}-worker-sizes"
167167
{{- end -}}
168168

169+
{{/*
Labels of the generic worker: the common datasetServer labels plus the
"<name>-worker-generic" component label.
*/}}
{{- define "labels.genericWorker" -}}
{{ include "datasetServer.labels" . }}
app.kubernetes.io/component: "{{ include "name" . }}-worker-generic"
{{- end -}}
173+
169174
{{/*
170175
Return the api ingress annotation
171176
*/}}

chart/templates/worker/config-names/_container.tpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: WORKER_ENDPOINT
9+
- name: WORKER_ONLY_JOB_TYPES
1010
value: "/config-names"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}

chart/templates/worker/dataset-info/_container.tpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: WORKER_ENDPOINT
9+
- name: WORKER_ONLY_JOB_TYPES
1010
value: "/dataset-info"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}

chart/templates/worker/first-rows/_container.tpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: WORKER_ENDPOINT
9+
- name: WORKER_ONLY_JOB_TYPES
1010
value: "/first-rows"
1111
# ^ hard-coded
1212
{{ include "envAssets" . | nindent 2 }}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# Copyright 2022 The HuggingFace Authors.
3+
4+
{{/*
Container of the generic worker.
It does not set WORKER_ONLY_JOB_TYPES, and it carries both the FIRST_ROWS_*
and the PARQUET_AND_DATASET_INFO_* settings.
Reads .Values.genericWorker (queue.maxJobsPerNamespace, resources),
.Values.firstRows, .Values.parquetAndDatasetInfo, .Values.cacheDirectory and
.Values.secrets.userHfToken.
*/}}
{{- define "containerWorkerGeneric" -}}
- name: "{{ include "name" . }}-worker-generic"
  image: {{ include "services.worker.image" . }}
  imagePullPolicy: {{ .Values.images.pullPolicy }}
  env:
  {{ include "envAssets" . | nindent 2 }}
  {{ include "envCache" . | nindent 2 }}
  {{ include "envQueue" . | nindent 2 }}
  {{ include "envCommon" . | nindent 2 }}
  {{ include "envWorker" . | nindent 2 }}
  {{ include "envDatasetsBased" . | nindent 2 }}
  - name: DATASETS_BASED_HF_DATASETS_CACHE
    value: {{ printf "%s/generic/datasets" .Values.cacheDirectory | quote }}
  - name: QUEUE_MAX_JOBS_PER_NAMESPACE
    # Overridden: uses the generic worker's own limit instead of the
    # chart-wide queue.maxJobsPerNamespace value.
    # (No template action in this comment on purpose: Helm evaluates actions
    # even when they sit inside a YAML comment.)
    value: {{ .Values.genericWorker.queue.maxJobsPerNamespace | quote }}
  - name: FIRST_ROWS_MAX_BYTES
    value: {{ .Values.firstRows.maxBytes | quote }}
  - name: FIRST_ROWS_MAX_NUMBER
    value: {{ .Values.firstRows.maxNumber | quote }}
  - name: FIRST_ROWS_MIN_CELL_BYTES
    value: {{ .Values.firstRows.minCellBytes | quote }}
  - name: FIRST_ROWS_MIN_NUMBER
    value: {{ .Values.firstRows.minNumber | quote }}
  - name: FIRST_ROWS_COLUMNS_MAX_NUMBER
    value: {{ .Values.firstRows.columnsMaxNumber | quote }}
  - name: PARQUET_AND_DATASET_INFO_BLOCKED_DATASETS
    value: {{ .Values.parquetAndDatasetInfo.blockedDatasets | quote }}
  - name: PARQUET_AND_DATASET_INFO_COMMIT_MESSAGE
    value: {{ .Values.parquetAndDatasetInfo.commitMessage | quote }}
  - name: PARQUET_AND_DATASET_INFO_COMMITTER_HF_TOKEN
    # Taken from a Kubernetes secret when secrets.userHfToken.fromSecret is
    # set, otherwise from the inline value.
    {{- if .Values.secrets.userHfToken.fromSecret }}
    valueFrom:
      secretKeyRef:
        name: {{ .Values.secrets.userHfToken.secretName | quote }}
        key: HF_TOKEN
        optional: false
    {{- else }}
    # Quoted so that an empty or special-character token still renders as a
    # YAML string.
    value: {{ .Values.secrets.userHfToken.value | quote }}
    {{- end }}
  - name: PARQUET_AND_DATASET_INFO_MAX_DATASET_SIZE
    value: {{ .Values.parquetAndDatasetInfo.maxDatasetSize | quote }}
  - name: PARQUET_AND_DATASET_INFO_SOURCE_REVISION
    value: {{ .Values.parquetAndDatasetInfo.sourceRevision | quote }}
  - name: PARQUET_AND_DATASET_INFO_SUPPORTED_DATASETS
    value: {{ .Values.parquetAndDatasetInfo.supportedDatasets | quote }}
  - name: PARQUET_AND_DATASET_INFO_TARGET_REVISION
    value: {{ .Values.parquetAndDatasetInfo.targetRevision | quote }}
  - name: PARQUET_AND_DATASET_INFO_URL_TEMPLATE
    value: {{ .Values.parquetAndDatasetInfo.urlTemplate | quote }}
  volumeMounts:
  {{ include "volumeMountAssetsRW" . | nindent 2 }}
  {{ include "volumeMountCache" . | nindent 2 }}
  securityContext:
    allowPrivilegeEscalation: false
  resources: {{ toYaml .Values.genericWorker.resources | nindent 4 }}
{{- end -}}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# Copyright 2022 The HuggingFace Authors.
3+
4+
# Deployment of the generic worker pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ include "name" . }}-worker-generic"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.genericWorker" . | nindent 4 }}
spec:
  # Number of worker pods, set per environment in the values files.
  replicas: {{ .Values.genericWorker.replicas }}
  # Recreate: existing pods are terminated before the new ones are created.
  strategy:
    type: Recreate
  progressDeadlineSeconds: 600
  revisionHistoryLimit: 10
  selector:
    matchLabels: {{ include "labels.genericWorker" . | nindent 6 }}
  template:
    metadata:
      labels: {{ include "labels.genericWorker" . | nindent 8 }}
    spec:
      {{- include "image.imagePullSecrets" . | nindent 6 }}
      securityContext: {{ include "securityContext" . | nindent 8 }}
      nodeSelector: {{ toYaml .Values.genericWorker.nodeSelector | nindent 8 }}
      tolerations: {{ toYaml .Values.genericWorker.tolerations | nindent 8 }}
      volumes: {{ include "volumeData" . | nindent 8 }}
      # Init containers for the assets and cache directories.
      initContainers:
        {{ include "initContainerAssets" . | nindent 8 }}
        {{ include "initContainerCache" . | nindent 8 }}
      containers: {{ include "containerWorkerGeneric" . | nindent 8 }}

chart/templates/worker/parquet-and-dataset-info/_container.tpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: WORKER_ENDPOINT
9+
- name: WORKER_ONLY_JOB_TYPES
1010
value: "/parquet-and-dataset-info"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}

chart/templates/worker/parquet/_container.tpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: WORKER_ENDPOINT
9+
- name: WORKER_ONLY_JOB_TYPES
1010
value: "/parquet"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}

0 commit comments

Comments
 (0)