Skip to content

Commit 69ce0fa

Browse files
authored
Rename obsolete mentions of datasets_based (#805)
- rename prefix WORKER_LOOP_ to WORKER_
- rename DATASETS_BASED_ENDPOINT to WORKER_ENDPOINT
- rename DATASETS_BASED_CONTENT_MAX_BYTES to WORKER_CONTENT_MAX_BYTES
- ensure WORKER_STORAGE_PATHS is always used in Helm
1 parent f43a0d2 commit 69ce0fa

34 files changed

+157
-198
lines changed

DEVELOPER_GUIDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ GITHUB_TOKEN=xxx
155155

156156
## Mac OS
157157

158-
To install the [datasets based worker](./services/worker) on Mac OS, you can follow the next steps.
158+
To install the [worker](./services/worker) on Mac OS, follow the steps below.
159159

160160
### First: as an administrator
161161

chart/env/dev.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,3 @@ sizes:
239239
limits:
240240
cpu: 1
241241
memory: "4Gi"
242-
243-
# --- datasets_based ---
244-
datasetsBased:
245-
contentMaxBytes: "10_000_000"

chart/env/prod.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,3 @@ sizes:
309309
limits:
310310
cpu: 2
311311
memory: "1Gi"
312-
313-
# --- datasets_based ---
314-
datasetsBased:
315-
contentMaxBytes: "10_000_000"

chart/templates/_envDatasetsBased.tpl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,5 @@
88
value: "/tmp/modules-cache"
99
- name: NUMBA_CACHE_DIR
1010
value: "/tmp/numba-cache"
11-
- name: CONTENT_MAX_BYTES
12-
value: {{ .Values.datasetsBased.contentMaxBytes}}
1311
{{- end -}}
1412

chart/templates/_envWorker.tpl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# Copyright 2022 The HuggingFace Authors.
3+
4+
{{- define "envWorker" -}}
5+
- name: WORKER_CONTENT_MAX_BYTES
6+
value: {{ .Values.worker.contentMaxBytes | quote}}
7+
# WORKER_ENDPOINT is not defined here, it's hard-coded in the template
8+
- name: WORKER_MAX_DISK_USAGE_PCT
9+
value: {{ .Values.worker.maxDiskUsagePct | quote }}
10+
- name: WORKER_MAX_LOAD_PCT
11+
value: {{ .Values.worker.maxLoadPct | quote }}
12+
- name: WORKER_MAX_MEMORY_PCT
13+
value: {{ .Values.worker.maxMemoryPct | quote }}
14+
- name: WORKER_SLEEP_SECONDS
15+
value: {{ .Values.worker.sleepSeconds | quote }}
16+
- name: WORKER_STORAGE_PATHS
17+
value: {{ .Values.assets.storageDirectory | quote }}
18+
# ^ note: for datasets_based workers, the datasets cache is automatically added, so no need to add it here
19+
{{- end -}}

chart/templates/_envWorkerLoop.tpl

Lines changed: 0 additions & 13 deletions
This file was deleted.

chart/templates/_helpers.tpl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ imagePullSecrets:
8282
{{ include "datasetsServer.images.image" (dict "imageRoot" .Values.images.services.api "global" .Values.global.huggingface) }}
8383
{{- end -}}
8484

85-
{{- define "workers.datasetsBased.image" -}}
85+
{{- define "services.worker.image" -}}
8686
{{ include "datasetsServer.images.image" (dict "imageRoot" .Values.images.services.worker "global" .Values.global.huggingface) }}
8787
{{- end -}}
8888

@@ -263,4 +263,4 @@ Return the HUB url
263263
{{- $hubName := ((list $.Release.Name "hub") | join "-") | trunc 63 | trimSuffix "-" -}}
264264
http://{{ $hubName }}
265265
{{- end -}}
266-
{{- end -}}
266+
{{- end -}}

chart/templates/worker/config-names/_container.tpl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,19 @@
33

44
{{- define "containerWorkerConfigNames" -}}
55
- name: "{{ include "name" . }}-worker-config-names"
6-
image: {{ include "workers.datasetsBased.image" . }}
6+
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: DATASETS_BASED_ENDPOINT
9+
- name: WORKER_ENDPOINT
1010
value: "/config-names"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}
1313
{{ include "envQueue" . | nindent 2 }}
1414
{{ include "envCommon" . | nindent 2 }}
15-
{{ include "envWorkerLoop" . | nindent 2 }}
15+
{{ include "envWorker" . | nindent 2 }}
1616
{{ include "envDatasetsBased" . | nindent 2 }}
1717
- name: DATASETS_BASED_HF_DATASETS_CACHE
1818
value: {{ printf "%s/config-names/datasets" .Values.cacheDirectory | quote }}
19-
- name: DATASETS_BASED_CONTENT_MAX_BYTES
20-
value: {{ .Values.datasetsBased.contentMaxBytes | quote}}
2119
- name: QUEUE_MAX_JOBS_PER_NAMESPACE
2220
# value: {{ .Values.queue.maxJobsPerNamespace | quote }}
2321
# overridden

chart/templates/worker/dataset-info/_container.tpl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,16 @@
33

44
{{- define "containerWorkerDatasetInfo" -}}
55
- name: "{{ include "name" . }}-worker-dataset-info"
6-
image: {{ include "workers.datasetsBased.image" . }}
6+
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: DATASETS_BASED_ENDPOINT
9+
- name: WORKER_ENDPOINT
1010
value: "/dataset-info"
1111
# ^ hard-coded
1212
{{ include "envCache" . | nindent 2 }}
1313
{{ include "envQueue" . | nindent 2 }}
1414
{{ include "envCommon" . | nindent 2 }}
15-
{{ include "envWorkerLoop" . | nindent 2 }}
16-
- name: DATASETS_BASED_CONTENT_MAX_BYTES
17-
value: {{ .Values.datasetsBased.contentMaxBytes | quote}}
15+
{{ include "envWorker" . | nindent 2 }}
1816
- name: QUEUE_MAX_JOBS_PER_NAMESPACE
1917
# value: {{ .Values.queue.maxJobsPerNamespace | quote }}
2018
# overridden

chart/templates/worker/first-rows/_container.tpl

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,20 @@
33

44
{{- define "containerWorkerFirstRows" -}}
55
- name: "{{ include "name" . }}-worker-first-rows"
6-
image: {{ include "workers.datasetsBased.image" . }}
6+
image: {{ include "services.worker.image" . }}
77
imagePullPolicy: {{ .Values.images.pullPolicy }}
88
env:
9-
- name: DATASETS_BASED_ENDPOINT
9+
- name: WORKER_ENDPOINT
1010
value: "/first-rows"
1111
# ^ hard-coded
1212
{{ include "envAssets" . | nindent 2 }}
1313
{{ include "envCache" . | nindent 2 }}
1414
{{ include "envQueue" . | nindent 2 }}
1515
{{ include "envCommon" . | nindent 2 }}
16-
{{ include "envWorkerLoop" . | nindent 2 }}
17-
- name: WORKER_LOOP_STORAGE_PATHS
18-
value: {{ .Values.assets.storageDirectory | quote }}
19-
# ^ note: the datasets cache is automatically added, so no need to add it here
16+
{{ include "envWorker" . | nindent 2 }}
2017
{{ include "envDatasetsBased" . | nindent 2 }}
2118
- name: DATASETS_BASED_HF_DATASETS_CACHE
2219
value: {{ printf "%s/first-rows/datasets" .Values.cacheDirectory | quote }}
23-
- name: DATASETS_BASED_CONTENT_MAX_BYTES
24-
value: {{ .Values.datasetsBased.contentMaxBytes | quote}}
2520
- name: QUEUE_MAX_JOBS_PER_NAMESPACE
2621
# value: {{ .Values.queue.maxJobsPerNamespace | quote }}
2722
# overridden

0 commit comments

Comments
 (0)