
Commit f43a0d2

severo, albertvillanova and AndreaFrancis authored

Move workers/datasets_based to services/worker (#800)
* feat: 🎸 use primitive parameters, add release, add tests
* style: 💄 fix style
* feat: 🎸 use primitive parameters, add release, add tests
* style: 💄 fix style
* log a warning when the migration cannot access database thanks @AndreaFrancis
* Update libs/libcommon/tests/test_resources.py
  Co-authored-by: Albert Villanova del Moral <[email protected]>
* feat: 🎸 use primitive parameters, add release, add tests
* style: 💄 fix style
* feat: 🎸 move workers/datasets_based to services/workers
* fix: 🐛 fix the Helm chart
* feat: 🎸 upgrade the minor versions of the packages and update the kenlm source
* style: 💄 fix style
* test: 💍 fix the tests if the runner is slow
* fix: 🐛 refactor to avoid having worker.py in the root
  Having worker.py at the root is not allowed since it's also the name of the package. Now:
  - WorkerLoop becomes Loop
  - Worker becomes JobRunner
  The terms are more accurate. Indeed, a JobRunner only processes one job.
* Update services/worker/pyproject.toml
  Co-authored-by: Andrea Francis Soria Jimenez <[email protected]>
* Update services/worker/src/worker/config.py
  Co-authored-by: Andrea Francis Soria Jimenez <[email protected]>
* Revert "Update services/worker/src/worker/config.py"
  This reverts commit 1bd9324.

---------

Co-authored-by: Albert Villanova del Moral <[email protected]>
Co-authored-by: Andrea Francis Soria Jimenez <[email protected]>
1 parent 67c2eee commit f43a0d2
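
To inspect the move locally, `git show --stat` summarizes the commit, and `git log --follow` tracks a file across the rename. A quick sketch (the file path is one example taken from this commit's message):

```bash
# Show the commit summary, then trace one moved file's history across
# the workers/datasets_based -> services/worker rename.
git show --stat f43a0d2
git log --follow --oneline -- services/worker/pyproject.toml
```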

102 files changed: +1509 −638 lines changed

.github/workflows/build_push_docker_hub.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -20,8 +20,8 @@ jobs:
             project: admin
           - directory: services
             project: api
-          - directory: workers
-            project: datasets_based
+          - directory: services
+            project: worker
     runs-on: "ubuntu-latest"
     steps:
       - name: Checkout repository
```
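
The matrix entry above tells CI which directory to build and which image name to produce. A hedged local equivalent, assuming the Dockerfile sits at the root of the moved package (tag and Dockerfile path are assumptions; the repository name comes from the Helm values updated later in this commit):

```bash
# Build the worker image from its new location.
docker build \
  -t huggingface/datasets-server-services-worker:dev \
  -f services/worker/Dockerfile \
  .
```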

.github/workflows/e2e.yml

Lines changed: 0 additions & 2 deletions
```diff
@@ -11,7 +11,6 @@ on:
       - 'e2e/**'
       - 'libs/**'
       - 'services/**'
-      - 'workers/**'
       - 'chart/static-files/openapi.json'
       - '.github/workflows/_e2e_tests.yml'
       - '.github/workflows/_quality-python.yml'
@@ -23,7 +22,6 @@ on:
       - 'e2e/**'
       - 'libs/**'
       - 'services/**'
-      - 'workers/**'
       - 'chart/static-files/openapi.json'
       - '.github/workflows/_e2e_tests.yml'
       - '.github/workflows/_quality-python.yml'
```
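
With the `workers/` tree gone, no workflow should reference it anymore. A quick way to double-check after applying this commit:

```bash
# List any leftover references to the removed directory in CI and chart config.
git grep -n "workers/" -- .github/workflows chart
```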
.github/workflows/s-worker.yml

Lines changed: 7 additions & 7 deletions
```diff
@@ -1,25 +1,25 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright 2022 The HuggingFace Authors.
 
-name: workers/datasets_based
+name: services/worker
 on:
   workflow_dispatch:
   push:
     branches:
       - main
     paths:
       - 'libs/libcommon/**'
-      - 'workers/datasets_based/**'
-      - '.github/workflows/w-datasets_based.yml'
+      - 'services/worker/**'
+      - '.github/workflows/s-worker.yml'
       - '.github/workflows/_quality-python.yml'
       - '.github/workflows/_unit-tests-python.yml'
       - 'tools/docker-compose-mongo.yml'
       - 'vendors/'
   pull_request:
     paths:
       - 'libs/libcommon/**'
-      - 'workers/datasets_based/**'
-      - '.github/workflows/w-datasets_based.yml'
+      - 'services/worker/**'
+      - '.github/workflows/s-worker.yml'
       - '.github/workflows/_quality-python.yml'
       - '.github/workflows/_unit-tests-python.yml'
       - 'tools/docker-compose-mongo.yml'
@@ -28,10 +28,10 @@ jobs:
   quality:
     uses: ./.github/workflows/_quality-python.yml
     with:
-      working-directory: workers/datasets_based
+      working-directory: services/worker
       is-datasets-worker: true
   unit-tests:
     uses: ./.github/workflows/_unit-tests-python.yml
     with:
-      working-directory: workers/datasets_based
+      working-directory: services/worker
       is-datasets-worker: true
```
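
Both jobs delegate to reusable workflows with `working-directory` pointing at the moved package. A rough local equivalent of the unit-test job, assuming the package uses poetry and pytest like the rest of the monorepo (the exact steps live in the reusable workflows, not shown here):

```bash
# Install and test the worker package from its new home.
cd services/worker
poetry install
poetry run pytest -x tests/  # test directory name is an assumption
```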

.vscode/monorepo.code-workspace

Lines changed: 5 additions & 6 deletions
```diff
@@ -25,21 +25,20 @@
       "path": "../services/api"
     },
     {
-      "name": "services/reverse-proxy",
-      "path": "../services/reverse-proxy"
+      "name": "services/worker",
+      "path": "../services/worker"
     },
     {
-      "name": "workers/datasets_based",
-      "path": "../workers/datasets_based"
+      "name": "services/reverse-proxy",
+      "path": "../services/reverse-proxy"
     }
   ],
   "settings": {
     "files.exclude": {
       "e2e": true,
       "jobs": true,
       "libs": true,
-      "services": true,
-      "workers": true
+      "services": true
     },
     "python.formatting.provider": "black",
     "python.linting.enabled": true,
```

DEVELOPER_GUIDE.md

Lines changed: 6 additions & 7 deletions
````diff
@@ -28,7 +28,7 @@ make dev-start
 In development mode, you don't need to rebuild the docker images to apply a change in a worker.
 You can just restart the worker's docker container and it will apply your changes.
 
-To install a single job (in [jobs](./jobs)), library (in [libs](./libs)), service (in [services](./services)) or worker (in [workers](./workers)), go to their respective directory, and install Python 3.9 (consider [pyenv](https://github.com/pyenv/pyenv)) and [poetry](https://python-poetry.org/docs/master/#installation) (don't forget to add `poetry` to the `PATH` environment variable).
+To install a single job (in [jobs](./jobs)), library (in [libs](./libs)) or service (in [services](./services)), go to their respective directory, and install Python 3.9 (consider [pyenv](https://github.com/pyenv/pyenv)) and [poetry](https://python-poetry.org/docs/master/#installation) (don't forget to add `poetry` to the `PATH` environment variable).
 
 If you use pyenv:
 
@@ -51,20 +51,19 @@ If you use VSCode, it might be useful to use the ["monorepo" workspace](./.vscode/monorepo.code-workspace)
 
 ## Architecture
 
-The repository is structured as a monorepo, with Python libraries and applications in [jobs](./jobs)), [libs](./libs), [services](./services) and [workers](./workers):
+The repository is structured as a monorepo, with Python libraries and applications in [jobs](./jobs)), [libs](./libs) and [services](./services):
 
 - [jobs](./jobs) contains the one-time jobs run by Helm before deploying the pods. For now, the only job migrates the databases when needed.
 - [libs](./libs) contains the Python libraries used by the services and workers. For now, the only library is [libcommon](./libs/libcommon), which contains the common code for the services and workers.
-- [services](./services) contains the applications: the public API, the admin API (which is separated from the public API and might be published under its own domain at some point) and the reverse proxy.
-- [workers](./workers) contains the workers that process the queue asynchronously: they get a "job" (caution: not the Helm jobs, but the jobs stored in the queue), process the expected response for the associated endpoint, and store the response in the cache.
+- [services](./services) contains the applications: the public API, the admin API (which is separated from the public API and might be published under its own domain at some point), the reverse proxy, and the worker that processes the queue asynchronously: it gets a "job" (caution: the jobs stored in the queue, not the Helm jobs), processes the expected response for the associated endpoint, and stores the response in the cache.
 
 If you have access to the internal HF notion, see https://www.notion.so/huggingface2/Datasets-server-464848da2a984e999c540a4aa7f0ece5.
 
 The application is distributed in several components.
 
 [api](./services/api) is a web server that exposes the [API endpoints](https://huggingface.co/docs/datasets-server). Apart from some endpoints (`valid`, `is-valid`), all the responses are served from pre-computed responses. That's the main point of this project: generating these responses takes time, and the API server provides this service to the users.
 
-The precomputed responses are stored in a Mongo database called "cache". They are computed by [workers](./workers) which take their jobs from a job queue stored in a Mongo database called "queue", and store the results (error or valid response) into the "cache" (see [libcommon](./libs/libcommon)).
+The precomputed responses are stored in a Mongo database called "cache". They are computed by [workers](./services/worker) which take their jobs from a job queue stored in a Mongo database called "queue", and store the results (error or valid response) into the "cache" (see [libcommon](./libs/libcommon)).
 
 The API service exposes the `/webhook` endpoint which is called by the Hub on every creation, update or deletion of a dataset on the Hub. On deletion, the cached responses are deleted. On creation or update, a new job is appended in the "queue" database.
 
@@ -156,7 +155,7 @@ GITHUB_TOKEN=xxx
 
 ## Mac OS
 
-To install the [datasets based worker](./workers/datasets_based) on Mac OS, you can follow the next steps.
+To install the [datasets based worker](./services/worker) on Mac OS, you can follow the next steps.
 
 ### First: as an administrator
 
@@ -219,7 +218,7 @@ $ pyenv install 3.9.15
 Check that the expected local version of Python is used:
 
 ```bash
-$ cd workers/datasets_based
+$ cd services/worker
 $ python --version
 Python 3.9.15
 ```
````
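
The install flow the guide describes, condensed into one sketch (assumes pyenv and poetry are already on `PATH`; the Python version matches the guide):

```bash
# Per-package setup under the new layout.
cd services/worker
pyenv install 3.9.15   # skip if already installed
pyenv local 3.9.15     # makes `python --version` report 3.9.15 here
poetry env use 3.9.15
poetry install
```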

chart/env/dev.yaml

Lines changed: 2 additions & 3 deletions
```diff
@@ -38,11 +38,10 @@ images:
       useGlobalRegistry: false
       repository: datasets-server-services-api
       tag: sha-27ad2f7
-  workers:
-    datasetsBased:
+    worker:
       registry: huggingface
       useGlobalRegistry: false
-      repository: datasets-server-workers-datasets_based
+      repository: datasets-server-services-worker
       tag: sha-27ad2f7
 
 secrets:
```

chart/env/prod.yaml

Lines changed: 2 additions & 3 deletions
```diff
@@ -28,11 +28,10 @@ images:
       useGlobalRegistry: false
       repository: datasets-server-services-api
       tag: sha-27ad2f7
-  workers:
-    datasetsBased:
+    worker:
       registry: huggingface
       useGlobalRegistry: false
-      repository: datasets-server-workers-datasets_based
+      repository: datasets-server-services-worker
       tag: sha-27ad2f7
 
 secrets:
```

chart/templates/_helpers.tpl

Lines changed: 1 addition & 1 deletion
```diff
@@ -83,7 +83,7 @@ imagePullSecrets:
 {{- end -}}
 
 {{- define "workers.datasetsBased.image" -}}
-{{ include "datasetsServer.images.image" (dict "imageRoot" .Values.images.workers.datasetsBased "global" .Values.global.huggingface) }}
+{{ include "datasetsServer.images.image" (dict "imageRoot" .Values.images.services.worker "global" .Values.global.huggingface) }}
 {{- end -}}
 
 {{- define "image.imagePullSecrets" -}}
```

chart/values.yaml

Lines changed: 2 additions & 3 deletions
```diff
@@ -35,11 +35,10 @@ images:
       useGlobalRegistry: false
       repository: datasets-server-services-api
       tag: sha-27ad2f7
-  workers:
-    datasetsBased:
+    worker:
       registry: huggingface
       useGlobalRegistry: false
-      repository: datasets-server-workers-datasets_based
+      repository: datasets-server-services-worker
       tag: sha-27ad2f7
 
 
```
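
Since `dev.yaml`, `prod.yaml` and `values.yaml` all move the image block under `images.services.worker`, overrides at deploy time use the new key path. A hedged example (release name and chart path are assumptions):

```bash
# Override the worker image tag with the renamed values key.
helm upgrade datasets-server ./chart \
  --values chart/env/prod.yaml \
  --set images.services.worker.tag=sha-27ad2f7
```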

docs/source/server.mdx

Lines changed: 1 addition & 1 deletion
```diff
@@ -25,7 +25,7 @@ You might've noticed the `/valid` and `/is-valid` endpoints don't have a job in
 
 Workers are responsible for executing the jobs in the queue. They complete the actual preprocessing requests, such as getting a list of splits and configurations. The workers can be controlled by configurable environment variables, like the minimum or the maximum number of rows returned by a worker or the maximum number of jobs to start per dataset user or organization.
 
-Take a look at the [workers configuration](https://github.com/huggingface/datasets-server/tree/main/workers/datasets_based#configuration) for a complete list of the environment variables if you're interested in learning more.
+Take a look at the [workers configuration](https://github.com/huggingface/datasets-server/tree/main/services/worker#configuration) for a complete list of the environment variables if you're interested in learning more.
 
 ## Cache
```
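
The worker reads its tuning knobs from environment variables, so configuration is injected before the process starts. A sketch only: the variable name below is a placeholder, not one of the documented keys; see the configuration link in the diff above for the real list:

```bash
# Placeholder variable name, for illustration only; real keys are listed in
# the linked configuration section.
export WORKER_MAX_JOBS_PER_NAMESPACE=2
cd services/worker && make run  # `make run` is an assumption about the entry point
```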
