Skip to content

Commit f2cf93e

Browse files
authored
feat(sourcegraph): add support to scale worker horizontally (#582)
ref CLO-1023 https://sourcegraph.com/docs/admin/workers#3-split-jobs-and-scale-independently `worker` is not really horizontally scalable, and it requires manually creating additional `worker` deployments with the correct `WORKER_JOB_ALLOWLIST` and `WORKER_JOB_BLOCKLIST` environment variables. This PR adds native support for managing multiple worker deployments through the Helm template. By default, it works as it does today, with a single `worker` deployment. To enable additional dedicated workers, users can specify the following overrides: ```yml worker: replicas: - jobs: - workspaces-reconciler - jobs: - codeintel-upload-janitor ``` This will create 3 distinct worker deployments (`worker`, `worker-0`, and `worker-1`) with the correct `WORKER_JOB_*` env vars. Additionally, we support overriding replica-specific resources config. See the unit tests for more information. ### Checklist - [x] Follow the [manual testing process](https://github.com/sourcegraph/deploy-sourcegraph-helm/blob/main/TEST.md) - [ ] Update [changelog](https://github.com/sourcegraph/deploy-sourcegraph-helm/blob/main/charts/sourcegraph/CHANGELOG.md) - [x] Update [Kubernetes update doc](https://docs.sourcegraph.com/admin/updates/kubernetes) ### Test plan CI, and manual testing with ```yml worker: replicas: - jobs: - workspaces-reconciler resources: limits: cpu: 200m memory: 256Mi requests: cpu: 200m memory: 256Mi ``` ``` helm upgrade --install --create-namespace -n sourcegraph -f ./override.yaml sourcegraph charts/sourcegraph/. ``` All pods are healthy ![CleanShot 2024-11-20 at 13 33 17](https://github.com/user-attachments/assets/d643a24c-0028-4126-8d38-40793c643242) You can also play around with `helm template --debug -f ./override.yaml sourcegraph charts/sourcegraph/.` and inspect the rendered YAML
1 parent 01a0e97 commit f2cf93e

File tree

5 files changed

+324
-122
lines changed

5 files changed

+324
-122
lines changed

charts/sourcegraph/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ In addition to the documented values, all services also support the following va
362362
| worker.name | string | `"worker"` | Name used by resources. Does not affect service names or PVCs. |
363363
| worker.podSecurityContext | object | `{}` | Security context for the `worker` pod, learn more from the [Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) |
364364
| worker.replicaCount | int | `1` | Number of `worker` pod |
365+
| worker.replicas | list | `[]` | Scale worker horizontally by configuring additional replicas dedicated to specific jobs. For each replica, configure the dedicated jobs to run on this replica. Learn more from https://sourcegraph.com/docs/admin/workers#3-split-jobs-and-scale-independently |
365366
| worker.resources | object | `{"limits":{"cpu":"2","memory":"4G"},"requests":{"cpu":"500m","memory":"2G"}}` | Resource requests & limits for the `worker` container, learn more from the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) |
366367
| worker.serviceAccount.create | bool | `false` | Enable creation of ServiceAccount for `worker` |
367368
| worker.serviceAccount.name | string | `""` | Name of the ServiceAccount to be created or an existing ServiceAccount |
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
{{- define "sourcegraph.worker" -}}
{{- /*
Renders a single `worker` Deployment manifest.

Takes a 5-element list as its argument:
  0: the root chart context (source of .Values)
  1: name suffix; when non-empty the Deployment is named "<worker.name>-<suffix>"
     and a `worker-replica` label is added to the selector and the pod labels
  2: value for WORKER_JOB_ALLOWLIST (the env var is omitted when empty)
  3: value for WORKER_JOB_BLOCKLIST (the env var is omitted when empty)
  4: resources mapping for the worker container (skipped in localDevMode)
*/ -}}
{{- $top := index . 0 }}
{{- $suffix := index . 1 -}}
{{- $allowlist := index . 2 -}}
{{- $blocklist := index . 3 -}}
{{- $resources := index . 4 -}}

{{- $name := $top.Values.worker.name -}}
{{- if $suffix -}}
  {{- $name = printf "%s-%s" $name $suffix -}}
{{- end -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    description: Manages background processes.
  labels:
    {{- include "sourcegraph.labels" $top | nindent 4 }}
    {{- if $top.Values.worker.labels }}
      {{- toYaml $top.Values.worker.labels | nindent 4 }}
    {{- end }}
    deploy: sourcegraph
    app.kubernetes.io/component: worker
  name: {{ $name }}
spec:
  minReadySeconds: 10
  replicas: {{ $top.Values.worker.replicaCount }}
  revisionHistoryLimit: {{ $top.Values.sourcegraph.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "sourcegraph.selectorLabels" $top | nindent 6 }}
      app: worker
      {{- /* Only suffixed (dedicated) Deployments get the extra selector label. */}}
      {{- if $suffix }}
      worker-replica: {{ $name | quote }}
      {{- end }}
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/default-container: worker
        {{- include "sourcegraph.redisChecksum" $top | nindent 8 }}
        {{- if $top.Values.sourcegraph.podAnnotations }}
        {{- toYaml $top.Values.sourcegraph.podAnnotations | nindent 8 }}
        {{- end }}
        {{- if $top.Values.worker.podAnnotations }}
        {{- toYaml $top.Values.worker.podAnnotations | nindent 8 }}
        {{- end }}
      labels:
        {{- include "sourcegraph.selectorLabels" $top | nindent 8 }}
        {{- if $top.Values.sourcegraph.podLabels }}
        {{- toYaml $top.Values.sourcegraph.podLabels | nindent 8 }}
        {{- end }}
        {{- if $top.Values.worker.podLabels }}
        {{- toYaml $top.Values.worker.podLabels | nindent 8 }}
        {{- end }}
        deploy: sourcegraph
        app: worker
        {{- if $suffix }}
        worker-replica: {{ $name | quote }}
        {{- end }}
    spec:
      containers:
      - name: worker
        env:
        {{- include "sourcegraph.redisConnection" $top | nindent 8 }}
        {{- /* Quoted so the rendered env value is always a YAML string. */}}
        {{- if $allowlist }}
        - name: WORKER_JOB_ALLOWLIST
          value: {{ $allowlist | quote }}
        {{- end }}
        {{- if $blocklist }}
        - name: WORKER_JOB_BLOCKLIST
          value: {{ $blocklist | quote }}
        {{- end }}
        {{- /* Distinct loop variable names so the Deployment's $name is not shadowed. */}}
        {{- range $envName, $envValue := $top.Values.worker.env }}
        - name: {{ $envName }}
          {{- $envValue | toYaml | nindent 10 }}
        {{- end }}
        {{- if $top.Values.blobstore.enabled }}
        - name: PRECISE_CODE_INTEL_UPLOAD_BACKEND
          value: blobstore
        - name: PRECISE_CODE_INTEL_UPLOAD_AWS_ENDPOINT
          value: http://blobstore:9000
        {{- end }}
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        {{- include "sourcegraph.openTelemetryEnv" $top | nindent 8 }}
        image: {{ include "sourcegraph.image" (list $top "worker" ) }}
        imagePullPolicy: {{ $top.Values.sourcegraph.image.pullPolicy }}
        {{- with $top.Values.worker.args }}
        args:
          {{- toYaml . | nindent 8 }}
        {{- end }}
        terminationMessagePolicy: FallbackToLogsOnError
        livenessProbe:
          httpGet:
            path: /healthz
            port: debug
            scheme: HTTP
          initialDelaySeconds: 60
          timeoutSeconds: 5
        readinessProbe:
          httpGet:
            path: /ready
            port: debug
            scheme: HTTP
          periodSeconds: 5
          timeoutSeconds: 5
        ports:
        - containerPort: 3189
          name: http
        - containerPort: 6060
          name: debug
        - containerPort: 6996
          name: prom
        {{- if not $top.Values.sourcegraph.localDevMode }}
        resources:
          {{- toYaml $resources | nindent 10 }}
        {{- end }}
        securityContext:
          {{- toYaml $top.Values.worker.containerSecurityContext | nindent 10 }}
        volumeMounts:
        {{- if $top.Values.worker.extraVolumeMounts }}
        {{- toYaml $top.Values.worker.extraVolumeMounts | nindent 8 }}
        {{- end }}
      {{- if $top.Values.worker.extraContainers }}
        {{- toYaml $top.Values.worker.extraContainers | nindent 6 }}
      {{- end }}
      securityContext:
        {{- toYaml $top.Values.worker.podSecurityContext | nindent 8 }}
      {{- include "sourcegraph.nodeSelector" (list $top "worker" ) | trim | nindent 6 }}
      {{- include "sourcegraph.affinity" (list $top "worker" ) | trim | nindent 6 }}
      {{- include "sourcegraph.tolerations" (list $top "worker" ) | trim | nindent 6 }}
      {{- with $top.Values.sourcegraph.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- include "sourcegraph.renderServiceAccountName" (list $top "worker") | trim | nindent 6 }}
      volumes:
      {{- if $top.Values.worker.extraVolumes }}
      {{- toYaml $top.Values.worker.extraVolumes | nindent 6 }}
      {{- end }}
{{- end -}}
Lines changed: 22 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,122 +1,22 @@
1-
apiVersion: apps/v1
2-
kind: Deployment
3-
metadata:
4-
annotations:
5-
description: Manages background processes.
6-
labels:
7-
{{- include "sourcegraph.labels" . | nindent 4 }}
8-
{{- if .Values.worker.labels }}
9-
{{- toYaml .Values.worker.labels | nindent 4 }}
10-
{{- end }}
11-
deploy: sourcegraph
12-
app.kubernetes.io/component: worker
13-
name: {{ .Values.worker.name }}
14-
spec:
15-
minReadySeconds: 10
16-
replicas: {{ .Values.worker.replicaCount }}
17-
revisionHistoryLimit: {{ .Values.sourcegraph.revisionHistoryLimit }}
18-
selector:
19-
matchLabels:
20-
{{- include "sourcegraph.selectorLabels" . | nindent 6 }}
21-
app: worker
22-
strategy:
23-
rollingUpdate:
24-
maxSurge: 1
25-
maxUnavailable: 1
26-
type: RollingUpdate
27-
template:
28-
metadata:
29-
annotations:
30-
kubectl.kubernetes.io/default-container: worker
31-
{{- include "sourcegraph.redisChecksum" . | nindent 8 }}
32-
{{- if .Values.sourcegraph.podAnnotations }}
33-
{{- toYaml .Values.sourcegraph.podAnnotations | nindent 8 }}
34-
{{- end }}
35-
{{- if .Values.worker.podAnnotations }}
36-
{{- toYaml .Values.worker.podAnnotations | nindent 8 }}
37-
{{- end }}
38-
labels:
39-
{{- include "sourcegraph.selectorLabels" . | nindent 8 }}
40-
{{- if .Values.sourcegraph.podLabels }}
41-
{{- toYaml .Values.sourcegraph.podLabels | nindent 8 }}
42-
{{- end }}
43-
{{- if .Values.worker.podLabels }}
44-
{{- toYaml .Values.worker.podLabels | nindent 8 }}
45-
{{- end }}
46-
deploy: sourcegraph
47-
app: worker
48-
spec:
49-
containers:
50-
- name: worker
51-
env:
52-
{{- include "sourcegraph.redisConnection" .| nindent 8 }}
53-
{{- range $name, $item := .Values.worker.env}}
54-
- name: {{ $name }}
55-
{{- $item | toYaml | nindent 10 }}
56-
{{- end }}
57-
{{- if .Values.blobstore.enabled }}
58-
- name: PRECISE_CODE_INTEL_UPLOAD_BACKEND
59-
value: blobstore
60-
- name: PRECISE_CODE_INTEL_UPLOAD_AWS_ENDPOINT
61-
value: http://blobstore:9000
62-
{{- end }}
63-
- name: POD_NAME
64-
valueFrom:
65-
fieldRef:
66-
fieldPath: metadata.name
67-
{{- include "sourcegraph.openTelemetryEnv" . | nindent 8 }}
68-
image: {{ include "sourcegraph.image" (list . "worker" ) }}
69-
imagePullPolicy: {{ .Values.sourcegraph.image.pullPolicy }}
70-
{{- with .Values.worker.args }}
71-
args:
72-
{{- toYaml . | nindent 8 }}
73-
{{- end }}
74-
terminationMessagePolicy: FallbackToLogsOnError
75-
livenessProbe:
76-
httpGet:
77-
path: /healthz
78-
port: debug
79-
scheme: HTTP
80-
initialDelaySeconds: 60
81-
timeoutSeconds: 5
82-
readinessProbe:
83-
httpGet:
84-
path: /ready
85-
port: debug
86-
scheme: HTTP
87-
periodSeconds: 5
88-
timeoutSeconds: 5
89-
ports:
90-
- containerPort: 3189
91-
name: http
92-
- containerPort: 6060
93-
name: debug
94-
- containerPort: 6996
95-
name: prom
96-
{{- if not .Values.sourcegraph.localDevMode }}
97-
resources:
98-
{{- toYaml .Values.worker.resources | nindent 10 }}
99-
{{- end }}
100-
securityContext:
101-
{{- toYaml .Values.worker.containerSecurityContext | nindent 10 }}
102-
volumeMounts:
103-
{{- if .Values.worker.extraVolumeMounts }}
104-
{{- toYaml .Values.worker.extraVolumeMounts | nindent 8 }}
105-
{{- end }}
106-
{{- if .Values.worker.extraContainers }}
107-
{{- toYaml .Values.worker.extraContainers | nindent 6 }}
108-
{{- end }}
109-
securityContext:
110-
{{- toYaml .Values.worker.podSecurityContext | nindent 8 }}
111-
{{- include "sourcegraph.nodeSelector" (list . "worker" ) | trim | nindent 6 }}
112-
{{- include "sourcegraph.affinity" (list . "worker" ) | trim | nindent 6 }}
113-
{{- include "sourcegraph.tolerations" (list . "worker" ) | trim | nindent 6 }}
114-
{{- with .Values.sourcegraph.imagePullSecrets }}
115-
imagePullSecrets:
116-
{{- toYaml . | nindent 8 }}
117-
{{- end }}
118-
{{- include "sourcegraph.renderServiceAccountName" (list . "worker") | trim | nindent 6 }}
119-
volumes:
120-
{{- if .Values.worker.extraVolumes }}
121-
{{- toYaml .Values.worker.extraVolumes | nindent 6 }}
122-
{{- end }}
1+
{{- /*
Renders the worker Deployment(s) via the "sourcegraph.worker" template.

- worker.replicas empty: one Deployment with no job allowlist/blocklist.
- worker.replicas set: the primary Deployment gets allowlist "all" and a
  blocklist made of every job listed under worker.replicas; each replica
  entry then gets its own Deployment, suffixed with its list index, whose
  allowlist is that entry's jobs. An entry's `resources` replaces
  worker.resources for that Deployment when set.
*/ -}}
{{- if not .Values.worker.replicas }}
{{- include "sourcegraph.worker" (list . "" "" "" .Values.worker.resources ) | nindent 0 }}
{{- else }}
{{- $dedicatedJobs := list }}
{{- range $idx, $item := .Values.worker.replicas }}
{{- /* An entry without jobs would render a replica with no allowlist at all; reject it up front. */}}
{{- if not $item.jobs }}
{{- fail (printf "worker.replicas[%d].jobs must be a non-empty list of job names" $idx) }}
{{- end }}
{{- $dedicatedJobs = concat $dedicatedJobs $item.jobs }}
{{- end }}
{{- $primaryBlocklist := join "," ($dedicatedJobs | uniq | sortAlpha) }}
---
{{- include "sourcegraph.worker" (list . "" "all" $primaryBlocklist $.Values.worker.resources) | nindent 0 }}

{{- range $idx, $item := .Values.worker.replicas }}
---
{{- $replicaName := printf "%d" $idx }}
{{- $allowlist := join "," ($item.jobs | uniq | sortAlpha) }}
{{- /* Per-replica resources win; otherwise fall back to the shared worker.resources. */}}
{{- $resources := $item.resources | default $.Values.worker.resources -}}
{{- include "sourcegraph.worker" (list $ $replicaName $allowlist "" $resources) | nindent 0 }}
{{- end }}
{{- end }}

0 commit comments

Comments
 (0)