diff --git a/charts/accumulo/templates/alluxio-master-deployment.yaml b/charts/accumulo/templates/alluxio-master-deployment.yaml index 403ac315e1f..7035db8fa2c 100644 --- a/charts/accumulo/templates/alluxio-master-deployment.yaml +++ b/charts/accumulo/templates/alluxio-master-deployment.yaml @@ -65,6 +65,9 @@ spec: # Start master /opt/alluxio/bin/alluxio-start.sh master + + # Start job master + /opt/alluxio/bin/alluxio-start.sh job_master # Keep container running and monitor process while true; do @@ -72,6 +75,10 @@ spec: echo "Alluxio master process died, restarting..." /opt/alluxio/bin/alluxio-start.sh master fi + if ! pgrep -f "alluxio.master.AlluxioJobMaster" > /dev/null; then + echo "Alluxio job master process died, restarting..." + /opt/alluxio/bin/alluxio-start.sh job_master + fi sleep 30 done ports: @@ -81,6 +88,9 @@ spec: - name: web containerPort: 19999 protocol: TCP + - name: job-rpc + containerPort: 20001 + protocol: TCP env: - name: ALLUXIO_MASTER_HOSTNAME valueFrom: diff --git a/charts/accumulo/templates/alluxio-master-service.yaml b/charts/accumulo/templates/alluxio-master-service.yaml index 04cf6f6d74d..0c1e6802638 100644 --- a/charts/accumulo/templates/alluxio-master-service.yaml +++ b/charts/accumulo/templates/alluxio-master-service.yaml @@ -40,6 +40,10 @@ spec: port: 19999 targetPort: web protocol: TCP + - name: job-rpc + port: 20001 + targetPort: job-rpc + protocol: TCP selector: {{- include "accumulo.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: alluxio-master diff --git a/charts/accumulo/templates/alluxio-worker-daemonset.yaml b/charts/accumulo/templates/alluxio-worker-daemonset.yaml index 33c83ac2a45..3e03856b055 100644 --- a/charts/accumulo/templates/alluxio-worker-daemonset.yaml +++ b/charts/accumulo/templates/alluxio-worker-daemonset.yaml @@ -56,7 +56,7 @@ spec: - | # Wait for master to be ready echo "Waiting for Alluxio master to be ready..." - until (echo > /dev/tcp/accumulo-alluxio-master/19998) >/dev/null 2>&1; do + until (echo > /dev/tcp/{{include "accumulo.fullname" .}}-alluxio-master/19998) >/dev/null 2>&1; do echo "Waiting for master..." sleep 5 done @@ -71,6 +71,9 @@ spec: # Start worker /opt/alluxio/bin/alluxio-start.sh worker + + # Start job worker + /opt/alluxio/bin/alluxio-start.sh job_worker # Keep container running and monitor process while true; do @@ -78,6 +81,10 @@ spec: echo "Alluxio worker process died, restarting..." /opt/alluxio/bin/alluxio-start.sh worker fi + if ! pgrep -f "alluxio.worker.AlluxioJobWorker" > /dev/null; then + echo "Alluxio job worker process died, restarting..." + /opt/alluxio/bin/alluxio-start.sh job_worker + fi sleep 30 done ports: @@ -85,17 +92,36 @@ spec: containerPort: 29999 protocol: TCP - name: data - containerPort: 29999 + containerPort: 29998 protocol: TCP - name: web containerPort: 30000 protocol: TCP + - name: job-rpc + containerPort: 30002 + protocol: TCP + - name: job-data + containerPort: 30003 + protocol: TCP + - name: job-web + containerPort: 30004 + protocol: TCP env: - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP - - name: WORKER_HOSTNAME + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: ALLUXIO_WORKER_HOSTNAME + value: "$(POD_NAME).{{ include "accumulo.fullname" . }}-alluxio-worker.$(POD_NAMESPACE).svc.cluster.local" + - name: ALLUXIO_JOB_WORKER_HOSTNAME value: "$(POD_NAME).{{ include "accumulo.fullname" . }}-alluxio-worker.$(POD_NAMESPACE).svc.cluster.local" - name: ALLUXIO_MASTER_HOSTNAME value: {{include "accumulo.fullname" .}}-alluxio-master diff --git a/charts/accumulo/templates/alluxio-worker-service.yaml b/charts/accumulo/templates/alluxio-worker-service.yaml index 24af51abf6f..173638be22f 100644 --- a/charts/accumulo/templates/alluxio-worker-service.yaml +++ b/charts/accumulo/templates/alluxio-worker-service.yaml @@ -26,15 +26,32 @@ metadata: app.kubernetes.io/component: alluxio-worker spec: clusterIP: None # headless + publishNotReadyAddresses: true # Enable DNS for all pods including not ready ports: - name: rpc port: 29999 targetPort: rpc protocol: TCP + - name: data + port: 29998 + targetPort: data + protocol: TCP - name: web port: 30000 targetPort: web protocol: TCP + - name: job-rpc + port: 30002 + targetPort: job-rpc + protocol: TCP + - name: job-data + port: 30003 + targetPort: job-data + protocol: TCP + - name: job-web + port: 30004 + targetPort: job-web + protocol: TCP selector: {{- include "accumulo.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: alluxio-worker diff --git a/charts/accumulo/templates/configmap.yaml b/charts/accumulo/templates/configmap.yaml index 59bfc738dce..0d835c02f5e 100644 --- a/charts/accumulo/templates/configmap.yaml +++ b/charts/accumulo/templates/configmap.yaml @@ -289,14 +289,24 @@ data: alluxio.master.hostname={{ include "accumulo.fullname" . }}-alluxio-master alluxio.master.rpc.port=19998 alluxio.master.web.port=19999 - alluxio.worker.hostname={{ include "accumulo.fullname" . }}-alluxio-worker + # Worker hostname set via ALLUXIO_WORKER_HOSTNAME environment variable + alluxio.worker.bind.host=0.0.0.0 alluxio.worker.port=29999 alluxio.worker.web.port=30000 - alluxio.worker.data.port=29999 + alluxio.worker.data.port=29998 alluxio.worker.rpc.port=29999 alluxio.worker.rpc.bind.host=0.0.0.0 alluxio.worker.data.bind.host=0.0.0.0 alluxio.worker.web.bind.host=0.0.0.0 + + ## Job worker configuration + alluxio.job.master.hostname={{ include "accumulo.fullname" . }}-alluxio-master + alluxio.job.master.rpc.port=20001 + # Job worker hostname set via ALLUXIO_JOB_WORKER_HOSTNAME environment variable + alluxio.job.worker.bind.host=0.0.0.0 + alluxio.job.worker.rpc.port=30002 + alluxio.job.worker.data.port=30003 + alluxio.job.worker.web.port=30004 ## Memory and storage configuration alluxio.worker.memory.size={{ .Values.alluxio.properties.alluxio.worker.memory.size }}