Skip to content

Commit a9f70b9

Browse files
authored
Support Kerberos in helm charts as option (#24)
* Support kerberos as option * Use a secret for keytabs * Address review comments * Address review comments
1 parent c1f8b1d commit a9f70b9

File tree

7 files changed

+294
-20
lines changed

7 files changed

+294
-20
lines changed

charts/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ the following order.
1111
`hdfs-namenode-k8s/README.md` for how to launch.
1212
2. `hdfs-datanode-k8s`: Launches the hdfs datanode daemons. See
1313
`hdfs-datanode-k8s/README.md` for how to launch.
14+
15+
Kerberos is supported. See the `kerberosEnabled` option in the namenode and
16+
datanode charts.

charts/hdfs-datanode-k8s/README.md

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,26 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for
99
$ kubectl label node YOUR-MASTER-NAME hdfs-datanode-exclude=yes
1010
```
1111

12-
2. Launch this helm chart, `hdfs-datanode-k8s`.
12+
2. (Skip this if you do not plan to enable Kerberos)
13+
Conduct the Kerberos setups described in the namenode
14+
[README.md](../hdfs-namenode-k8s/README.md), if you have not done that
15+
already.
16+
17+
3. Launch this helm chart, `hdfs-datanode-k8s`.
1318

1419
```
1520
$ helm install -n my-hdfs-datanode hdfs-datanode-k8s
1621
```
1722

18-
3. Confirm the daemons are launched.
23+
If enabling Kerberos, specify necessary options. For instance,
24+
25+
```
26+
$ helm install -n my-hdfs-datanode \
27+
--set kerberosEnabled=true,kerberosRealm=MYCOMPANY.COM hdfs-datanode-k8s
28+
```
29+
The two variables above are required. For other variables, see values.yaml.
30+
31+
4. Confirm the daemons are launched.
1932

2033
```
2134
$ kubectl get pods | grep hdfs-datanode-
@@ -24,7 +37,10 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for
2437
```
2538

2639
`Datanode` daemons run on every cluster node. They also mount k8s `hostPath`
27-
local disk volumes.
40+
local disk volumes. You may want to restrict access of `hostPath`
41+
using `pod security policy`.
42+
See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md).
43+
2844

2945
`Datanodes` are using `hostNetwork` to register to `namenode` using
3046
physical IPs.
@@ -34,4 +50,4 @@ Note they run under the `default` namespace.
3450
### Credits
3551

3652
This chart is using public Hadoop docker images hosted by
37-
[uhopper](https://hub.docker.com/u/uhopper/).
53+
[uhopper](https://hub.docker.com/u/uhopper/).

charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml

Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,46 @@ spec:
3232
- name: datanode
3333
image: uhopper/hadoop-datanode:2.7.2
3434
env:
35+
# The following env vars are listed according to low-to-high precedence order.
36+
# i.e. Whoever comes last will override the earlier value of the same variable.
37+
{{- if .Values.kerberosEnabled }}
38+
- name: CORE_CONF_hadoop_security_authentication
39+
value: kerberos
40+
- name: CORE_CONF_hadoop_security_authorization
41+
value: "true"
42+
- name: CORE_CONF_hadoop_rpc_protection
43+
value: privacy
44+
- name: HDFS_CONF_dfs_block_access_token_enable
45+
value: "true"
46+
- name: HDFS_CONF_dfs_encrypt_data_transfer
47+
value: "true"
48+
- name: HDFS_CONF_dfs_datanode_kerberos_principal
49+
value: hdfs/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
50+
- name: HDFS_CONF_dfs_datanode_kerberos_https_principal
51+
value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
52+
- name: HDFS_CONF_dfs_web_authentication_kerberos_principal
53+
value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
54+
- name: HDFS_CONF_dfs_datanode_keytab_file
55+
value: /etc/security/hdfs.keytab
56+
{{- if .Values.jsvcEnabled }}
57+
- name: HDFS_CONF_dfs_datanode_address
58+
value: 0.0.0.0:1004
59+
- name: HDFS_CONF_dfs_datanode_http_address
60+
value: 0.0.0.0:1006
61+
- name: HADOOP_SECURE_DN_USER
62+
value: root
63+
- name: JSVC_OUTFILE
64+
value: /dev/stdout
65+
- name: JSVC_ERRFILE
66+
value: /dev/stderr
67+
- name: JSVC_HOME
68+
value: /jsvc-home
69+
{{- end }}
70+
{{- end }}
71+
{{- range $key, $value := .Values.customHadoopConfig }}
72+
- name: {{ $key | quote }}
73+
value: {{ $value | quote }}
74+
{{- end }}
3575
- name: CORE_CONF_fs_defaultFS
3676
value: hdfs://hdfs-namenode-0.hdfs-namenode.default.svc.cluster.local:8020
3777
# The below uses two loops to make sure the last item does not have comma. It uses index 0
@@ -48,26 +88,63 @@ spec:
4888
/hadoop/dfs/data/{{ $index }}
4989
{{- end }}
5090
{{- end }}
51-
# We now add custom hadoop configuration provided
52-
{{- range $key, $value := .Values.customHadoopConfig }}
53-
{{- if and (ne $key "HDFS_CONF_dfs_datanode_data_dir") (ne $key "CORE_CONF_fs_defaultFS") }}
54-
- name: {{ $key | quote }}
55-
value: {{ $value | quote }}
56-
{{- end }}
57-
{{- end }}
5891
livenessProbe:
5992
initialDelaySeconds: 30
6093
httpGet:
6194
host: 127.0.0.1
6295
path: /
96+
{{- if and .Values.kerberosEnabled .Values.jsvcEnabled }}
97+
port: 1006
98+
{{- else }}
6399
port: 50075
100+
{{- end }}
64101
securityContext:
65102
privileged: true
66103
volumeMounts:
67104
{{- range $index, $path := .Values.dataNodeHostPath }}
68105
- name: hdfs-data-{{ $index }}
69106
mountPath: /hadoop/dfs/data/{{ $index }}
70107
{{- end }}
108+
{{- if .Values.kerberosEnabled }}
109+
- name: kerberos-config
110+
mountPath: /etc/krb5.conf
111+
subPath: {{ .Values.kerberosConfigFileName }}
112+
readOnly: true
113+
- name: kerberos-keytab-copy
114+
mountPath: /etc/security/
115+
readOnly: true
116+
{{- if .Values.jsvcEnabled }}
117+
- name: jsvc-home
118+
mountPath: /jsvc-home
119+
{{- end }}
120+
{{- end }}
121+
{{- if and .Values.kerberosEnabled .Values.jsvcEnabled }}
122+
initContainers:
123+
- name: copy-kerberos-keytab
124+
image: busybox:1.27.1
125+
command: ['sh', '-c']
126+
args:
127+
- cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab
128+
env:
129+
- name: MY_NODE_NAME
130+
valueFrom:
131+
fieldRef:
132+
fieldPath: spec.nodeName
133+
volumeMounts:
134+
- name: kerberos-keytabs
135+
mountPath: /kerberos-keytabs
136+
- name: kerberos-keytab-copy
137+
mountPath: /kerberos-keytab-copy
138+
- name: copy-jsvc
139+
# Pull by digest because the image doesn't have tags to pin.
140+
image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef
141+
command: ['sh', '-c']
142+
args:
143+
- cp /usr/bin/jsvc /jsvc-home/jsvc
144+
volumeMounts:
145+
- name: jsvc-home
146+
mountPath: /jsvc-home
147+
{{- end }}
71148
affinity:
72149
nodeAffinity:
73150
requiredDuringSchedulingIgnoredDuringExecution:
@@ -82,3 +159,17 @@ spec:
82159
hostPath:
83160
path: {{ $path }}
84161
{{- end }}
162+
{{- if .Values.kerberosEnabled }}
163+
- name: kerberos-config
164+
configMap:
165+
name: {{ .Values.kerberosConfigMap }}
166+
- name: kerberos-keytabs
167+
secret:
168+
secretName: {{ .Values.kerberosKeytabsSecret }}
169+
- name: kerberos-keytab-copy
170+
emptyDir: {}
171+
{{- if .Values.jsvcEnabled }}
172+
- name: jsvc-home
173+
emptyDir: {}
174+
{{- end }}
175+
{{- end }}

charts/hdfs-datanode-k8s/values.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,31 @@ dataNodeHostPath:
3131
customHadoopConfig: {}
3232
# Set variables through a hash where env variable is the key, e.g.
3333
# HDFS_CONF_dfs_datanode_use_datanode_hostname: "false"
34+
35+
# Whether or not Kerberos support is enabled.
36+
kerberosEnabled: false
37+
38+
# Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name.
39+
# This should match the realm name in your Kerberos config file.
40+
kerberosRealm: ""
41+
42+
# Effective only if Kerberos is enabled. Name of the k8s config map containing
43+
# the kerberos config file.
44+
kerberosConfigMap: kerberos-config
45+
46+
# Effective only if Kerberos is enabled. Name of the kerberos config file inside
47+
# the config map.
48+
kerberosConfigFileName: krb5.conf
49+
50+
# Effective only if Kerberos is enabled. Name of the k8s secret containing
51+
# the kerberos keytab files of per-host hdfs principals. The secret should
52+
# have multiple data items. Each data item name should be formatted as:
53+
# `HOST-NAME.keytab`
54+
# where HOST-NAME should match the cluster node
55+
# host name that each per-host HDFS principal is associated with.
56+
kerberosKeytabsSecret: hdfs-kerberos-keytabs
57+
58+
# Effective only if Kerberos is enabled. Enable protection of datanodes using
59+
# the jsvc utility. See the reference doc at
60+
# https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode
61+
jsvcEnabled: true

charts/hdfs-namenode-k8s/README.md

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,68 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for
1212
$ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0
1313
```
1414

15-
2. Launch this helm chart, `hdfs-namenode-k8s`.
15+
2. (Skip this if you do not plan to enable Kerberos)
16+
Prepare Kerberos setup, following the steps below.
17+
18+
- Create a config map containing your Kerberos config file. This will be
19+
mounted onto the namenode and datanode pods.
20+
21+
```
22+
$ kubectl create configmap kerberos-config --from-file=/etc/krb5.conf
23+
```
24+
25+
- Generate per-host principal accounts and password keytab files for the namenode
26+
and datanode daemons. This is typically done in your Kerberos KDC host. For example,
27+
suppose the namenode will run on the k8s cluster node kube-n1.mycompany.com,
28+
and your datanodes will run on kube-n1.mycompany.com and kube-n2.mycompany.com.
29+
Also suppose your Kerberos realm is MYCOMPANY.COM. Then run:
30+
31+
```
32+
$ kadmin.local -q "addprinc -randkey hdfs/kube-n1.mycompany.com@MYCOMPANY.COM"
33+
$ kadmin.local -q "addprinc -randkey http/kube-n1.mycompany.com@MYCOMPANY.COM"
34+
$ mkdir hdfs-keytabs
35+
$ kadmin.local -q "ktadd -norandkey \
36+
-k hdfs-keytabs/kube-n1.mycompany.com.keytab \
37+
hdfs/kube-n1.mycompany.com@MYCOMPANY.COM \
38+
http/kube-n1.mycompany.com@MYCOMPANY.COM"
39+
40+
$ kadmin.local -q "addprinc -randkey hdfs/kube-n2.mycompany.com@MYCOMPANY.COM"
41+
$ kadmin.local -q "addprinc -randkey http/kube-n2.mycompany.com@MYCOMPANY.COM"
42+
$ kadmin.local -q "ktadd -norandkey \
43+
-k hdfs-keytabs/kube-n2.mycompany.com.keytab \
44+
45+
46+
$ kadmin.local -q "ktadd -norandkey \
47+
-k hdfs-keytabs/kube-n2.mycompany.com.keytab \
48+
49+
50+
```
51+
52+
- Create a k8s secret containing all the keytab files. This will be mounted
53+
onto the namenode and datanode pods. (You may want to restrict access to
54+
this secret using k8s
55+
[RBAC](https://kubernetes.io/docs/admin/authorization/rbac/),
56+
to minimize exposure of the keytab files.)
57+
```
58+
$ kubectl create secret generic hdfs-kerberos-keytabs \
59+
--from-file=kube-n1.mycompany.com.keytab \
60+
--from-file=kube-n2.mycompany.com.keytab
61+
```
62+
63+
3. Launch this namenode helm chart, `hdfs-namenode-k8s`.
1664
1765
```
1866
$ helm install -n my-hdfs-namenode hdfs-namenode-k8s
1967
```
2068
21-
3. Confirm the daemon is launched.
69+
If enabling Kerberos, specify necessary options. For instance,
70+
```
71+
$ helm install -n my-hdfs-namenode \
72+
--set kerberosEnabled=true,kerberosRealm=MYCOMPANY.COM hdfs-namenode-k8s
73+
```
74+
The two variables above are required. For other variables, see values.yaml.
75+
76+
4. Confirm the daemon is launched.
2277
2378
```
2479
$ kubectl get pods | grep hdfs-namenode
@@ -28,10 +83,12 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for
2883
There will be only one `namenode` instance. i.e. High Availability (HA) is not
2984
supported at the moment. The `namenode` instance is supposed to be pinned to
3085
a cluster host using a node label, as shown in the usage above. `Namenode`
31-
mount a local disk directory using k8s `hostPath` volume.
86+
mounts a local disk directory using a k8s `hostPath` volume. You may want to
87+
restrict access of `hostPath` using `pod security policy`.
88+
See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)
3289
3390
`namenode` is using `hostNetwork` so it can see physical IPs of datanodes
34-
without an overlay network such as weave-net mask them.
91+
without an overlay network such as weave-net masking them.
3592
3693
### Credits
3794

charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,21 +51,67 @@ spec:
5151
- name: hdfs-namenode
5252
image: uhopper/hadoop-namenode:2.7.2
5353
env:
54-
- name: CLUSTER_NAME
55-
value: hdfs-k8s
56-
# We now add custom hadoop configuration provided
54+
# The following env vars are listed according to low-to-high precedence order.
55+
# i.e. Whoever comes last will override the earlier value of the same variable.
56+
{{- if .Values.kerberosEnabled }}
57+
- name: CORE_CONF_hadoop_security_authentication
58+
value: kerberos
59+
- name: CORE_CONF_hadoop_security_authorization
60+
value: "true"
61+
- name: CORE_CONF_hadoop_rpc_protection
62+
value: privacy
63+
- name: HDFS_CONF_dfs_block_access_token_enable
64+
value: "true"
65+
- name: HDFS_CONF_dfs_encrypt_data_transfer
66+
value: "true"
67+
- name: HDFS_CONF_dfs_namenode_kerberos_principal
68+
value: hdfs/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
69+
- name: HDFS_CONF_dfs_namenode_kerberos_https_principal
70+
value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
71+
- name: HDFS_CONF_dfs_web_authentication_kerberos_principal
72+
value: http/_HOST@{{ required "A valid kerberosRealm entry required!" .Values.kerberosRealm }}
73+
- name: HDFS_CONF_dfs_namenode_keytab_file
74+
value: /etc/security/hdfs.keytab
75+
{{- end }}
5776
{{- range $key, $value := .Values.customHadoopConfig }}
58-
{{- if ne $key "CLUSTER_NAME" }}
5977
- name: {{ $key | quote }}
6078
value: {{ $value | quote }}
6179
{{- end }}
62-
{{- end }}
80+
- name: CLUSTER_NAME
81+
value: hdfs-k8s
6382
ports:
6483
- containerPort: 8020
6584
name: fs
6685
volumeMounts:
6786
- name: hdfs-name
6887
mountPath: /hadoop/dfs/name
88+
{{- if .Values.kerberosEnabled }}
89+
- name: kerberos-config
90+
mountPath: /etc/krb5.conf
91+
subPath: {{ .Values.kerberosConfigFileName }}
92+
readOnly: true
93+
- name: kerberos-keytab-copy
94+
mountPath: /etc/security/
95+
readOnly: true
96+
{{- end }}
97+
{{- if .Values.kerberosEnabled }}
98+
initContainers:
99+
- name: copy-kerberos-keytab
100+
image: busybox:1.27.1
101+
command: ['sh', '-c']
102+
args:
103+
- cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab
104+
env:
105+
- name: MY_NODE_NAME
106+
valueFrom:
107+
fieldRef:
108+
fieldPath: spec.nodeName
109+
volumeMounts:
110+
- name: kerberos-keytabs
111+
mountPath: /kerberos-keytabs
112+
- name: kerberos-keytab-copy
113+
mountPath: /kerberos-keytab-copy
114+
{{- end }}
69115
# Pin the pod to a node. You can label your node like below:
70116
# $ kubectl label nodes YOUR-NODE hdfs-namenode-selector=hdfs-namenode-0
71117
nodeSelector:
@@ -75,3 +121,13 @@ spec:
75121
- name: hdfs-name
76122
hostPath:
77123
path: {{ .Values.nameNodeHostPath }}
124+
{{- if .Values.kerberosEnabled }}
125+
- name: kerberos-config
126+
configMap:
127+
name: {{ .Values.kerberosConfigMap }}
128+
- name: kerberos-keytabs
129+
secret:
130+
secretName: {{ .Values.kerberosKeytabsSecret }}
131+
- name: kerberos-keytab-copy
132+
emptyDir: {}
133+
{{- end }}

0 commit comments

Comments
 (0)