# Hadoop Chart

**This is the README from the original Hadoop Helm chart (https://github.com/helm/charts/tree/master/stable/hadoop).**
**This version removes the YARN manager and provides advanced Hadoop configuration through environment variables.**

[Hadoop](https://hadoop.apache.org/) is a framework for running large-scale distributed applications.

This chart is primarily intended for YARN and MapReduce job execution, where HDFS is used only as a means to transport small artifacts within the framework, not as a distributed filesystem. Data should be read from cloud-based datastores such as Google Cloud Storage, S3, or Swift.

## Chart Details

## Installing the Chart

To install the chart with the release name `hadoop`, utilizing 50% of the available node resources:

```
$ helm install --name hadoop $(stable/hadoop/tools/calc_resources.sh 50) stable/hadoop
```

> Note that you need at least 2GB of free memory per NodeManager pod; if your cluster isn't large enough, not all pods will be scheduled.

The optional [`calc_resources.sh`](./tools/calc_resources.sh) script is a convenience helper that sets `yarn.numNodes` and `yarn.nodeManager.resources` appropriately to utilize all nodes in the Kubernetes cluster at a given percentage of their resources. For example, on a 3-node `n1-standard-4` GKE cluster with an argument of `50`, this would create 3 NodeManager pods, each claiming 2 cores and 7.5Gi of memory.
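
As a rough illustration of the sizing arithmetic such a helper performs (a sketch with assumed node figures, not the actual script), an `n1-standard-4` node at 50% utilization works out as:

```shell
# Sketch only: illustrates the sizing math, not the real calc_resources.sh.
# Assumed figures: n1-standard-4 = 4 vCPUs (4000m) and 15GB (~15360Mi).
percent=50
node_cpu_m=4000
node_mem_mi=15360
cpu=$(( node_cpu_m * percent / 100 ))   # 2000m, i.e. 2 cores
mem=$(( node_mem_mi * percent / 100 ))  # 7680Mi, i.e. 7.5Gi
echo "--set yarn.nodeManager.resources.requests.cpu=${cpu}m,yarn.nodeManager.resources.requests.memory=${mem}Mi"
```

The real script also counts the cluster's nodes to set `yarn.numNodes`; the exact flags it emits may differ from the sketch above.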

### Persistence

To install the chart with persistent volumes:

```
$ helm install --name hadoop $(stable/hadoop/tools/calc_resources.sh 50) \
  --set persistence.nameNode.enabled=true \
  --set persistence.nameNode.storageClass=standard \
  --set persistence.dataNode.enabled=true \
  --set persistence.dataNode.storageClass=standard \
  stable/hadoop
```

> Change the value of `storageClass` to match your volume driver. `standard` works for Google Kubernetes Engine (GKE) clusters.

## Configuration

The following table lists the configurable parameters of the Hadoop chart and their default values.

| Parameter | Description | Default |
| ------------------------------------------------- | ------------------------------- | ---------------------------------------------------------------- |
| `image.repository` | Hadoop image ([source](https://github.com/Comcast/kube-yarn/tree/master/image)) | `danisla/hadoop` |
| `image.tag` | Hadoop image tag | `2.9.0` |
| `image.pullPolicy` | Pull policy for the images | `IfNotPresent` |
| `hadoopVersion` | Version of the Hadoop libraries being used | `2.9.0` |
| `antiAffinity` | Pod anti-affinity, `hard` or `soft` | `hard` |
| `hdfs.nameNode.pdbMinAvailable` | PDB for the HDFS NameNode | `1` |
| `hdfs.nameNode.resources` | Resources for the HDFS NameNode | `requests:memory=256Mi,cpu=10m,limits:memory=2048Mi,cpu=1000m` |
| `hdfs.dataNode.replicas` | Number of HDFS DataNode replicas | `1` |
| `hdfs.dataNode.pdbMinAvailable` | PDB for the HDFS DataNode | `1` |
| `hdfs.dataNode.resources` | Resources for the HDFS DataNode | `requests:memory=256Mi,cpu=10m,limits:memory=2048Mi,cpu=1000m` |
| `yarn.resourceManager.pdbMinAvailable` | PDB for the YARN ResourceManager | `1` |
| `yarn.resourceManager.resources` | Resources for the YARN ResourceManager | `requests:memory=256Mi,cpu=10m,limits:memory=2048Mi,cpu=1000m` |
| `yarn.nodeManager.pdbMinAvailable` | PDB for the YARN NodeManager | `1` |
| `yarn.nodeManager.replicas` | Number of YARN NodeManager replicas | `2` |
| `yarn.nodeManager.parallelCreate` | Create all NodeManager StatefulSet pods in parallel (K8s 1.7+) | `false` |
| `yarn.nodeManager.resources` | Resource limits and requests for YARN NodeManager pods | `requests:memory=2048Mi,cpu=1000m,limits:memory=2048Mi,cpu=1000m`|
| `persistence.nameNode.enabled` | Enable/disable the NameNode persistent volume | `false` |
| `persistence.nameNode.storageClass` | Name of the StorageClass to use, per your volume provider | `-` |
| `persistence.nameNode.accessMode` | Access mode for the volume | `ReadWriteOnce` |
| `persistence.nameNode.size` | Size of the volume | `50Gi` |
| `persistence.dataNode.enabled` | Enable/disable the DataNode persistent volume | `false` |
| `persistence.dataNode.storageClass` | Name of the StorageClass to use, per your volume provider | `-` |
| `persistence.dataNode.accessMode` | Access mode for the volume | `ReadWriteOnce` |
| `persistence.dataNode.size` | Size of the volume | `200Gi` |
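
The same parameters can be supplied through a values file instead of repeated `--set` flags. A minimal sketch (the filename is hypothetical; the keys are taken from the table above):

```
# values-hadoop.yaml (hypothetical filename)
hdfs:
  dataNode:
    replicas: 2
yarn:
  nodeManager:
    replicas: 4
persistence:
  dataNode:
    enabled: true
    storageClass: standard
    size: 200Gi
```

Then install with `helm install --name hadoop -f values-hadoop.yaml stable/hadoop`.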

## Related charts

The [Zeppelin Notebook](https://github.com/kubernetes/charts/tree/master/stable/zeppelin) chart can reuse this chart's Hadoop configuration and run its jobs through the YARN executor:

```
helm install --set hadoop.useConfigMap=true stable/zeppelin
```

## References

- This chart is a variation of the Hadoop chart in the stable Helm repository (https://github.com/helm/charts/tree/master/stable/hadoop).

- Original Kubernetes Hadoop adaptation this chart was derived from: https://github.com/Comcast/kube-yarn