diff --git a/demos/hbase-hdfs-load-cycling-data/create-hfile-and-import-to-hbase.yaml b/demos/hbase-hdfs-load-cycling-data/create-hfile-and-import-to-hbase.yaml index 67bc539b..7c561ed3 100644 --- a/demos/hbase-hdfs-load-cycling-data/create-hfile-and-import-to-hbase.yaml +++ b/demos/hbase-hdfs-load-cycling-data/create-hfile-and-import-to-hbase.yaml @@ -28,16 +28,24 @@ spec: - mountPath: /stackable/conf/hbase-env.sh name: config-volume-hbase subPath: hbase-env.sh - command: [ "bash", "-c", "/stackable/hbase/bin/hbase \ + command: + - bash + - -euo + - pipefail + - -c + - | + # https://hbase.apache.org/book.html#tools + /stackable/hbase/bin/hbase \ org.apache.hadoop.hbase.mapreduce.ImportTsv \ -Dimporttsv.separator=, \ -Dimporttsv.columns=HBASE_ROW_KEY,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual \ -Dimporttsv.bulk.output=hdfs://hdfs/data/hfile \ - cycling-tripdata hdfs://hdfs/data/raw/demo-cycling-tripdata.csv.gz \ - && /stackable/hbase/bin/hbase \ + cycling-tripdata hdfs://hdfs/data/raw/demo-cycling-tripdata.csv.gz + + /stackable/hbase/bin/hbase \ org.apache.hadoop.hbase.tool.LoadIncrementalHFiles \ hdfs://hdfs/data/hfile \ - cycling-tripdata" ] # https://hbase.apache.org/book.html#tools + cycling-tripdata volumes: - name: config-volume-hbase configMap: diff --git a/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml b/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml index 67cfe784..ad564e6f 100644 --- a/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml +++ b/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml @@ -9,9 +9,9 @@ spec: containers: - name: distcp-cycling-data # We use 24.3.0 here which contains the distcp MapReduce components - # This is not included in the 24.7 images and will fail. + # This is not included in the 24.7 and 24.11 images and will fail. 
# See: https://github.com/stackabletech/docker-images/issues/793 - image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev + image: docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.3.0 env: - name: HADOOP_USER_NAME value: stackable @@ -20,7 +20,16 @@ spec: - name: HADOOP_CLASSPATH value: "/stackable/hadoop/share/hadoop/tools/lib/*.jar" # yamllint disable-line rule:line-length - command: ["bash", "-c", "bin/hdfs dfs -mkdir -p /data/raw && bin/hadoop distcp -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider s3a://public-backup-nyc-tlc/cycling-tripdata/demo-cycling-tripdata.csv.gz hdfs://hdfs/data/raw"] + command: + - bash + - -euo + - pipefail + - -c + - | + bin/hdfs dfs -mkdir -p /data/raw + bin/hadoop distcp -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \ + s3a://public-backup-nyc-tlc/cycling-tripdata/demo-cycling-tripdata.csv.gz \ + hdfs://hdfs/data/raw volumeMounts: - name: config-volume-hdfs mountPath: /stackable/conf/hdfs diff --git a/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc b/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc index 5ef8d790..1d00ddd3 100644 --- a/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc +++ b/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc @@ -206,7 +206,7 @@ image::hbase-hdfs-load-cycling-data/hbase-table-ui.png[] == Accessing the HDFS web interface -You can also see HDFS details via a UI by running `stackablectl stacklet list` and following the link next to one of the namenodes. +You can also see HDFS details via a UI by running `stackablectl stacklet list` and following the http links next to the namenodes. Below you will see the overview of your HDFS cluster. @@ -218,6 +218,11 @@ image::hbase-hdfs-load-cycling-data/hdfs-datanode.png[] You can also browse the file system by clicking on the `Utilities` tab and selecting `Browse the file system`. 
+[TIP] +==== +Check on the Overview page that the namenode you browse to is the _active_ namenode. Otherwise you will not be able to browse files. +==== + image::hbase-hdfs-load-cycling-data/hdfs-data.png[] Navigate in the file system to the folder `data` and then the `raw` folder.