Skip to content

Commit 9f9f38c

Browse files
authored
Update apache-hive-migrate-workloads.md
Fix typo and script formatting
1 parent 0b21556 commit 9f9f38c

File tree

1 file changed

+42
-42
lines changed

1 file changed

+42
-42
lines changed

articles/hdinsight/interactive-query/apache-hive-migrate-workloads.md

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -34,56 +34,56 @@ Create a new copy of your external metastore. If you're using an external metast
3434

3535
If you're using the internal metastore, you can use queries to export object definitions in the Hive metastore, and import them into a new database.
3636

37-
Note: For ACID tables, a new copy of the data will be created. Once this script is complete, it is assumed that the old cluster will not longer be used for accessing the data/metadata referred to in the script.
37+
Note: For ACID tables, a new copy of the data will be created. After this script completes, the old cluster is assumed to no longer be used to access the data or metadata referred to in the script.
3838

3939
1. Connect to the HDInsight cluster by using a [Secure Shell (SSH) client](../hdinsight-hadoop-linux-use-ssh-unix.md).
4040

4141
1. Connect to HiveServer2 with your [Beeline client](../hadoop/apache-hadoop-use-hive-beeline.md) from your open SSH session by entering the following command:
4242

4343
```bash
44-
for d in `beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show databases;"`;
45-
do
46-
echo "Scanning Database: $d"
47-
echo "create database if not exists $d; use $d;" >> alltables.hql;
48-
for t in `beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show tables;"`;
44+
for d in `beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show databases;"`;
4945
do
50-
echo "Copying Table: $t"
51-
ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;"`;
52-
53-
echo "$ddl;" >> alltables.hql;
54-
lowerddl=$(echo $ddl | awk '{print tolower($0)}')
55-
if [[ $lowerddl == *"'transactional'='true'"* ]]; then
56-
if [[ $lowerddl == *"partitioned by"* ]]; then
57-
# partitioned
58-
raw_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "CREATE TABLE .*" | cut -d"(" -f2- | cut -f1 -d")" | sed 's/`//g');
59-
ptn_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "PARTITIONED BY .*" | cut -f1 -d")" | cut -d"(" -f2- | sed 's/`//g');
60-
final_cols=$(echo "(" $raw_cols "," $ptn_cols ")")
61-
62-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t $final_cols TBLPROPERTIES ('transactional'='false');";
63-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;";
64-
staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`;
65-
dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-);
66-
67-
parsed_ptn_cols=$(echo $ptn_cols| sed 's/ [a-z]*,/,/g' | sed '$s/\w*$//g');
68-
echo "create table flattened_$t $final_cols;" >> alltables.hql;
69-
echo "load data inpath '$dir' into table flattened_$t;" >> alltables.hql;
70-
echo "insert into $t partition($parsed_ptn_cols) select * from flattened_$t;" >> alltables.hql;
71-
echo "drop table flattened_$t;" >> alltables.hql;
72-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t";
73-
else
74-
# not partitioned
75-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t like $t TBLPROPERTIES ('transactional'='false');";
76-
staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`;
77-
dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-);
78-
79-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;";
80-
echo "load data inpath '$dir' into table $t;" >> alltables.hql;
81-
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t";
46+
echo "Scanning Database: $d"
47+
echo "create database if not exists $d; use $d;" >> alltables.hql;
48+
for t in `beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show tables;"`;
49+
do
50+
echo "Copying Table: $t"
51+
ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;"`;
52+
53+
echo "$ddl;" >> alltables.hql;
54+
lowerddl=$(echo $ddl | awk '{print tolower($0)}')
55+
if [[ $lowerddl == *"'transactional'='true'"* ]]; then
56+
if [[ $lowerddl == *"partitioned by"* ]]; then
57+
# partitioned
58+
raw_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "CREATE TABLE .*" | cut -d"(" -f2- | cut -f1 -d")" | sed 's/`//g');
59+
ptn_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "PARTITIONED BY .*" | cut -f1 -d")" | cut -d"(" -f2- | sed 's/`//g');
60+
final_cols=$(echo "(" $raw_cols "," $ptn_cols ")")
61+
62+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t $final_cols TBLPROPERTIES ('transactional'='false');";
63+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;";
64+
staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`;
65+
dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-);
66+
67+
parsed_ptn_cols=$(echo $ptn_cols| sed 's/ [a-z]*,/,/g' | sed '$s/\w*$//g');
68+
echo "create table flattened_$t $final_cols;" >> alltables.hql;
69+
echo "load data inpath '$dir' into table flattened_$t;" >> alltables.hql;
70+
echo "insert into $t partition($parsed_ptn_cols) select * from flattened_$t;" >> alltables.hql;
71+
echo "drop table flattened_$t;" >> alltables.hql;
72+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t";
73+
else
74+
# not partitioned
75+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t like $t TBLPROPERTIES ('transactional'='false');";
76+
staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`;
77+
dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-);
78+
79+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;";
80+
echo "load data inpath '$dir' into table $t;" >> alltables.hql;
81+
beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t";
82+
fi
8283
fi
83-
fi
84-
echo "$ddl" | grep -q "PARTITIONED\s*BY" && echo "MSCK REPAIR TABLE $t;" >> alltables.hql;
85-
done;
86-
done
84+
echo "$ddl" | grep -q "PARTITIONED\s*BY" && echo "MSCK REPAIR TABLE $t;" >> alltables.hql;
85+
done;
86+
done
8787
```
8888
8989
This command generates a file named **alltables.hql**.

0 commit comments

Comments
 (0)