Skip to content

Commit 242309f

Browse files
committed
CDAP-14442 removed parquet and orc dependencies
Removed parquet and orc dependencies, as those formats have been moved to plugins. Did not remove avro, since it is still used by the BigQuery plugins. Removed exports for avro, parquet, and orc. Avro is now a private dependency, used only within the source and not required by the program. Also fixed an NPE that would occur if the BigQuery table does not exist when the Get Schema button is clicked.
1 parent 05b09b4 commit 242309f

File tree

3 files changed

+10
-51
lines changed

3 files changed

+10
-51
lines changed

pom.xml

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
<properties>
5858
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
5959
<cdap.version>5.1.0</cdap.version>
60-
<hydrator.version>2.1.0</hydrator.version>
60+
<hydrator.version>2.1.1-SNAPSHOT</hydrator.version>
6161
<guava.version>20.0</guava.version>
6262
<slf4j.version>1.7.5</slf4j.version>
6363
<junit.version>4.12</junit.version>
@@ -67,11 +67,9 @@
6767
<google.cloud.spanner.version>0.53.0-beta</google.cloud.spanner.version>
6868
<spark.version>1.6.1</spark.version>
6969
<google.cloud.speech.version>0.54.0-beta</google.cloud.speech.version>
70-
<parquet.avro.version>1.8.1</parquet.avro.version>
7170
<google.protobuf.java.version>3.4.0</google.protobuf.java.version>
7271
<google.cloud.pubsub.version>1.36.0</google.cloud.pubsub.version>
73-
<orc.mapreduce.version>1.1.0</orc.mapreduce.version>
74-
<avro.mapred.version>1.7.7</avro.mapred.version>
72+
<avro.version>1.7.7</avro.version>
7573
<jackson.core.version>2.8.11.1</jackson.core.version>
7674
</properties>
7775

@@ -97,7 +95,7 @@
9795
<dependency>
9896
<groupId>org.apache.avro</groupId>
9997
<artifactId>avro</artifactId>
100-
<version>1.7.7</version>
98+
<version>${avro.version}</version>
10199
</dependency>
102100
<dependency>
103101
<groupId>co.cask.cdap</groupId>
@@ -265,17 +263,6 @@
265263
<artifactId>google-cloud-speech</artifactId>
266264
<version>${google.cloud.speech.version}</version>
267265
</dependency>
268-
<dependency>
269-
<groupId>org.apache.parquet</groupId>
270-
<artifactId>parquet-avro</artifactId>
271-
<version>${parquet.avro.version}</version>
272-
<exclusions>
273-
<exclusion>
274-
<groupId>org.apache.avro</groupId>
275-
<artifactId>avro</artifactId>
276-
</exclusion>
277-
</exclusions>
278-
</dependency>
279266
<dependency>
280267
<groupId>com.google.protobuf</groupId>
281268
<artifactId>protobuf-java</artifactId>
@@ -286,34 +273,11 @@
286273
<artifactId>google-cloud-pubsub</artifactId>
287274
<version>${google.cloud.pubsub.version}</version>
288275
</dependency>
289-
<dependency>
290-
<groupId>org.apache.orc</groupId>
291-
<artifactId>orc-mapreduce</artifactId>
292-
<version>${orc.mapreduce.version}</version>
293-
<exclusions>
294-
<exclusion>
295-
<groupId>org.apache.hadoop</groupId>
296-
<artifactId>hadoop-hdfs</artifactId>
297-
</exclusion>
298-
<exclusion>
299-
<groupId>org.apache.hadoop</groupId>
300-
<artifactId>hadoop-common</artifactId>
301-
</exclusion>
302-
<exclusion>
303-
<groupId>org.apache.hadoop</groupId>
304-
<artifactId>hadoop-mapreduce-client-core</artifactId>
305-
</exclusion>
306-
<exclusion>
307-
<groupId>org.apache.hive</groupId>
308-
<artifactId>hive-storage-api</artifactId>
309-
</exclusion>
310-
</exclusions>
311-
</dependency>
312276
<dependency>
313277
<groupId>org.apache.avro</groupId>
314278
<artifactId>avro-mapred</artifactId>
315279
<classifier>hadoop2</classifier>
316-
<version>${avro.mapred.version}</version>
280+
<version>${avro.version}</version>
317281
</dependency>
318282
<dependency>
319283
<groupId>com.fasterxml.jackson.core</groupId>
@@ -524,12 +488,7 @@
524488
<!--Only @Plugin classes in the export packages will be included as plugin-->
525489
<_exportcontents>
526490
co.cask.gcp.*;
527-
co.cask.hydrator.format.*;
528491
com.google.cloud.hadoop.*;
529-
org.apache.avro.mapred.*;
530-
org.apache.avro.mapreduce;
531-
org.apache.parquet.avro.*;
532-
org.apache.parquet.hadoop.*;
533492
org.apache.spark.streaming.pubsub*;
534493
</_exportcontents>
535494
</instructions>

src/main/java/co/cask/gcp/bigquery/BigQuerySource.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,16 @@ public void onRunFinish(boolean succeeded, BatchSourceContext context) {
165165
@Path("getSchema")
166166
public Schema getSchema(BigQuerySourceConfig request) throws Exception {
167167
String dataset = request.getDataset();
168-
String table = request.getTable();
168+
String tableName = request.getTable();
169169
String project = request.getDatasetProject();
170-
Table bqTable = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, table);
170+
Table table = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, tableName);
171171
if (table == null) {
172172
// Table does not exist
173173
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist",
174-
project, dataset, table));
174+
project, dataset, tableName));
175175
}
176176

177-
com.google.cloud.bigquery.Schema bgSchema = bqTable.getDefinition().getSchema();
177+
com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();
178178
if (bgSchema == null) {
179179
throw new IllegalArgumentException(String.format("Cannot read from table '%s:%s.%s' because it has no schema.",
180180
project, dataset, table));
@@ -195,7 +195,7 @@ private void validateOutputSchema() throws IOException {
195195
if (table == null) {
196196
// Table does not exist
197197
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist.",
198-
project, dataset, table));
198+
project, dataset, tableName));
199199
}
200200

201201
com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();

src/main/java/co/cask/gcp/spanner/sink/SpannerSink.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@
4242
import com.google.cloud.spanner.Spanner;
4343
import com.google.cloud.spanner.SpannerException;
4444
import com.google.cloud.spanner.Statement;
45+
import com.google.common.base.Strings;
4546
import com.google.spanner.admin.database.v1.CreateDatabaseMetadata;
4647
import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata;
4748
import org.apache.hadoop.conf.Configuration;
4849
import org.apache.hadoop.io.NullWritable;
49-
import org.apache.parquet.Strings;
5050
import org.slf4j.Logger;
5151
import org.slf4j.LoggerFactory;
5252

0 commit comments

Comments
 (0)