Skip to content

Commit 242309f

Browse files
committed
CDAP-14442 removed parquet and orc dependencies
Removed parquet and orc dependencies, as those formats have been moved to plugins. Did not remove avro, since it is still used by the BigQuery plugins. Removed exports for avro, parquet, and orc. Avro is now a private dependency, used only within the source and not required by the program. Also fixed an NPE that would occur if the BigQuery table does not exist when the Get Schema button is clicked.
1 parent 05b09b4 commit 242309f

File tree

3 files changed

+10
-51
lines changed

3 files changed

+10
-51
lines changed

pom.xml

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
<properties>
5858
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
5959
<cdap.version>5.1.0</cdap.version>
60-
<hydrator.version>2.1.0</hydrator.version>
60+
<hydrator.version>2.1.1-SNAPSHOT</hydrator.version>
6161
<guava.version>20.0</guava.version>
6262
<slf4j.version>1.7.5</slf4j.version>
6363
<junit.version>4.12</junit.version>
@@ -67,11 +67,9 @@
6767
<google.cloud.spanner.version>0.53.0-beta</google.cloud.spanner.version>
6868
<spark.version>1.6.1</spark.version>
6969
<google.cloud.speech.version>0.54.0-beta</google.cloud.speech.version>
70-
<parquet.avro.version>1.8.1</parquet.avro.version>
7170
<google.protobuf.java.version>3.4.0</google.protobuf.java.version>
7271
<google.cloud.pubsub.version>1.36.0</google.cloud.pubsub.version>
73-
<orc.mapreduce.version>1.1.0</orc.mapreduce.version>
74-
<avro.mapred.version>1.7.7</avro.mapred.version>
72+
<avro.version>1.7.7</avro.version>
7573
<jackson.core.version>2.8.11.1</jackson.core.version>
7674
</properties>
7775

@@ -97,7 +95,7 @@
9795
<dependency>
9896
<groupId>org.apache.avro</groupId>
9997
<artifactId>avro</artifactId>
100-
<version>1.7.7</version>
98+
<version>${avro.version}</version>
10199
</dependency>
102100
<dependency>
103101
<groupId>co.cask.cdap</groupId>
@@ -265,17 +263,6 @@
265263
<artifactId>google-cloud-speech</artifactId>
266264
<version>${google.cloud.speech.version}</version>
267265
</dependency>
268-
<dependency>
269-
<groupId>org.apache.parquet</groupId>
270-
<artifactId>parquet-avro</artifactId>
271-
<version>${parquet.avro.version}</version>
272-
<exclusions>
273-
<exclusion>
274-
<groupId>org.apache.avro</groupId>
275-
<artifactId>avro</artifactId>
276-
</exclusion>
277-
</exclusions>
278-
</dependency>
279266
<dependency>
280267
<groupId>com.google.protobuf</groupId>
281268
<artifactId>protobuf-java</artifactId>
@@ -286,34 +273,11 @@
286273
<artifactId>google-cloud-pubsub</artifactId>
287274
<version>${google.cloud.pubsub.version}</version>
288275
</dependency>
289-
<dependency>
290-
<groupId>org.apache.orc</groupId>
291-
<artifactId>orc-mapreduce</artifactId>
292-
<version>${orc.mapreduce.version}</version>
293-
<exclusions>
294-
<exclusion>
295-
<groupId>org.apache.hadoop</groupId>
296-
<artifactId>hadoop-hdfs</artifactId>
297-
</exclusion>
298-
<exclusion>
299-
<groupId>org.apache.hadoop</groupId>
300-
<artifactId>hadoop-common</artifactId>
301-
</exclusion>
302-
<exclusion>
303-
<groupId>org.apache.hadoop</groupId>
304-
<artifactId>hadoop-mapreduce-client-core</artifactId>
305-
</exclusion>
306-
<exclusion>
307-
<groupId>org.apache.hive</groupId>
308-
<artifactId>hive-storage-api</artifactId>
309-
</exclusion>
310-
</exclusions>
311-
</dependency>
312276
<dependency>
313277
<groupId>org.apache.avro</groupId>
314278
<artifactId>avro-mapred</artifactId>
315279
<classifier>hadoop2</classifier>
316-
<version>${avro.mapred.version}</version>
280+
<version>${avro.version}</version>
317281
</dependency>
318282
<dependency>
319283
<groupId>com.fasterxml.jackson.core</groupId>
@@ -524,12 +488,7 @@
524488
<!--Only @Plugin classes in the export packages will be included as plugin-->
525489
<_exportcontents>
526490
co.cask.gcp.*;
527-
co.cask.hydrator.format.*;
528491
com.google.cloud.hadoop.*;
529-
org.apache.avro.mapred.*;
530-
org.apache.avro.mapreduce;
531-
org.apache.parquet.avro.*;
532-
org.apache.parquet.hadoop.*;
533492
org.apache.spark.streaming.pubsub*;
534493
</_exportcontents>
535494
</instructions>

src/main/java/co/cask/gcp/bigquery/BigQuerySource.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,16 @@ public void onRunFinish(boolean succeeded, BatchSourceContext context) {
165165
@Path("getSchema")
166166
public Schema getSchema(BigQuerySourceConfig request) throws Exception {
167167
String dataset = request.getDataset();
168-
String table = request.getTable();
168+
String tableName = request.getTable();
169169
String project = request.getDatasetProject();
170-
Table bqTable = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, table);
170+
Table table = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, tableName);
171171
if (table == null) {
172172
// Table does not exist
173173
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist",
174-
project, dataset, table));
174+
project, dataset, tableName));
175175
}
176176

177-
com.google.cloud.bigquery.Schema bgSchema = bqTable.getDefinition().getSchema();
177+
com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();
178178
if (bgSchema == null) {
179179
throw new IllegalArgumentException(String.format("Cannot read from table '%s:%s.%s' because it has no schema.",
180180
project, dataset, table));
@@ -195,7 +195,7 @@ private void validateOutputSchema() throws IOException {
195195
if (table == null) {
196196
// Table does not exist
197197
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist.",
198-
project, dataset, table));
198+
project, dataset, tableName));
199199
}
200200

201201
com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();

src/main/java/co/cask/gcp/spanner/sink/SpannerSink.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@
4242
import com.google.cloud.spanner.Spanner;
4343
import com.google.cloud.spanner.SpannerException;
4444
import com.google.cloud.spanner.Statement;
45+
import com.google.common.base.Strings;
4546
import com.google.spanner.admin.database.v1.CreateDatabaseMetadata;
4647
import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata;
4748
import org.apache.hadoop.conf.Configuration;
4849
import org.apache.hadoop.io.NullWritable;
49-
import org.apache.parquet.Strings;
5050
import org.slf4j.Logger;
5151
import org.slf4j.LoggerFactory;
5252

0 commit comments

Comments
 (0)