Skip to content

Commit 37556b2

Browse files
authored
Merge pull request #515 from AdaptiveScale/cherrypick-release/0.14/PLUGIN-499
CherryPick PLUGIN-499 for release/0.14
2 parents b926531 + d5dbb92 commit 37556b2

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

src/main/java/io/cdap/plugin/gcp/bigquery/source/BigQuerySource.java

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.google.cloud.bigquery.StandardTableDefinition;
2525
import com.google.cloud.bigquery.Table;
2626
import com.google.cloud.bigquery.TableDefinition.Type;
27+
import com.google.cloud.bigquery.TableId;
2728
import com.google.cloud.bigquery.TimePartitioning;
2829
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration;
2930
import io.cdap.cdap.api.annotation.Description;
@@ -118,8 +119,7 @@ public void prepareRun(BatchSourceContext context) throws Exception {
118119
Schema configuredSchema = getOutputSchema(collector);
119120

120121
String serviceAccount = config.getServiceAccount();
121-
Credentials credentials = serviceAccount == null ?
122-
null : GCPUtils.loadServiceAccountCredentials(serviceAccount, config.isServiceAccountFilePath());
122+
Credentials credentials = getCredentials(serviceAccount);
123123
BigQuery bigQuery = GCPUtils.getBigQuery(config.getDatasetProject(), credentials);
124124

125125
uuid = UUID.randomUUID();
@@ -163,6 +163,9 @@ public void prepareRun(BatchSourceContext context) throws Exception {
163163
if (config.getViewMaterializationDataset() != null) {
164164
configuration.set(BigQueryConstants.CONFIG_VIEW_MATERIALIZATION_DATASET, config.getViewMaterializationDataset());
165165
}
166+
String temporaryTableName = String.format("_%s_%s", config.getTable(),
167+
UUID.randomUUID().toString().replaceAll("-", "_"));
168+
configuration.set(BigQueryConstants.CONFIG_TEMPORARY_TABLE_NAME, temporaryTableName);
166169

167170
String temporaryGcsPath = String.format("gs://%s/%s/hadoop/input/%s", bucket, uuid, uuid);
168171
PartitionedBigQueryInputFormat.setTemporaryCloudStorageDirectory(configuration, temporaryGcsPath);
@@ -203,6 +206,24 @@ public void transform(KeyValue<LongWritable, GenericData.Record> input, Emitter<
203206

204207
@Override
205208
public void onRunFinish(boolean succeeded, BatchSourceContext context) {
209+
deleteGcsTemporaryDirectory();
210+
deleteBigQueryTemporaryTable();
211+
}
212+
213+
private void deleteBigQueryTemporaryTable() {
214+
String temporaryTable = configuration.get(BigQueryConstants.CONFIG_TEMPORARY_TABLE_NAME);
215+
try {
216+
String serviceAccount = config.getServiceAccount();
217+
Credentials credentials = getCredentials(serviceAccount);
218+
BigQuery bigQuery = GCPUtils.getBigQuery(config.getDatasetProject(), credentials);
219+
bigQuery.delete(TableId.of(config.getProject(), config.getDataset(), temporaryTable));
220+
LOG.debug("Deleted temporary table '{}'", temporaryTable);
221+
} catch (IOException e) {
222+
LOG.error("Failed to load service account credentials: {}", e.getMessage(), e);
223+
}
224+
}
225+
226+
private void deleteGcsTemporaryDirectory() {
206227
org.apache.hadoop.fs.Path gcsPath = null;
207228
String bucket = config.getBucket();
208229
if (bucket == null) {
@@ -221,6 +242,12 @@ public void onRunFinish(boolean succeeded, BatchSourceContext context) {
221242
}
222243
}
223244

245+
@Nullable
246+
private Credentials getCredentials(@Nullable String serviceAccount) throws IOException {
247+
return serviceAccount == null ?
248+
null : GCPUtils.loadServiceAccountCredentials(serviceAccount, config.isServiceAccountFilePath());
249+
}
250+
224251
public Schema getSchema(FailureCollector collector) {
225252
com.google.cloud.bigquery.Schema bqSchema = getBQSchema(collector);
226253
return BigQueryUtil.getTableSchema(bqSchema, collector);

src/main/java/io/cdap/plugin/gcp/bigquery/source/PartitionedBigQueryInputFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private void processQuery(JobContext context) throws IOException, InterruptedExc
110110
TableReference sourceTable = new TableReference().setDatasetId(datasetId).setProjectId(inputProjectId)
111111
.setTableId(tableName);
112112
String location = bigQueryHelper.getTable(sourceTable).getLocation();
113-
String temporaryTableName = String.format("%s_%s", tableName, UUID.randomUUID().toString().replaceAll("-", "_"));
113+
String temporaryTableName = configuration.get(BigQueryConstants.CONFIG_TEMPORARY_TABLE_NAME);
114114
TableReference exportTableReference = createExportTableReference(type, inputProjectId, datasetId,
115115
temporaryTableName, configuration);
116116
runQuery(bigQueryHelper, inputProjectId, exportTableReference, query, location);

src/main/java/io/cdap/plugin/gcp/bigquery/util/BigQueryConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,5 @@ public interface BigQueryConstants {
4242
String CONFIG_PARTITION_INTEGER_RANGE_START = "cdap.bq.sink.partition.integer.range.start";
4343
String CONFIG_PARTITION_INTEGER_RANGE_END = "cdap.bq.sink.partition.integer.range.end";
4444
String CONFIG_PARTITION_INTEGER_RANGE_INTERVAL = "cdap.bq.sink.partition.integer.range.interval";
45+
String CONFIG_TEMPORARY_TABLE_NAME = "cdap.bq.source.temporary.table.name";
4546
}

0 commit comments

Comments
 (0)