|
29 | 29 | import com.google.cloud.kms.v1.CryptoKeyName; |
30 | 30 | import com.google.cloud.storage.Storage; |
31 | 31 | import com.google.common.base.Strings; |
| 32 | +import com.google.common.collect.ImmutableMap; |
| 33 | +import com.google.gson.Gson; |
32 | 34 | import io.cdap.cdap.api.annotation.Description; |
33 | 35 | import io.cdap.cdap.api.annotation.Metadata; |
34 | 36 | import io.cdap.cdap.api.annotation.MetadataProperty; |
|
46 | 48 | import io.cdap.cdap.etl.api.batch.BatchSource; |
47 | 49 | import io.cdap.cdap.etl.api.batch.BatchSourceContext; |
48 | 50 | import io.cdap.cdap.etl.api.connector.Connector; |
| 51 | +import io.cdap.cdap.etl.api.engine.sql.SQLEngineInput; |
49 | 52 | import io.cdap.cdap.etl.api.validation.ValidationFailure; |
50 | 53 | import io.cdap.plugin.common.LineageRecorder; |
51 | 54 | import io.cdap.plugin.gcp.bigquery.connector.BigQueryConnector; |
| 55 | +import io.cdap.plugin.gcp.bigquery.sqlengine.BigQueryReadDataset; |
| 56 | +import io.cdap.plugin.gcp.bigquery.sqlengine.BigQuerySQLEngine; |
| 57 | +import io.cdap.plugin.gcp.bigquery.sqlengine.BigQueryWrite; |
52 | 58 | import io.cdap.plugin.gcp.bigquery.util.BigQueryConstants; |
53 | 59 | import io.cdap.plugin.gcp.bigquery.util.BigQueryUtil; |
54 | 60 | import io.cdap.plugin.gcp.common.CmekUtils; |
|
61 | 67 |
|
62 | 68 | import java.time.DateTimeException; |
63 | 69 | import java.time.LocalDate; |
| 70 | +import java.util.List; |
64 | 71 | import java.util.UUID; |
65 | 72 | import java.util.stream.Collectors; |
66 | 73 | import javax.annotation.Nullable; |
|
76 | 83 | @Metadata(properties = {@MetadataProperty(key = Connector.PLUGIN_TYPE, value = BigQueryConnector.NAME)}) |
77 | 84 | public final class BigQuerySource extends BatchSource<LongWritable, GenericData.Record, StructuredRecord> { |
78 | 85 | private static final Logger LOG = LoggerFactory.getLogger(BigQuerySource.class); |
| 86 | + private static final Gson GSON = new Gson(); |
79 | 87 | public static final String NAME = "BigQueryTable"; |
80 | 88 | private BigQuerySourceConfig config; |
81 | 89 | private Schema outputSchema; |
@@ -165,7 +173,7 @@ public void prepareRun(BatchSourceContext context) throws Exception { |
165 | 173 | // We call emitLineage before since it creates the dataset with schema. |
166 | 174 | Type sourceTableType = config.getSourceTableType(); |
167 | 175 | emitLineage(context, configuredSchema, sourceTableType, config.getTable()); |
168 | | - setInputFormat(context); |
| 176 | + setInputFormat(context, configuredSchema); |
169 | 177 | } |
170 | 178 |
|
171 | 179 | @Override |
@@ -335,8 +343,31 @@ private void validatePartitionProperties(FailureCollector collector) { |
335 | 343 | } |
336 | 344 | } |
337 | 345 |
|
338 | | - private void setInputFormat(BatchSourceContext context) { |
| 346 | + private void setInputFormat(BatchSourceContext context, |
| 347 | + Schema configuredSchema) { |
| 348 | + // Set input for Spark |
339 | 349 | context.setInput(Input.of(config.referenceName, new BigQueryInputFormatProvider(configuration))); |
| 350 | + |
| 351 | + // Add output for SQL Engine Direct read |
| 352 | + ImmutableMap.Builder<String, String> arguments = new ImmutableMap.Builder<>(); |
| 353 | + |
| 354 | + if (configuredSchema == null) { |
| 355 | + LOG.debug("BigQuery SQL Engine Input was not initialized. Schema was empty."); |
| 356 | + return; |
| 357 | + } |
| 358 | + |
| 359 | + List<String> fieldNames = configuredSchema.getFields().stream().map(f -> f.getName()).collect(Collectors.toList()); |
| 360 | + |
| 361 | + arguments |
| 362 | + .put(BigQueryReadDataset.SQL_INPUT_CONFIG, GSON.toJson(config)) |
| 363 | + .put(BigQueryReadDataset.SQL_INPUT_SCHEMA, GSON.toJson(configuredSchema)) |
| 364 | + .put(BigQueryReadDataset.SQL_INPUT_FIELDS, GSON.toJson(fieldNames)); |
| 365 | + |
| 366 | + Input sqlEngineInput = new SQLEngineInput(config.referenceName, |
| 367 | + context.getStageName(), |
| 368 | + BigQuerySQLEngine.class.getName(), |
| 369 | + arguments.build()); |
| 370 | + context.setInput(sqlEngineInput); |
340 | 371 | } |
341 | 372 |
|
342 | 373 | private void emitLineage(BatchSourceContext context, Schema schema, Type sourceTableType, |
|
0 commit comments