Skip to content

Commit a4f7152

Browse files
committed
CDAP-14481 add get schema button for gcs file source
1 parent e012cd6 commit a4f7152

File tree

5 files changed

+28
-3
lines changed

5 files changed

+28
-3
lines changed

docs/BigQueryTable-batchsource.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ must be readable by all users running the job.
2121

2222
Properties
2323
----------
24-
**Reference Name:** Name used to uniquely identify this sink for lineage, annotating metadata, etc.
24+
**Reference Name:** Name used to uniquely identify this source for lineage, annotating metadata, etc.
2525

2626
**Project ID**: Google Cloud Project ID, which uniquely identifies a project.
2727
It can be found on the Dashboard in the Google Cloud Platform Console.

docs/GCSFile-batchsource.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ must be readable by all users running the job.
2323

2424
Properties
2525
----------
26-
**Reference Name:** Name used to uniquely identify this sink for lineage, annotating metadata, etc.
26+
**Reference Name:** Name used to uniquely identify this source for lineage, annotating metadata, etc.
2727

2828
**Project ID**: Google Cloud Project ID, which uniquely identifies a project.
2929
It can be found on the Dashboard in the Google Cloud Platform Console.

docs/Spanner-batchsource.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ must be readable by all users running the job.
2121

2222
Properties
2323
----------
24-
**Reference Name:** Name used to uniquely identify this sink for lineage, annotating metadata, etc.
24+
**Reference Name:** Name used to uniquely identify this source for lineage, annotating metadata, etc.
2525

2626
**Project ID**: Google Cloud Project ID, which uniquely identifies a project.
2727
It can be found on the Dashboard in the Google Cloud Platform Console.

src/main/java/co/cask/gcp/gcs/source/GCSSource.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import co.cask.cdap.api.annotation.Name;
2222
import co.cask.cdap.api.annotation.Plugin;
2323
import co.cask.cdap.api.data.schema.Schema;
24+
import co.cask.cdap.api.plugin.EndpointPluginContext;
2425
import co.cask.cdap.etl.api.PipelineConfigurer;
2526
import co.cask.cdap.etl.api.batch.BatchSource;
2627
import co.cask.cdap.etl.api.batch.BatchSourceContext;
@@ -41,6 +42,7 @@
4142
import java.util.Map;
4243
import java.util.regex.Pattern;
4344
import javax.annotation.Nullable;
45+
import javax.ws.rs.Path;
4446

4547
/**
4648
* Class description here.
@@ -87,6 +89,23 @@ protected void recordLineage(LineageRecorder lineageRecorder, List<String> outpu
8789
lineageRecorder.recordRead("Read", "Read from Google Cloud Storage.", outputFields);
8890
}
8991

92+
/**
93+
* Endpoint method that returns the output schema of this source: the schema reported by the configured file format when there is one, otherwise the schema held in the config.
94+
*
95+
* @param config configuration for the source; its format, path field and schema are read here
96+
* @param pluginContext context to create plugins (not read by this method)
97+
* @return the format's schema for the configured path field, or the config's schema when the format is null or the format returns a null schema
98+
*/
99+
@Path("getSchema")
100+
public Schema getSchema(GCSSourceConfig config, EndpointPluginContext pluginContext) {
101+
FileFormat fileFormat = config.getFormat();
102+
if (fileFormat == null) {
103+
return config.getSchema();
104+
}
105+
Schema schema = fileFormat.getSchema(config.getPathField());
106+
return schema == null ? config.getSchema() : schema;
107+
}
108+
90109
/**
91110
* Config for the plugin.
92111
*/

widgets/GCSFile-batchsource.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@
4747
"tsv"
4848
],
4949
"default": "text"
50+
},
51+
"plugin-function": {
52+
"method": "POST",
53+
"widget": "outputSchema",
54+
"output-property": "schema",
55+
"plugin-method": "getSchema"
5056
}
5157
},
5258
{

0 commit comments

Comments
 (0)