1616
1717package io .cdap .plugin .gcp .gcs .source ;
1818
19+ import com .google .auth .Credentials ;
20+ import com .google .cloud .storage .Storage ;
21+ import com .google .cloud .storage .StorageException ;
1922import com .google .common .base .Strings ;
2023import com .google .gson .Gson ;
2124import com .google .gson .reflect .TypeToken ;
2528import io .cdap .cdap .api .annotation .MetadataProperty ;
2629import io .cdap .cdap .api .annotation .Name ;
2730import io .cdap .cdap .api .annotation .Plugin ;
28- import io .cdap .cdap .api .data .schema .Schema ;
29- import io .cdap .cdap .api .plugin .PluginConfig ;
3031import io .cdap .cdap .etl .api .FailureCollector ;
3132import io .cdap .cdap .etl .api .PipelineConfigurer ;
3233import io .cdap .cdap .etl .api .batch .BatchSource ;
3334import io .cdap .cdap .etl .api .batch .BatchSourceContext ;
3435import io .cdap .cdap .etl .api .connector .Connector ;
36+ import io .cdap .plugin .common .Asset ;
3537import io .cdap .plugin .common .ConfigUtil ;
36- import io .cdap .plugin .common .Constants ;
37- import io .cdap .plugin .common .IdUtils ;
3838import io .cdap .plugin .common .LineageRecorder ;
39- import io .cdap .plugin .format .FileFormat ;
40- import io .cdap .plugin .format .charset .fixedlength .FixedLengthCharset ;
39+ import io .cdap .plugin .common .ReferenceNames ;
4140import io .cdap .plugin .format .input .PathTrackingInputFormat ;
4241import io .cdap .plugin .format .plugin .AbstractFileSource ;
4342import io .cdap .plugin .format .plugin .AbstractFileSourceConfig ;
4443import io .cdap .plugin .format .plugin .FileSourceProperties ;
4544import io .cdap .plugin .gcp .common .GCPConnectorConfig ;
4645import io .cdap .plugin .gcp .common .GCPUtils ;
4746import io .cdap .plugin .gcp .crypto .EncryptedFileSystem ;
48- import io .cdap .plugin .gcp .gcs .Formats ;
4947import io .cdap .plugin .gcp .gcs .GCSPath ;
5048import io .cdap .plugin .gcp .gcs .connector .GCSConnector ;
5149
6765public class GCSSource extends AbstractFileSource <GCSSource .GCSSourceConfig > {
6866 public static final String NAME = "GCSFile" ;
6967 private final GCSSourceConfig config ;
68+ private Asset asset ;
7069
7170 public GCSSource (GCSSourceConfig config ) {
7271 super (config );
@@ -78,6 +77,34 @@ public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
7877 super .configurePipeline (pipelineConfigurer );
7978 }
8079
80+ @ Override
81+ public void prepareRun (BatchSourceContext context ) throws Exception {
82+ // Get location of the source for lineage
83+ String location ;
84+ String bucketName = GCSPath .from (config .getPath ()).getBucket ();
85+ Credentials credentials = config .connection .getServiceAccount () == null ?
86+ null : GCPUtils .loadServiceAccountCredentials (config .connection .getServiceAccount (),
87+ config .connection .isServiceAccountFilePath ());
88+ Storage storage = GCPUtils .getStorage (config .connection .getProject (), credentials );
89+ try {
90+ location = storage .get (bucketName ).getLocation ();
91+ } catch (StorageException e ) {
92+ throw new RuntimeException (
93+ String .format ("Unable to access bucket %s. " , bucketName )
94+ + "Ensure you entered the correct bucket path and have permissions for it." , e );
95+ }
96+
97+ // create asset for lineage
98+ String referenceName = Strings .isNullOrEmpty (config .getReferenceName ())
99+ ? ReferenceNames .normalizeFqn (config .getPath ())
100+ : config .getReferenceName ();
101+ asset = Asset .builder (referenceName )
102+ .setFqn (config .getPath ()).setLocation (location ).build ();
103+
104+ // super is called down here to avoid instantiating the lineage recorder with a null asset
105+ super .prepareRun (context );
106+ }
107+
81108 @ Override
82109 protected Map <String , String > getFileSystemProperties (BatchSourceContext context ) {
83110 Map <String , String > properties = GCPUtils .getFileSystemProperties (config .connection , config .getPath (),
@@ -101,6 +128,11 @@ protected Map<String, String> getFileSystemProperties(BatchSourceContext context
101128 return properties ;
102129 }
103130
131+ @ Override
132+ protected LineageRecorder getLineageRecorder (BatchSourceContext context ) {
133+ return new LineageRecorder (context , asset );
134+ }
135+
104136 @ Override
105137 protected void recordLineage (LineageRecorder lineageRecorder , List <String > outputFields ) {
106138 lineageRecorder .recordRead ("Read" , String .format ("Read%sfrom Google Cloud Storage." ,
0 commit comments