diff --git a/src/e2e-test/features/gcs/source/GCSSourceError.feature b/src/e2e-test/features/gcs/source/GCSSourceError.feature
index c74f1d7c1..5af6f881b 100644
--- a/src/e2e-test/features/gcs/source/GCSSourceError.feature
+++ b/src/e2e-test/features/gcs/source/GCSSourceError.feature
@@ -11,6 +11,7 @@ Feature: GCS source - Verify GCS Source plugin error scenarios
       | property      |
       | path          |
       | format        |
+      | referenceName |
 
   Scenario: To verify Error message for invalid bucket name
     Given Open Datafusion Project to configure pipeline
diff --git a/src/e2e-test/features/gcs/source/GCSSourceToBigQuery.feature b/src/e2e-test/features/gcs/source/GCSSourceToBigQuery.feature
index eebb3a152..967afe456 100644
--- a/src/e2e-test/features/gcs/source/GCSSourceToBigQuery.feature
+++ b/src/e2e-test/features/gcs/source/GCSSourceToBigQuery.feature
@@ -285,3 +285,268 @@ Feature: GCS source - Verification of GCS to BQ successful data transfer
     Then Verify the pipeline status is "Succeeded"
     Then Get count of no of records transferred to target BigQuery Table
     Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
+
+  @GCS_TSV_TEST @BQ_SINK_TEST
+  Scenario: To verify successful data transfer from GCS source to BigQuery sink using tsv file format
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "BigQuery" from the plugins list as: "Sink"
+    Then Connect source as "GCS" and sink as "BigQuery" to establish connection
+    Then Open GCS source properties
+    Then Enter GCS property projectId and reference name
+    Then Override Service account details if set in environment variables
+    Then Enter GCS source property path "gcsTsvFile"
+    Then Select GCS property format "tsv"
+    Then Toggle GCS source property skip header to true
+    Then Validate output schema with expectedSchema "gcsTsvFileSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Open BigQuery sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the BigQuery sink mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Click on preview data for BigQuery sink
+    Then Verify preview output schema matches the outputSchema captured in properties
+    Then Close the preview data
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Get count of no of records transferred to target BigQuery Table
+    Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
+
+  @GCS_PARQUET_TEST @BQ_SINK_TEST
+  Scenario: To verify successful data transfer from GCS source to BigQuery sink using parquet file format
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "BigQuery" from the plugins list as: "Sink"
+    Then Connect source as "GCS" and sink as "BigQuery" to establish connection
+    Then Open GCS source properties
+    Then Enter GCS property projectId and reference name
+    Then Override Service account details if set in environment variables
+    Then Enter GCS source property path "gcsParquetFile"
+    Then Select GCS property format "parquet"
+    Then Validate output schema with expectedSchema "gcsParquetFileSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Open BigQuery sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the BigQuery sink mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Click on preview data for BigQuery sink
+    Then Verify preview output schema matches the outputSchema captured in properties
+    Then Close the preview data
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Get count of no of records transferred to target BigQuery Table
+    Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
+
+  @GCS_JSON_TEST @BQ_SINK_TEST
+  Scenario: To verify successful data transfer from GCS source to BigQuery sink using json file format
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "BigQuery" from the plugins list as: "Sink"
+    Then Connect source as "GCS" and sink as "BigQuery" to establish connection
+    Then Open GCS source properties
+    Then Enter GCS property projectId and reference name
+    Then Override Service account details if set in environment variables
+    Then Enter GCS source property path "gcsJsonFile"
+    Then Select GCS property format "json"
+    Then Enter GCS source property output schema "outputSchema" as macro argument "OutSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Open BigQuery sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the BigQuery sink mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "gcsJsonFileSchema" for key "OutSchema"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "gcsJsonFileSchema" for key "OutSchema"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Get count of no of records transferred to target BigQuery Table
+    Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
+
+  @GCS_CSV_TEST @BQ_SINK_TEST
+  Scenario: To verify successful GCS to BigQuery data transfer with enable data file encryption flag true
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "BigQuery" from the plugins list as: "Sink"
+    Then Connect source as "GCS" and sink as "BigQuery" to establish connection
+    Then Open GCS source properties
+    Then Enter GCS property projectId and reference name
+    Then Override Service account details if set in environment variables
+    Then Enter GCS source property path "gcsCsvFile"
+    Then Select GCS property format "csv"
+    Then Toggle GCS source property skip header to true
+    Then Validate output schema with expectedSchema "gcsCsvFileSchema"
+    Then Validate "GCS" plugin properties
+    Then Select radio button plugin property: "encrypted" with value: "true"
+    Then Close the GCS properties
+    Then Open BigQuery sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the BigQuery sink mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Get count of no of records transferred to target BigQuery Table
+    Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
+
+  @GCS_CSV_TEST @BQ_SINK_TEST @BigQuery_Sink_Required
+  Scenario: To verify successful records transfer from GCS source to BigQuery sink with macro fields enabled at source
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "BigQuery" from the plugins list as: "Sink"
+    Then Open GCS source properties
+    Then Enter GCS property reference name
+    Then Enter GCS property "projectId" as macro argument "gcsProjectId"
+    Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType"
+    Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount"
+    Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount"
+    Then Enter GCS property "path" as macro argument "gcsSourcePath"
+    Then Enter GCS property "format" as macro argument "gcsFormat"
+    Then Enter GCS source property "skipHeader" as macro argument "gcsSkipHeader"
+    Then Click on the Macro button of Property: "sampleSize" and set the value to: "SampleSize"
+    Then Click on the Macro button of Property: "override" and set the value to: "OverRide"
+    Then Click on the Macro button of Property: "minSplitSize" and set the value to: "MinSplit"
+    Then Click on the Macro button of Property: "maxSplitSize" and set the value to: "MaxSplit"
+    Then Click on the Macro button of Property: "fileRegex" and set the value to: "FileReg"
+    Then Click on the Macro button of Property: "pathField" and set the value to: "PathF"
+    Then Click on the Macro button of Property: "filenameOnly" and set the value to: "FilenameOnly"
+    Then Click on the Macro button of Property: "recursive" and set the value to: "ReadFilesRecursively"
+    Then Click on the Macro button of Property: "ignoreNonExistingFolders" and set the value to: "IgnoreNonExistingFolders"
+    Then Click on the Macro button of Property: "encrypted" and set the value to: "DataFileEncrypted"
+    Then Click on the Macro button of Property: "encryptedMetadataSuffix" and set the value to: "testmeta"
+    Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr"
+    Then Click on the Macro button of Property: "fileEncoding" and set the value to: "Encode"
+    Then Enter GCS source property output schema "outputSchema" as macro argument "gcsOutputSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Open BigQuery sink properties
+    Then Enter BigQuery property reference name
+    Then Enter BigQuery property "projectId" as macro argument "bqProjectId"
+    Then Enter BigQuery property "datasetProjectId" as macro argument "bqDatasetProjectId"
+    Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType"
+    Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount"
+    Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount"
+    Then Enter BigQuery property "dataset" as macro argument "bqDataset"
+    Then Enter BigQuery property "table" as macro argument "bqTargetTable"
+    Then Enter BigQuery sink property "truncateTable" as macro argument "bqTruncateTable"
+    Then Enter BigQuery sink property "updateTableSchema" as macro argument "bqUpdateTableSchema"
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Connect source as "GCS" and sink as "BigQuery" to establish connection
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value "gcsCsvFile" for GCS source property path key "gcsSourcePath"
+    Then Enter runtime argument value "gcsSkipHeaderTrue" for key "gcsSkipHeader"
+    Then Enter runtime argument value "csvFormat" for key "gcsFormat"
+    Then Enter runtime argument value "sampleSize" for key "SampleSize"
+    Then Enter runtime argument value "gcsOverrideField" for key "OverRide"
+    Then Enter runtime argument value "gcsMinSplitSize" for key "MinSplit"
+    Then Enter runtime argument value "gcsMaxSplitSize" for key "MaxSplit"
+    Then Enter runtime argument value "fileRegex" for key "FileReg"
+    Then Enter runtime argument value "gcsPathField" for key "PathF"
+    Then Enter runtime argument value "filenameOnly" for GCS source property path key "FilenameOnly"
+    Then Enter runtime argument value "recursive" for GCS source property path key "ReadFilesRecursively"
+    Then Enter runtime argument value "ignoreNonExistingFolders" for GCS source property path key "IgnoreNonExistingFolders"
+    Then Enter runtime argument value "encrypted" for GCS source property path key "DataFileEncrypted"
+    Then Enter runtime argument value "encryptedMetadataSuffix" for GCS source property path key "testmeta"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Enter runtime argument value "fileEncoding" for key "Encode"
+    Then Enter runtime argument value "gcsPathFieldOutputSchema" for key "gcsOutputSchema"
+    Then Enter runtime argument value "projectId" for key "bqProjectId"
+    Then Enter runtime argument value "projectId" for key "bqDatasetProjectId"
+    Then Enter runtime argument value "dataset" for key "bqDataset"
+    Then Enter runtime argument value for BigQuery sink table name key "bqTargetTable"
+    Then Enter runtime argument value "bqTruncateTableTrue" for key "bqTruncateTable"
+    Then Enter runtime argument value "bqUpdateTableSchemaTrue" for key "bqUpdateTableSchema"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value "gcsCsvFile" for GCS source property path key "gcsSourcePath"
+    Then Enter runtime argument value "gcsSkipHeaderTrue" for key "gcsSkipHeader"
+    Then Enter runtime argument value "csvFormat" for key "gcsFormat"
+    Then Enter runtime argument value "sampleSize" for key "SampleSize"
+    Then Enter runtime argument value "gcsOverrideField" for key "OverRide"
+    Then Enter runtime argument value "gcsMinSplitSize" for key "MinSplit"
+    Then Enter runtime argument value "gcsMaxSplitSize" for key "MaxSplit"
+    Then Enter runtime argument value "fileRegex" for key "FileReg"
+    Then Enter runtime argument value "gcsPathField" for key "PathF"
+    Then Enter runtime argument value "filenameOnly" for GCS source property path key "FilenameOnly"
+    Then Enter runtime argument value "recursive" for GCS source property path key "ReadFilesRecursively"
+    Then Enter runtime argument value "ignoreNonExistingFolders" for GCS source property path key "IgnoreNonExistingFolders"
+    Then Enter runtime argument value "encrypted" for GCS source property path key "DataFileEncrypted"
+    Then Enter runtime argument value "encryptedMetadataSuffix" for GCS source property path key "testmeta"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Enter runtime argument value "fileEncoding" for key "Encode"
+    Then Enter runtime argument value "gcsPathFieldOutputSchema" for key "gcsOutputSchema"
+    Then Enter runtime argument value "projectId" for key "bqProjectId"
+    Then Enter runtime argument value "projectId" for key "bqDatasetProjectId"
+    Then Enter runtime argument value "dataset" for key "bqDataset"
+    Then Enter runtime argument value for BigQuery sink table name key "bqTargetTable"
+    Then Enter runtime argument value "bqTruncateTableTrue" for key "bqTruncateTable"
+    Then Enter runtime argument value "bqUpdateTableSchemaTrue" for key "bqUpdateTableSchema"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Get count of no of records transferred to target BigQuery Table
+    Then Validate the values of records transferred from GCS bucket file is equal to the values of target BigQuery table
diff --git a/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
index 8662619b2..cee5d0846 100644
--- a/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
+++ b/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
@@ -134,6 +134,11 @@ public static void createBucketWithTSVFile() throws IOException, URISyntaxExcept
     gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("gcsTsvFile"));
   }
 
+  @Before(order = 1, value = "@GCS_JSON_TEST")
+  public static void createBucketWithJSONFile() throws IOException, URISyntaxException {
+    gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("gcsJsonFile"));
+  }
+
   @Before(order = 1, value = "@GCS_BLOB_TEST")
   public static void createBucketWithBlobFile() throws IOException, URISyntaxException {
     gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("gcsBlobFile"));
@@ -205,7 +210,7 @@ public static void createBucketWithAvroTestFile() throws IOException, URISyntaxE
     "or @GCS_DELIMITED_TEST or @GCS_TEXT_TEST or @GCS_OUTPUT_FIELD_TEST or @GCS_DATATYPE_1_TEST or " +
     "@GCS_DATATYPE_2_TEST or @GCS_READ_RECURSIVE_TEST or @GCS_DELETE_WILDCARD_TEST or @GCS_CSV_RANGE_TEST or" +
     " @GCS_PARQUET_TEST or @GCS_AVRO_TEST or @GCS_DATATYPE_TEST or @GCS_AVRO_FILE or @GCS_CSV or " +
-    "GCS_MULTIPLE_FILES_TEST or GCS_MULTIPLE_FILES_REGEX_TEST")
+    "GCS_MULTIPLE_FILES_TEST or GCS_MULTIPLE_FILES_REGEX_TEST or @GCS_JSON_TEST")
   public static void deleteSourceBucketWithFile() {
     deleteGCSBucket(gcsSourceBucketName);
     PluginPropertyUtils.removePluginProp("gcsSourceBucketName");
diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties
index aae33e0e8..2c64e16f2 100644
--- a/src/e2e-test/resources/pluginParameters.properties
+++ b/src/e2e-test/resources/pluginParameters.properties
@@ -45,7 +45,6 @@ gcsWildcardPath2=testdata/GCS_WILDCARD_TEST/wildcard*
 gcsWildcardPath3=testdata/GCS_WILDCARD_TEST/test*
 gcsWildcardMultiBucketsPath1=testdata/GCS_RECURSIVE_TEST/*.csv;\
   testdata/GCS_RECURSIVE_TEST/recursiveFile2*
-
 gcsOverrideField=id
 gcsOverrideInt_FloatSchema=[{"key":"id","value":"float"},{"key":"name","value":"string"},\
 {"key":"yearofbirth","value":"int"},{"key":"isdeleted","value":"boolean"},{"key":"email","value":"string"},\
@@ -110,6 +109,7 @@ gcsDataTypeTest2File=testdata/GCS_DATATYPE_TEST_2.csv
 gcsReadRecursivePath=testdata/GCS_RECURSIVE_TEST
 gcsReadWildcardPath=testdata/GCS_WILDCARD_TEST,testdata/GCS_WILDCARD_TEST/test
 gcsFileSysProperty={"textinputformat.record.delimiter": "@"}
+gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
 gcsDatatypeChange=[{"key":"createddate","value":"datetime"},{"key":"revenue","value":"double"},\
 {"key":"points","value":"decimal"},{"key":"BytesData","value":"bytes"}]
 gcsDataTypeTestFileSchema=[{"key":"id","value":"int"},{"key":"name","value":"string"},\
@@ -159,6 +159,22 @@ gcsParquetFileSchema=[{"key":"workforce","value":"string"},{"key":"report_year",
 {"key":"race_black","value":"long"},{"key":"race_hispanic_latinx","value":"long"},\
 {"key":"race_native_american","value":"long"},{"key":"race_white","value":"long"},\
 {"key":"tablename","value":"string"}]
+gcsJsonFile=testdata/GCS_JSON_TEST.json
+gcsJsonFileSchema={ "type": "record", "name": "text", "fields": [ { "name":"user", "type": "string" }, { "name": "age", "type": "int" }, { "name": "city", "type": "string" } ] }
+sampleSize=1000
+fileRegex=.*\.csv$
+pathField=Employeename
+minSplitSize=100
+maxSplitSize=120
+encrypted=false
+recursive=false
+ignoreNonExistingFolders=false
+filenameOnly=false
+fileEncoding=UTF-8
+encryptedMetadataSuffix=.metadata
+gcsPathFieldOutputSchema={ "type": "record", "name": "text", "fields": [ \
+  { "name": "EmployeeDepartment", "type": "string" }, { "name": "Employeename", "type": "string" }, \
+  { "name": "Salary", "type": "int" }, { "name": "wotkhours", "type": "int" }, { "name": "pathFieldColumn", "type": "string" } ] }
 ## GCS-PLUGIN-PROPERTIES-END
 
 ## BIGQUERY-PLUGIN-PROPERTIES-START
diff --git a/src/e2e-test/resources/testdata/GCS_JSON_TEST.json b/src/e2e-test/resources/testdata/GCS_JSON_TEST.json
new file mode 100755
index 000000000..2b7c21e6a
--- /dev/null
+++ b/src/e2e-test/resources/testdata/GCS_JSON_TEST.json
@@ -0,0 +1 @@
+{"user": "Alice","age": "25","city": "New York"}