From 045e76ae5a1ba22fb7388e10c57aeb3def64629e Mon Sep 17 00:00:00 2001 From: Nivaldo Humberto Oliveira Tokuda Date: Tue, 27 Apr 2021 17:01:46 -0300 Subject: [PATCH 01/15] Add Firestore source --- megalist_dataflow/main.py | 14 +- megalist_dataflow/models/options.py | 2 + .../sources/firestore_execution_source.py | 127 ++++++++++++++++++ 3 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 megalist_dataflow/sources/firestore_execution_source.py diff --git a/megalist_dataflow/main.py b/megalist_dataflow/main.py index 7458a2ca..f0ff49e3 100644 --- a/megalist_dataflow/main.py +++ b/megalist_dataflow/main.py @@ -21,6 +21,7 @@ from mappers.ads_ssd_hashing_mapper import AdsSSDHashingMapper from mappers.ads_user_list_pii_hashing_mapper import AdsUserListPIIHashingMapper from sources.spreadsheet_execution_source import SpreadsheetExecutionSource +from sources.firestore_execution_source import FirestoreExecutionSource from sources.batches_from_executions import BatchesFromExecutions from uploaders.appsflyer.appsflyer_s2s_uploader_async import AppsFlyerS2SUploaderDoFn from uploaders.campaign_manager.campaign_manager_conversion_uploader import CampaignManagerConversionUploaderDoFn @@ -185,11 +186,18 @@ def run(argv=None): dataflow_options.access_token, dataflow_options.refresh_token) - sheets_config = SheetsConfig(oauth_credentials) + if dataflow_options.setup_sheet_id.is_accessible(): + sheets_config = SheetsConfig(oauth_credentials) with beam.Pipeline(options=pipeline_options) as pipeline: - executions = (pipeline | 'Load executions' >> beam.io.Read( - SpreadsheetExecutionSource(sheets_config, dataflow_options.setup_sheet_id))) + if dataflow_options.setup_sheet_id.is_accessible(): + executions = (pipeline | 'Load executions' >> beam.io.Read( + SpreadsheetExecutionSource(sheets_config, dataflow_options.setup_sheet_id))) + elif dataflow_options.setup_firestore_collection.is_accessible(): + executions = (pipeline | 'Load executions' >> beam.io.Read( + FirestoreExecutionSource(dataflow_options.setup_firestore_collection))) + else: + raise Exception('No valid parameter source (setup_sheet_id/setup_firestore_collection) included in the arguments') executions | GoogleAdsSSDStep( oauth_credentials, dataflow_options, AdsSSDHashingMapper()) diff --git a/megalist_dataflow/models/options.py b/megalist_dataflow/models/options.py index 8f0d3365..816afa7c 100644 --- a/megalist_dataflow/models/options.py +++ b/megalist_dataflow/models/options.py @@ -31,6 +31,8 @@ def _add_argparse_args(cls, parser): # Set up parser.add_value_provider_argument( '--setup_sheet_id', help='Id of Spreadsheet with execution info') + parser.add_value_provider_argument( + '--setup_firestore_collection', help='Name of Firestore collection with execution info') parser.add_value_provider_argument( '--bq_ops_dataset', help='Auxliary bigquery dataset used for Megalista operations') diff --git a/megalist_dataflow/sources/firestore_execution_source.py b/megalist_dataflow/sources/firestore_execution_source.py new file mode 100644 index 00000000..802cd13a --- /dev/null +++ b/megalist_dataflow/sources/firestore_execution_source.py @@ -0,0 +1,127 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import distutils.util +import logging + +from apache_beam.options.value_provider import ValueProvider + +from google.cloud import firestore +from sources.base_bounded_source import BaseBoundedSource +from models.execution import Destination, DestinationType +from models.execution import Execution, AccountConfig +from models.execution import Source, SourceType + + +class FirestoreExecutionSource(BaseBoundedSource): + """ + Read Execution data from a Firestore collection. The collection name is set-up in the parameter "setup_firestore_collection" + """ + + def __init__( + self, + setup_firestore_collection: ValueProvider + ): + super().__init__() + self._setup_firestore_collection = setup_firestore_collection + + def _do_count(self): + # TODO: implement count + return 3 + + def read(self, range_tracker): + def document_to_dict(doc): + if not doc.exists: + return None + doc_dict = doc.to_dict() + doc_dict['id'] = doc.id + return doc_dict + + firestore_collection = self._setup_firestore_collection.get() + logging.getLogger("megalista.FirestoreExecutionSource").info(f"Loading Firestore collection {firestore_collection}...") + db = firestore.Client() + entries = db.collection(self._setup_firestore_collection.get()).where('active', '==', 'yes').stream() + entries = [document_to_dict(doc) for doc in entries] + + account_data = document_to_dict(db.collection(self._setup_firestore_collection.get()).document('account_config').get()) + + if not account_data: + raise Exception('Firestore collection is absent') + google_ads_id = account_data.get('google_ads_id', 'chave_vazia') + mcc_trix = account_data.get('mcc_trix', 'FALSE') + mcc = False if mcc_trix is None else bool(distutils.util.strtobool(mcc_trix)) + app_id = account_data.get('app_id', 'chave_vazia') + google_analytics_account_id = account_data.get('google_analytics_account_id', 'chave_vazia') + campaign_manager_account_id = account_data.get('campaign_manager_account_id', 'chave_vazia') + + account_config = AccountConfig(google_ads_id, mcc, google_analytics_account_id, campaign_manager_account_id, app_id) + logging.getLogger("megalista.FirestoreExecutionSource").info(f"Loaded: {account_config}") + + sources = self._read_sources(entries) + destinations = self._read_destination(entries) + if entries: + for entry in entries: + if entry['active'].upper() == 'YES': + logging.getLogger("megalista.FirestoreExecutionSource").info( + f"Executing step Source:{sources[entry['id'] + '_source'].source_name} -> Destination:{destinations[entry['id'] + '_destination'].destination_name}") + yield Execution(account_config, sources[entry['id'] + '_source'], destinations[entry['id'] + '_destination']) + else: + logging.getLogger("megalista.FirestoreExecutionSource").warn("No schedules found!") + + @staticmethod + def _read_sources(entries): + sources = {} + if entries: + for entry in entries: + metadata = [entry['bq_dataset'], entry['bq_table']] #TODO: flexibilize for other source types + source = Source(entry['id'] + '_source', SourceType[entry['source']], metadata) + sources[source.source_name] = source + else: + 
logging.getLogger("megalista.FirestoreExecutionSource").warn("No sources found!") + return sources + + @staticmethod + def _read_destination(entries): + def create_metadata_list(entry): + metadata_list = { + 'ADS_OFFLINE_CONVERSION': ['gads_conversion_name'], + 'ADS_SSD_UPLOAD': ['gads_conversion_name', 'gads_external_upload_id'], + 'ADS_CUSTOMER_MATCH_CONTACT_INFO_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_hash'], + 'ADS_CUSTOMER_MATCH_MOBILE_DEVICE_ID_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_metadata3', 'gads_app_id'], + 'ADS_CUSTOMER_MATCH_USER_ID_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_metadata3'], + 'GA_MEASUREMENT_PROTOCOL': ['google_analytics_property_id', 'google_analytics_non_interaction'], + 'CM_OFFLINE_CONVERSION': ['campaign_manager_floodlight_activity_id', 'campaign_manager_floodlight_configuration_id'], + 'APPSFLYER_S2S_EVENTS': ['appsflyer_app_id'], + } + + entry_type = entry['type'] + metadata = metadata_list.get(entry_type, None) + if not metadata: + raise Exception(f'Upload type not implemented: {entry_type}') + entry_metadata = [] + for m in metadata: + if m in entry: + entry_metadata.append(entry[m]) + else: + raise Exception(f'Missing field in Firestore document for {entry_type}: {m}') + return entry_metadata + + + destinations = {} + if entries: + for entry in entries: + destination = Destination(entry['id'] + '_destination', DestinationType[entry['type']], create_metadata_list(entry)) + destinations[destination.destination_name] = destination + else: + logging.getLogger("megalista.FirestoreExecutionSource").warn("No destinations found!") + return destinations From 0a3d5b9f4a6b01b8003e66cbd476e352eb09e4e3 Mon Sep 17 00:00:00 2001 From: Caio Tomazelli Date: Fri, 30 Apr 2021 18:10:42 -0300 Subject: [PATCH 02/15] Fixes documentation --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 33e427b4..8a24f65c 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,11 @@ python3 megalist_dataflow/main.py \ ``` ### Deploying Pipeline -To deploy, use the following command: -`./deploy_cloud.sh project_id bucket_name region_name` +To deploy, use the following commands from the root folder: +``` +cd terraform +./scripts/deploy_cloud.sh project_id bucket_name region_name +``` #### Manually executing pipeline using Dataflow UI To execute the pipeline, use the following steps: From d6acb459b61b0a8f83fbba2eb6b288f88c618e59 Mon Sep 17 00:00:00 2001 From: Caio Tomazelli Date: Fri, 30 Apr 2021 18:11:44 -0300 Subject: [PATCH 03/15] Fixing deploy_cloud.sh typo. 
--- terraform/scripts/deploy_cloud.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/scripts/deploy_cloud.sh b/terraform/scripts/deploy_cloud.sh index 37578e91..e238c78b 100755 --- a/terraform/scripts/deploy_cloud.sh +++ b/terraform/scripts/deploy_cloud.sh @@ -26,7 +26,7 @@ echo "Configuration GCP project in gcloud" gcloud config set project "$1" echo "Build Dataflow metadata" python3 -m pip install --user -q -r requirements.txt -python3 -m main --runner DataflowRunner --project "$1" --gcp_project_id "$1" --temp_location"gs://$2/tmp/" --region "$3" --setup_file ./setup.py --template_location "gs://$2/templates/megalista" --num_workers 1 --autoscaling_algorithm=NONE +python3 -m main --runner DataflowRunner --project "$1" --gcp_project_id "$1" --temp_location "gs://$2/tmp/" --region "$3" --setup_file ./setup.py --template_location "gs://$2/templates/megalista" --num_workers 1 --autoscaling_algorithm=NONE echo "Copy megalista_medata to bucket $2" gsutil cp megalist_metadata "gs://$2/templates/megalista_metadata" cd .. From 08f89e16076c34c574908c4ffed05caff2f1bab4 Mon Sep 17 00:00:00 2001 From: nivaldoh Date: Fri, 30 Apr 2021 20:02:51 -0300 Subject: [PATCH 04/15] refactor: Reorganize parameters Translate constants and remove unnecessary metadata --- .../sources/firestore_execution_source.py | 14 +++++++------- .../google_ads/customer_match/abstract_uploader.py | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/megalist_dataflow/sources/firestore_execution_source.py b/megalist_dataflow/sources/firestore_execution_source.py index 802cd13a..1e17e1e3 100644 --- a/megalist_dataflow/sources/firestore_execution_source.py +++ b/megalist_dataflow/sources/firestore_execution_source.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -57,12 +57,12 @@ def document_to_dict(doc): if not account_data: raise Exception('Firestore collection is absent') - google_ads_id = account_data.get('google_ads_id', 'chave_vazia') + google_ads_id = account_data.get('google_ads_id', 'empty') mcc_trix = account_data.get('mcc_trix', 'FALSE') mcc = False if mcc_trix is None else bool(distutils.util.strtobool(mcc_trix)) - app_id = account_data.get('app_id', 'chave_vazia') - google_analytics_account_id = account_data.get('google_analytics_account_id', 'chave_vazia') - campaign_manager_account_id = account_data.get('campaign_manager_account_id', 'chave_vazia') + app_id = account_data.get('app_id', 'empty') + google_analytics_account_id = account_data.get('google_analytics_account_id', 'empty') + campaign_manager_account_id = account_data.get('campaign_manager_account_id', 'empty') account_config = AccountConfig(google_ads_id, mcc, google_analytics_account_id, campaign_manager_account_id, app_id) logging.getLogger("megalista.FirestoreExecutionSource").info(f"Loaded: {account_config}") @@ -97,8 +97,8 @@ def create_metadata_list(entry): 'ADS_OFFLINE_CONVERSION': ['gads_conversion_name'], 'ADS_SSD_UPLOAD': ['gads_conversion_name', 'gads_external_upload_id'], 'ADS_CUSTOMER_MATCH_CONTACT_INFO_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_hash'], - 'ADS_CUSTOMER_MATCH_MOBILE_DEVICE_ID_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_metadata3', 'gads_app_id'], - 'ADS_CUSTOMER_MATCH_USER_ID_UPLOAD': ['gads_audience_name', 'gads_operation', 'gads_metadata3'], + 'ADS_CUSTOMER_MATCH_MOBILE_DEVICE_ID_UPLOAD': ['gads_audience_name', 'gads_operation'], + 'ADS_CUSTOMER_MATCH_USER_ID_UPLOAD': ['gads_audience_name', 'gads_operation'], 'GA_MEASUREMENT_PROTOCOL': ['google_analytics_property_id', 'google_analytics_non_interaction'], 'CM_OFFLINE_CONVERSION': ['campaign_manager_floodlight_activity_id', 'campaign_manager_floodlight_configuration_id'], 'APPSFLYER_S2S_EVENTS': ['appsflyer_app_id'], diff --git a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py index cdf638dc..80216cb8 100644 --- a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py +++ b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py @@ -114,7 +114,7 @@ def process(self, batch: Batch, **kwargs) -> None: rows = self.get_filtered_rows( batch.elements, self.get_row_keys()) - + mutate_members_operation = { 'operand': { 'userListId': list_id, @@ -126,7 +126,8 @@ def process(self, batch: Batch, **kwargs) -> None: utils.safe_call_api(self.call_api, logging, user_list_service, [mutate_members_operation]) def call_api(self, service, operations): - service.mutateMembers(operations) + r = service.mutateMembers(operations) + print(f'\n\n{r}\n\n') def get_filtered_rows(self, rows: List[Any], keys: List[str]) -> List[Dict[str, Any]]: From 373a9f328fca2c355ad16f3257be926c3f30ac1e Mon Sep 17 00:00:00 2001 From: nivaldoh Date: Fri, 30 Apr 2021 20:08:42 -0300 Subject: [PATCH 05/15] refactor: Remove placeholders --- .../uploaders/google_ads/customer_match/abstract_uploader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py index 80216cb8..b3b59185 100644 --- a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py +++ 
b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py @@ -126,8 +126,7 @@ def process(self, batch: Batch, **kwargs) -> None: utils.safe_call_api(self.call_api, logging, user_list_service, [mutate_members_operation]) def call_api(self, service, operations): - r = service.mutateMembers(operations) - print(f'\n\n{r}\n\n') + service.mutateMembers(operations) def get_filtered_rows(self, rows: List[Any], keys: List[str]) -> List[Dict[str, Any]]: From c50a5ac0bba17f9c492ca562b479e331ab1872e2 Mon Sep 17 00:00:00 2001 From: nivaldoh Date: Fri, 30 Apr 2021 20:11:35 -0300 Subject: [PATCH 06/15] refactor: Reset indentation --- .../uploaders/google_ads/customer_match/abstract_uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py index b3b59185..cdf638dc 100644 --- a/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py +++ b/megalist_dataflow/uploaders/google_ads/customer_match/abstract_uploader.py @@ -114,7 +114,7 @@ def process(self, batch: Batch, **kwargs) -> None: rows = self.get_filtered_rows( batch.elements, self.get_row_keys()) - + mutate_members_operation = { 'operand': { 'userListId': list_id, From a78f5b2641f35b67d667f83cebe6333a96fdfa1b Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Tue, 13 Apr 2021 20:22:35 -0300 Subject: [PATCH 07/15] docs: update README to MDS Changed name for Marketing Data Sync and added credit to Google Project --- README.md | 53 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8a24f65c..9d872439 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,21 @@ -# Megalista +# MDS - Marketing Data Sync -Sample integration code for onboarding offline/CRM data from BigQuery as custom audiences or offline conversions in Google Ads, Google Analytics 360, Google Display & Video 360 and Google Campaign Manager. +Solution based on the [Google Megalista project](https://github.com/google/megalista). -**Disclaimer:** This is not an officially supported Google product. +
+[semantic-release badge]
+[Codacy code quality badge]
+
+ +Sample integration code for onboarding offline/CRM data from BigQuery as custom audiences or offline conversions in Google Ads, Google Analytics 360, Google Display & Video 360, Google Campaign Manager and Facebook Ads. ## Supported integrations - **Google Ads** @@ -25,7 +38,7 @@ Sample integration code for onboarding offline/CRM data from BigQuery as custom - S2S Offline events API (conversion upload), to be used for audience creation and in-app events with Google Ads and DV360 [[details]](https://support.appsflyer.com/hc/en-us/articles/207034486-API-de-eventos-de-servidor-para-servidor-S2S-mobile-para-mobile) ## How does it work -Megalista was design to separate the configuration of conversion/audience upload rules from the engine, giving more freedom for non-technical teams (i.e. Media and Business Inteligence) to setup multiple upload rules on their own. +MDS was design to separate the configuration of conversion/audience upload rules from the engine, giving more freedom for non-technical teams (i.e. Media and Business Inteligence) to setup multiple upload rules on their own. The solution consists in #1 a Google Spreadsheet (template) in which all rules are defined by mapping a data source (BigQuery Table) to a destination (data upload endpoint) and #2, an apache beam workflow running on Google Dataflow, scheduled to upload the data in batch mode. @@ -46,7 +59,7 @@ The solution consists in #1 a Google Spreadsheet (template) in which all rules a - **Google Cloud SDK** ### Access Requirements -Those are the minimum roles necessary to deploy Megalista: +Those are the minimum roles necessary to deploy MDS: - OAuth Config Editor - BigQuery User - BigQuery Job User @@ -81,23 +94,23 @@ In order to create it, follow these steps: - On the **OAuth Consent Screen** and configure an *Application name* - Then, go to the **Credentials** and create an *OAuth client Id* with Application type set as *Desktop App* - This will generate a *Client Id* and a *Client secret* - - Run the **generate_megalist_token.sh** script in this folder providing these two values and follow the instructions - - Sample: `./generate_megalist_token.sh client_id client_secret` + - Run the **generate_mds_token.sh** script in this folder providing these two values and follow the instructions + - Sample: `./generate_mds_token.sh client_id client_secret` - This will generate the *Access Token* and the *Refresh token* ### Creating a bucket on Cloud Storage This bucket will hold the deployed code for this solution. To create it, navigate to the *Storage* link on the top-left menu on GCP and click on *Create bucket*. You can use Regional location and Standard data type for this bucket. -## Running Megalista +## Running MDS We recommend first running it locally and make sure that everything works. Make some sample tables on BigQuery for one of the uploaders and make sure that the data is getting correctly to the destination. After that is done, upload the Dataflow template to GCP and try running it manually via the UI to make sure it works. -Lastly, configure the Cloud Scheduler to run Megalista in the frequency desired and you'll have a fully functional data integration pipeline. +Lastly, configure the Cloud Scheduler to run MDS in the frequency desired and you'll have a fully functional data integration pipeline. 
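In addition to the spreadsheet-driven configuration shown below, this patch series adds a Firestore-backed setup (`FirestoreExecutionSource`). A minimal sketch of the equivalent local run, assuming the execution rules live in a Firestore collection named `megalista_executions` and that `--setup_sheet_id` is left unset so `main.py` falls through to the Firestore branch; the remaining flags mirror the spreadsheet-based command in the next section:

```bash
# Sketch only: the collection name is an assumption; the credential flags are
# the same ones used by the spreadsheet-based run.
python3 megalist_dataflow/main.py \
  --runner DirectRunner \
  --developer_token ${GOOGLE_ADS_DEVELOPER_TOKEN} \
  --setup_firestore_collection megalista_executions \
  --gcp_project_id ${GCP_PROJECT_ID} \
  --client_id ${CLIENT_ID} \
  --client_secret ${CLIENT_SECRET} \
  --access_token ${ACCESS_TOKEN} \
  --refresh_token ${REFRESH_TOKEN}
```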
### Running locally ```bash -python3 megalist_dataflow/main.py \ +python3 mds_dataflow/main.py \ --runner DirectRunner \ --developer_token ${GOOGLE_ADS_DEVELOPER_TOKEN} \ --setup_sheet_id ${CONFIGURATION_SHEET_ID} \ @@ -122,7 +135,7 @@ To execute the pipeline, use the following steps: - Go to **Dataflow** on GCP console - Click on *Create job from template* - On the template selection dropdown, select *Custom template* -- Find the *megalist* file on the bucket you've created, on the templates folder +- Find the *mds* file on the bucket you've created, on the templates folder - Fill in the parameters required and execute ### Scheduling pipeline @@ -130,7 +143,7 @@ To schedule daily/hourly runs, go to **Cloud Scheduler**: - Click on *create job* - Add a name and frequency as desired - For *target* set as HTTP -- Configure a *POST* for url: https://dataflow.googleapis.com/v1b3/projects/${YOUR_PROJECT_ID}/locations/${LOCATION}/templates:launch?gcsPath=gs://${BUCKET_NAME}/templates/megalist, replacing the params with the actual values +- Configure a *POST* for url: https://dataflow.googleapis.com/v1b3/projects/${YOUR_PROJECT_ID}/locations/${LOCATION}/templates:launch?gcsPath=gs://${BUCKET_NAME}/templates/mds, replacing the params with the actual values - For a sample on the *body* of the request, check **cloud_config/scheduler.json** - Add OAuth Headers - Scope: https://www.googleapis.com/auth/cloud-platform @@ -145,4 +158,18 @@ It's recommended to create a new Service Account to be used with the Cloud Sched ## Usage -Every upload method expects as source a BigQuery data with specific fields, in addition to specific configuration metadata. For details on how to setup your upload routines, refer to the [Megalista Wiki](https://github.com/google/megalista/wiki) or the [Megalista user guide](https://github.com/google/megalista/blob/main/documentation/Megalista%20-%20Technical%20User%20Guide%20-%20EXTERNAL.pdf). +Every upload method expects as source a BigQuery data with specific fields, in addition to specific configuration metadata. For details on how to setup your upload routines, refer to the [MDS Wiki](https://github.com/dp6/marketing-data-sync/wiki) or the [MDS user guide](https://github.com/dp6/marketing-data-sync/blob/main/documentation/mds%20-%20Technical%20User%20Guide%20-%20EXTERNAL.pdf). 
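When the execution rules come from Firestore instead of the spreadsheet, the configuration metadata lives in documents of the collection passed via `--setup_firestore_collection`. A minimal sketch of how such a collection could be populated, based on the fields `FirestoreExecutionSource` reads — the collection name, document IDs and all values are illustrative assumptions, not a prescribed schema:

```python
# Illustrative sketch: field names follow FirestoreExecutionSource;
# collection/document names and all values are assumptions.
from google.cloud import firestore

db = firestore.Client()
collection = db.collection('megalista_executions')  # value passed to --setup_firestore_collection

# Account-level settings are read from a document named 'account_config'.
collection.document('account_config').set({
    'google_ads_id': '123-456-7890',
    'mcc_trix': 'FALSE',             # parsed with distutils.util.strtobool
    'app_id': 'com.example.app',
    'google_analytics_account_id': 'UA-0000000-1',
    'campaign_manager_account_id': '',
})

# Every other document with active == 'yes' becomes one Execution (source -> destination).
collection.document('purchase_conversions').set({
    'active': 'yes',
    'source': 'BIG_QUERY',              # must match a SourceType name
    'bq_dataset': 'crm',
    'bq_table': 'offline_purchases',
    'type': 'ADS_OFFLINE_CONVERSION',   # must match a DestinationType name
    'gads_conversion_name': 'offline_purchase',  # metadata required for this destination type
})
```

Entries whose `active` field is anything other than `yes` are skipped, so individual uploads can be toggled without deleting their documents.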
+ +### Mandatory requirements + +Only contributions that meet the following requirements will be accepted: + +- [Commit pattern](https://www.conventionalcommits.org/en/v1.0.0/) + +## Support: + +**DP6 Koopa-troopa Team** + +_e-mail: _ + + From 5cf834bd924caa10f5cb7d3c45892108e7e0baa6 Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Tue, 13 Apr 2021 20:29:05 -0300 Subject: [PATCH 08/15] docs: :lipstick: Added theme dp6.github.io --- _config.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 _config.yml diff --git a/_config.yml b/_config.yml new file mode 100644 index 00000000..99fa0f4c --- /dev/null +++ b/_config.yml @@ -0,0 +1,8 @@ +title: DP6 - Centro de inovações +initiative: 'Marketing Data Sync' + +remote_theme: dp6/dp6.github.io + +plugins: + - jekyll-sitemap + - jekyll-gzip From 8ae461c2a47820680c9085f0c3c7331cbd22863e Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Fri, 23 Apr 2021 18:47:03 -0300 Subject: [PATCH 09/15] ci: :construction_worker: Added Codacy and change docs --- .github/ISSUE_TEMPLATE/bug_report.md | 27 ++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 19 ++++++ .github/PULL_REQUEST_TEMPLATE.md | 5 ++ .github/dependabot.yml | 13 ++++ .github/workflows/codacy-analysis.yml | 36 +++++++++++ CODE_OF_CONDUCT.md | 76 +++++++++++++++++++++++ README.md | 1 + cloud_config/scheduler_sample.json | 6 +- megalist_dataflow/uploaders/utils.py | 2 +- run_cloud.sh | 2 +- 10 files changed, 182 insertions(+), 5 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/codacy-analysis.yml create mode 100644 CODE_OF_CONDUCT.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..7955b54b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '[BUG]' +labels: bug +assignees: '' +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: + +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..9383cc48 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '[NEW]' +labels: enhancement +assignees: '' +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..69fd1516 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,5 @@ +**What issue does this pull request resolve?** + +**What changes did you make?** + +**Is there anything that requires more attention while reviewing?** diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..f7628b26 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'weekly' + + # Maintain dependencies for npm + - package-ecosystem: 'pip' + directory: '/megalist_dataflow' + schedule: + interval: 'weekly' diff --git a/.github/workflows/codacy-analysis.yml b/.github/workflows/codacy-analysis.yml new file mode 100644 index 00000000..7e62010d --- /dev/null +++ b/.github/workflows/codacy-analysis.yml @@ -0,0 +1,36 @@ +# This workflow checks out code, performs a Codacy security scan +# and integrates the results with the +# GitHub Advanced Security code scanning feature. For more information on +# the Codacy security scan action usage and parameters, see +# https://github.com/codacy/codacy-analysis-cli-action. +# For more information on Codacy Analysis CLI in general, see +# https://github.com/codacy/codacy-analysis-cli. + +name: Codacy + +on: ['push'] + +jobs: + codacy-security-scan: + name: Codacy Analysis + runs-on: ubuntu-latest + steps: + # Checkout the repository to the GitHub Actions runner + - name: Checkout code + uses: actions/checkout@v2 + + # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis + - name: Run Codacy Analysis CLI + uses: codacy/codacy-analysis-cli-action@3.0.0 + with: + # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository + # You can also omit the token and run the tools that support default configurations + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + verbose: true + output: results.sarif + format: sarif + # Adjust severity of non-security issues + gh-code-scanning-compat: true + # Force 0 exit code to allow SARIF file generation + # This will handover control about PR rejection to the GitHub side + max-allowed-issues: 2147483647 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..51db4fdd --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual attention or + advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic + address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at koopas@dp6.com.br. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/README.md b/README.md index 9d872439..ee7a30a4 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Solution based on the [Google Megalista project](https://github.com/google/megal Code quality +

Sample integration code for onboarding offline/CRM data from BigQuery as custom audiences or offline conversions in Google Ads, Google Analytics 360, Google Display & Video 360, Google Campaign Manager and Facebook Ads. diff --git a/cloud_config/scheduler_sample.json b/cloud_config/scheduler_sample.json index cc6e5393..b653367a 100644 --- a/cloud_config/scheduler_sample.json +++ b/cloud_config/scheduler_sample.json @@ -1,5 +1,5 @@ { - "jobName": "megalist_daily", + "jobName": "mds_daily", "parameters": { "developer_token": "Google Ads Developer Token", "client_id": "GCP OAuth Client id", @@ -7,11 +7,11 @@ "access_token": "GCP OAuth access token", "refresh_token": "GCP OAuth refresh token", "setup_sheet_id": "Setup Google Sheets Id", - "bq_ops_dataset": "Auxliary bigquery dataset used for Megalista operations", + "bq_ops_dataset": "Auxliary bigquery dataset used for MDS operations", "appsflyer_dev_key": "Apps flyer dev key" }, "environment": { - "tempLocation": "gs://megalist-data/temp", + "tempLocation": "gs://bucket-name/temp", "zone": "us-central1-f" } } diff --git a/megalist_dataflow/uploaders/utils.py b/megalist_dataflow/uploaders/utils.py index 9e2724e6..c458685a 100644 --- a/megalist_dataflow/uploaders/utils.py +++ b/megalist_dataflow/uploaders/utils.py @@ -32,7 +32,7 @@ def get_ads_service(service_name, version, oauth_credentials, developer_token, client = adwords.AdWordsClient( developer_token, oauth2_client, - 'MegaList Dataflow', + 'Mds Dataflow', client_customer_id=customer_id) return client.GetService(service_name, version=version) diff --git a/run_cloud.sh b/run_cloud.sh index db709d44..d2b53ed0 100755 --- a/run_cloud.sh +++ b/run_cloud.sh @@ -21,4 +21,4 @@ fi gcloud config set project $1 token=$(gcloud auth application-default print-access-token) -curl -H "Authorization: Bearer $token" -H "Content-Type:application/json" "https://dataflow.googleapis.com/v1b3/projects/$1/locations/$3/templates:launch?gcsPath=gs://$2/templates/megalist" --data-binary "@cloud_config/scheduler.json" +curl -H "Authorization: Bearer $token" -H "Content-Type:application/json" "https://dataflow.googleapis.com/v1b3/projects/$1/locations/$3/templates:launch?gcsPath=gs://$2/templates/mds" --data-binary "@cloud_config/scheduler.json" From 4e6653ee3e2dbb8c11aaec458c147ab0aa890373 Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Sun, 2 May 2021 18:50:41 -0300 Subject: [PATCH 10/15] ci: :green_heart: Fixing semantic-release config - Added config variables in pyproject.toml to replace default behavior - Changed dependabot scheduler frequency --- .github/CHANGELOG.md | 0 .github/dependabot.yml | 4 ++-- .github/workflows/codacy-analysis.yml | 2 +- .github/workflows/python-app.yml | 2 +- pyproject.toml | 4 +++- 5 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 .github/CHANGELOG.md diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md new file mode 100644 index 00000000..e69de29b diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f7628b26..5108f1ea 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,10 +4,10 @@ updates: - package-ecosystem: 'github-actions' directory: '/' schedule: - interval: 'weekly' + interval: 'monthly' # Maintain dependencies for npm - package-ecosystem: 'pip' directory: '/megalist_dataflow' schedule: - interval: 'weekly' + interval: 'monthly' diff --git a/.github/workflows/codacy-analysis.yml b/.github/workflows/codacy-analysis.yml index 7e62010d..4eaa4f4e 100644 --- a/.github/workflows/codacy-analysis.yml +++ 
b/.github/workflows/codacy-analysis.yml @@ -6,7 +6,7 @@ # For more information on Codacy Analysis CLI in general, see # https://github.com/codacy/codacy-analysis-cli. -name: Codacy +name: Codacy Analysis on: ['push'] diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 554bc341..19ed5399 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -7,7 +7,7 @@ on: push: branches: [ develop ] pull_request: - branches: [ main ] + branches: [ main, master ] jobs: unit_testing: diff --git a/pyproject.toml b/pyproject.toml index 8f7fa7a6..9a5d7b55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,8 @@ [tool.semantic_release] upload_to_pypi = false +branch = main version_variable = [ 'megalist_dataflow/setup.py:__version__' ] - +version_source = tag +build_command = false From 271a41064fa4843fd17c46ce0812d21f39814b04 Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Sun, 2 May 2021 19:15:26 -0300 Subject: [PATCH 11/15] ci: Fixes gitaction workflows --- .github/workflows/python-app.yml | 7 +++++-- .github/workflows/semantic-release.yml | 3 +-- .github/workflows/terraform.yml | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 19ed5399..ddda561d 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python testing +name: Python on: push: @@ -11,7 +11,7 @@ on: jobs: unit_testing: - + name: Test runs-on: ubuntu-latest steps: @@ -27,3 +27,6 @@ jobs: - name: Run tests run: | ./run_tests.sh + - name: Upload coverage to Codacy + run: export CODACY_PROJECT_TOKEN=${{ secrets.CODACY_PROJECT_TOKEN }} && bash <(curl -Ls https://coverage.codacy.com/get.sh) report -r megalist_dataflow/* + continue-on-error: true diff --git a/.github/workflows/semantic-release.yml b/.github/workflows/semantic-release.yml index cbb0134e..7c0c9254 100644 --- a/.github/workflows/semantic-release.yml +++ b/.github/workflows/semantic-release.yml @@ -2,8 +2,7 @@ name: Semantic Release on: push: - branches: - - main + branches: [ main, master ] jobs: release: diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index 662f6652..f4f60225 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -4,7 +4,7 @@ on: ['push'] jobs: terraform-actions: - name: Workflow + name: tf validate runs-on: ubuntu-latest defaults: run: From a82a6e7eaacb7f4dd5965342cc7ec54ad69fbdf7 Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Sun, 2 May 2021 19:34:07 -0300 Subject: [PATCH 12/15] perf: :arrow_up: Update dependencies pytest-cov, pytz, aiohttp, six and terraform.yml --- .github/workflows/terraform.yml | 2 +- megalist_dataflow/requirements.txt | 8 ++++---- megalist_dataflow/setup.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index 662f6652..762ea472 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -14,7 +14,7 @@ jobs: uses: actions/checkout@master - name: HashiCorp - Setup Terraform - uses: hashicorp/setup-terraform@v1.2.1 + uses: hashicorp/setup-terraform@v1.3.2 with: terraform_version: 0.14.6 diff --git a/megalist_dataflow/requirements.txt 
b/megalist_dataflow/requirements.txt index 3c563904..2bcf012e 100644 --- a/megalist_dataflow/requirements.txt +++ b/megalist_dataflow/requirements.txt @@ -9,13 +9,13 @@ apache-beam==2.28.0 google-cloud-datastore==1.13.1 google-apitools==0.5.31 pytest==5.4.3 -pytest-cov==2.10.0 +pytest-cov==2.11.1 pytest-mock==3.2.0 requests-mock==1.8.0 -pytz==2020.1 +pytz==2021.1 wheel==0.34.2 pyarrow==0.17.1 -aiohttp==3.6.2 +aiohttp==3.7.4 bloom-filter==1.3 -six==1.13.0 +six==1.15.0 mypy==0.790 \ No newline at end of file diff --git a/megalist_dataflow/setup.py b/megalist_dataflow/setup.py index 73cb2d21..f92b930f 100644 --- a/megalist_dataflow/setup.py +++ b/megalist_dataflow/setup.py @@ -22,6 +22,6 @@ url='https://cse.googlesource.com/solutions/megalist', install_requires=['googleads==24.1.0', 'google-api-python-client==1.10.0', 'google-cloud-core==1.3.0', 'google-cloud-bigquery==1.26.0', - 'google-cloud-datastore==1.13.1', 'aiohttp==3.6.2'], + 'google-cloud-datastore==1.13.1', 'aiohttp==3.7.4'], packages=setuptools.find_packages(), ) From a7d12394410c988f7f87c7e24c478d8f4d535c7f Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Sun, 2 May 2021 22:35:45 -0300 Subject: [PATCH 13/15] chore: fixe pyproject.toml (#20) --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9a5d7b55..0f6ff6fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [tool.semantic_release] upload_to_pypi = false -branch = main +branch = 'master' version_variable = [ 'megalist_dataflow/setup.py:__version__' ] -version_source = tag +version_source = 'tag' build_command = false From 488dc58a06cf232399e732fd80e2623610fd95d6 Mon Sep 17 00:00:00 2001 From: Joaquim Neto Date: Sun, 2 May 2021 22:49:01 -0300 Subject: [PATCH 14/15] =?UTF-8?q?perf:=20=E2=9A=A1=20Add=20partial=20failu?= =?UTF-8?q?re=20support=20for=20Google=20Ads=20=20(#10)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: nivaldoh --- megalist_dataflow/uploaders/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/megalist_dataflow/uploaders/utils.py b/megalist_dataflow/uploaders/utils.py index c458685a..a2a859a9 100644 --- a/megalist_dataflow/uploaders/utils.py +++ b/megalist_dataflow/uploaders/utils.py @@ -34,6 +34,7 @@ def get_ads_service(service_name, version, oauth_credentials, developer_token, oauth2_client, 'Mds Dataflow', client_customer_id=customer_id) + client.partial_failure = True return client.GetService(service_name, version=version) From 5583a21d630de0e60a22d2bff8253c0ef2344ca1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 May 2021 02:19:01 +0000 Subject: [PATCH 15/15] Bump pyarrow from 0.17.1 to 4.0.0 in /megalist_dataflow Bumps [pyarrow](https://github.com/apache/arrow) from 0.17.1 to 4.0.0. - [Release notes](https://github.com/apache/arrow/releases) - [Commits](https://github.com/apache/arrow/compare/apache-arrow-0.17.1...apache-arrow-4.0.0) Signed-off-by: dependabot[bot] --- megalist_dataflow/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megalist_dataflow/requirements.txt b/megalist_dataflow/requirements.txt index 2bcf012e..df7b5eec 100644 --- a/megalist_dataflow/requirements.txt +++ b/megalist_dataflow/requirements.txt @@ -14,7 +14,7 @@ pytest-mock==3.2.0 requests-mock==1.8.0 pytz==2021.1 wheel==0.34.2 -pyarrow==0.17.1 +pyarrow==4.0.0 aiohttp==3.7.4 bloom-filter==1.3 six==1.15.0
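PATCH 14 above turns on `partial_failure` for the `AdWordsClient` built in `uploaders/utils.py`, so supported Google Ads mutate calls report row-level errors instead of rejecting the whole batch. A hedged sketch of how a caller could surface those errors — the service name, API version and the `partialFailureErrors` field follow the public AdWords API convention for partial failures and are not taken from these patches:

```python
# Sketch only: assumes the megalist_dataflow package layout and the standard
# googleads partial-failure response shape; not part of the patch series.
import logging

from uploaders import utils


def upload_with_partial_failure(oauth_credentials, developer_token, customer_id, operations):
    service = utils.get_ads_service(
        'OfflineConversionFeedService', 'v201809',
        oauth_credentials, developer_token, customer_id)
    result = service.mutate(operations)
    # With client.partial_failure = True, failed rows are reported here rather
    # than failing the entire request.
    if 'partialFailureErrors' in result:
        for error in result['partialFailureErrors']:
            logging.getLogger('megalista.PartialFailure').error(error)
    return result
```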