
Commit cd39fe1

fix: missing validation reports (#1102)
1 parent 46143a2 commit cd39fe1

File tree: 21 files changed, +1161 −103 lines changed


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ secrets-*.env
 vars.envx
 vars-*.env
 
-config/.env.dev
+config/.env.*
 venv
 cypress.env.json

Lines changed: 39 additions & 0 deletions
import os
from typing import Final

GTFS_VALIDATOR_URL_PROD: Final[
    str
] = "https://gtfs-validator-web-mbzoxaljzq-ue.a.run.app"
GTFS_VALIDATOR_URL_STAGING: Final[
    str
] = "https://stg-gtfs-validator-web-mbzoxaljzq-ue.a.run.app"


def get_gtfs_validator_results_bucket(is_prod: bool) -> str:
    """
    Get the GTFS validator results bucket name based on the environment.
    :param is_prod: true if the target environment is production, false otherwise
    :return: the bucket name for the target environment
    """
    if is_prod:
        return "gtfs-validator-results"
    else:
        return "stg-gtfs-validator-results"


def get_gtfs_validator_url(is_prod: bool) -> str:
    """
    Get the GTFS validator URL for the target environment, preferring the
    GTFS_VALIDATOR_URL environment variable when it is set.
    :param is_prod: true if the target environment is production, false otherwise;
        ignored when the GTFS_VALIDATOR_URL environment variable is set
    :return: the GTFS validator URL for the target environment
    """
    # Prefer an explicit override from the environment.
    result = os.getenv("GTFS_VALIDATOR_URL")
    if result:
        return result
    if is_prod:
        return GTFS_VALIDATOR_URL_PROD
    else:
        return GTFS_VALIDATOR_URL_STAGING
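
For orientation, a minimal sketch of how a caller might combine these two helpers. Only `get_gtfs_validator_url` and `get_gtfs_validator_results_bucket` come from the file above; deriving the production flag from `ENV` and the surrounding script are illustrative assumptions.

```python
import os

# Hypothetical caller: resolve the validator endpoint and results bucket for
# the current environment. The ENV-based is_prod flag is an assumption here.
is_prod = os.getenv("ENV", "dev").lower() == "prod"

validator_url = get_gtfs_validator_url(is_prod)              # GTFS_VALIDATOR_URL wins if set
results_bucket = get_gtfs_validator_results_bucket(is_prod)

print(f"validator: {validator_url}, reports bucket: {results_bucket}")
```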

functions-python/helpers/query_helper.py

Lines changed: 35 additions & 0 deletions
@@ -1,11 +1,14 @@
 import logging
+from datetime import datetime
 from typing import Type
 
 from shared.database_gen.sqlacodegen_models import (
     Feed,
     Gtfsrealtimefeed,
     Gtfsfeed,
     Gbfsfeed,
+    Gtfsdataset,
+    Validationreport,
 )
 from sqlalchemy import and_
 from sqlalchemy.orm import Session, joinedload
@@ -133,3 +136,35 @@ def get_feeds_query(
     except Exception as e:
         logging.error("Error building query: %s", str(e))
         raise
+
+
+def get_datasets_with_missing_reports_query(
+    db_session: Session,
+    filter_after: datetime | None = None,
+) -> Query:
+    """
+    Get datasets with missing validation reports.
+
+    Args:
+        db_session: SQLAlchemy session
+        filter_after: Optional date to filter datasets
+
+    Returns:
+        A SQLAlchemy query object for datasets with missing validation reports,
+        ordered by feed and dataset stable id.
+    """
+    query = (
+        db_session.query(
+            Gtfsfeed.stable_id,
+            Gtfsdataset.stable_id,
+        )
+        .select_from(Gtfsfeed)
+        .join(Gtfsdataset, Gtfsdataset.feed_id == Gtfsfeed.id)
+        .outerjoin(Validationreport, Gtfsdataset.validation_reports)
+        .filter(Validationreport.id.is_(None))
+    )
+    if filter_after:
+        query = query.filter(Gtfsdataset.downloaded_at >= filter_after)
+    query = query.distinct(Gtfsfeed.stable_id, Gtfsdataset.stable_id).order_by(
+        Gtfsdataset.stable_id, Gtfsfeed.stable_id
+    )
+    return query
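
A minimal sketch of consuming this query with a plain SQLAlchemy engine and session factory; the connection string and the 14-day cutoff are illustrative assumptions, not part of the diff.

```python
from datetime import datetime, timedelta

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Hypothetical wiring: connection string and session factory are assumptions.
engine = create_engine("postgresql+psycopg2://user:password@localhost/feeds")
SessionLocal = sessionmaker(bind=engine)

with SessionLocal() as session:
    cutoff = datetime.utcnow() - timedelta(days=14)
    rows = get_datasets_with_missing_reports_query(session, filter_after=cutoff).all()
    # Each row is a (feed_stable_id, dataset_stable_id) tuple.
    for feed_stable_id, dataset_stable_id in rows:
        print(feed_stable_id, dataset_stable_id)
```
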
Lines changed: 99 additions & 0 deletions
import os
import logging
import json
from time import sleep

from google.cloud import workflows_v1
from google.cloud.workflows import executions_v1
from google.cloud.workflows.executions_v1 import Execution

env = os.getenv("ENV", "dev").lower()
bucket_name = f"mobilitydata-datasets-{env}"


def execute_workflow(
    project: str,
    location: str = "northamerica-northeast1",
    workflow: str = "gtfs_validator_execution",
    input_data: dict = None,
) -> Execution:
    """
    Executes a workflow with the given input data and returns the resulting execution.
    @param project: The Google Cloud project id which contains the workflow to execute.
    @param location: The location for the workflow.
    @param workflow: The ID of the workflow to execute.
    @param input_data: A dictionary containing input data for the workflow.
    @return: The execution response.
    """
    execution_client = executions_v1.ExecutionsClient()
    workflows_client = workflows_v1.WorkflowsClient()
    parent = workflows_client.workflow_path(project, location, workflow)

    # Prepare the execution input as a JSON string.
    input_json = json.dumps(input_data) if input_data else "{}"

    # Create and configure the execution request with input data.
    execution_request = Execution(argument=input_json)
    response = execution_client.create_execution(
        parent=parent, execution=execution_request
    )
    logging.info(f"Created execution: {response.name}")
    execution = execution_client.get_execution(request={"name": response.name})
    return execution


def execute_workflows(
    latest_datasets,
    validator_endpoint=None,
    bypass_db_update=False,
    reports_bucket_name=None,
):
    """
    Execute the workflow for the latest datasets that need their validation report to be updated.
    :param latest_datasets: List of tuples containing the feed stable id and dataset stable id
    :param validator_endpoint: The URL of the validator
    :param bypass_db_update: Whether to bypass the database update
    :param reports_bucket_name: The name of the bucket where the reports are stored
    :return: List of dataset stable ids for which the workflow was executed
    """
    project_id = f"mobility-feeds-{env}"
    location = os.getenv("LOCATION", "northamerica-northeast1")
    execution_triggered_datasets = []
    batch_size = int(os.getenv("BATCH_SIZE", 5))
    sleep_time = int(os.getenv("SLEEP_TIME", 5))
    count = 0
    logging.info(f"Executing workflow for {len(latest_datasets)} datasets")
    for feed_id, dataset_id in latest_datasets:
        try:
            input_data = {
                "data": {
                    "bypass_db_update": bypass_db_update,
                    "protoPayload": {
                        "resourceName": "projects/_/"
                        f"buckets/{bucket_name}/"
                        f"objects/{feed_id}/{dataset_id}/{dataset_id}.zip"
                    },
                    "resource": {
                        "labels": {"location": location, "project_id": project_id},
                    },
                }
            }
            if validator_endpoint:
                input_data["data"]["validator_endpoint"] = validator_endpoint
            if reports_bucket_name:
                input_data["data"]["reports_bucket_name"] = reports_bucket_name
            logging.info(f"Executing workflow for {feed_id}/{dataset_id}")
            execute_workflow(project_id, input_data=input_data)
            execution_triggered_datasets.append(dataset_id)
        except Exception as e:
            logging.error(
                f"Error while executing workflow for {feed_id}/{dataset_id}: {e}"
            )
        count += 1
        logging.info(f"Triggered workflow execution for {count} datasets")
        if count % batch_size == 0:
            logging.info(
                f"Sleeping for {sleep_time} seconds before next batch to avoid rate limiting..."
            )
            sleep(sleep_time)
    return execution_triggered_datasets
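
Putting the pieces together, a hypothetical task body that feeds the missing-report query into `execute_workflows`. The function name, the `days` parameter, and the session argument are assumptions; `get_datasets_with_missing_reports_query`, `get_gtfs_validator_url`, `get_gtfs_validator_results_bucket`, and `execute_workflows` are the helpers introduced by this commit (import paths omitted).

```python
import os
from datetime import datetime, timedelta

from sqlalchemy.orm import Session


def trigger_missing_validation_reports(session: Session, days: int = 14) -> list:
    """Hypothetical task body: find datasets without reports and re-run validation."""
    is_prod = os.getenv("ENV", "dev").lower() == "prod"
    cutoff = datetime.utcnow() - timedelta(days=days)
    latest_datasets = get_datasets_with_missing_reports_query(
        session, filter_after=cutoff
    ).all()
    return execute_workflows(
        latest_datasets,
        validator_endpoint=get_gtfs_validator_url(is_prod),
        reports_bucket_name=get_gtfs_validator_results_bucket(is_prod),
    )
```
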
Lines changed: 10 additions & 0 deletions
[run]
omit =
    */test*/*
    */helpers/*
    */database_gen/*
    */shared/*

[report]
exclude_lines =
    if __name__ == .__main__.:

Lines changed: 25 additions & 0 deletions
# Tasks Executor

This directory contains Google Cloud Functions used as a single point of access to multiple _tasks_.

## Usage
The function receives the following payload:
```
{
    "task": "string",   # [required] Name of the task to execute
    "payload": {        # [optional] Payload to pass to the task
        "dry_run": true,
        "filter_after_in_days": 14,
        "filter_statuses": ["active", "inactive", "future"]
    }
}
```
To get the list of supported tasks use:
```
{
    "task": "list_tasks",
    "payload": {}
}
```
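
For a quick local try-out of this payload, one option is to serve the entry point with functions-framework and POST to it (the deployed function itself is not HTTP-triggered, per its configuration below). The command, port, and endpoint here are illustrative assumptions.

```python
# Assumes the function is being served locally, e.g. with something like:
#   functions-framework --target tasks_executor --port 8080
import requests

resp = requests.post(
    "http://localhost:8080/",                    # hypothetical local endpoint
    json={"task": "list_tasks", "payload": {}},  # payload shape from the README
    timeout=60,
)
print(resp.status_code, resp.text)
```
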
Lines changed: 21 additions & 0 deletions
{
    "name": "tasks_executor",
    "description": "The Tasks Executor function runs maintenance tasks avoiding the creation of multiple functions for one-time execution",
    "entry_point": "tasks_executor",
    "timeout": 540,
    "memory": "4Gi",
    "trigger_http": false,
    "include_folders": ["helpers"],
    "include_api_folders": ["database_gen", "database", "common"],
    "environment_variables": [],
    "secret_environment_variables": [
        {
            "key": "FEEDS_DATABASE_URL"
        }
    ],
    "ingress_settings": "ALLOW_ALL",
    "max_instance_request_concurrency": 1,
    "max_instance_count": 1,
    "min_instance_count": 0,
    "available_cpu": 1
}

Lines changed: 42 additions & 0 deletions
# Code to be able to debug locally without affecting the runtime cloud function
#
# Requirements:
# - Google Cloud SDK installed
# - Make sure to have the following environment variables set in your .env.local file
# - Local database in running state
#
# Usage:
# - python tasks_executor/main_local_debug.py
# - This can easily be run/debugged in a local IDE like PyCharm or VSCode

import flask
from flask.testing import EnvironBuilder

from main import tasks_executor

# Create a Flask app instance
app = flask.Flask(__name__)

if __name__ == "__main__":
    # Create a mock payload
    payload = {"task": "list_tasks"}

    # Push the application context
    with app.app_context():
        # Build a mock request environment
        builder = EnvironBuilder(app=app, method="POST", path="/", json=payload)
        env = builder.get_environ()

        # Create a Flask request object
        mock_request = flask.Request(env)

        # Call the tasks_executor function with the mock request
        response = tasks_executor(mock_request)

        # If the response is a tuple, extract the response object
        if isinstance(response, tuple):
            response, _ = response

        # Print the response data
        print(response.get_data(as_text=True))

Lines changed: 22 additions & 0 deletions
# Common packages
functions-framework==3.*
google-cloud-logging
psycopg2-binary==2.9.6
aiohttp~=3.10.5
asyncio~=3.4.3
urllib3~=2.2.2
requests~=2.32.3
attrs~=23.1.0
pluggy~=1.3.0
certifi~=2024.7.4

# SQL Alchemy and Geo Alchemy
SQLAlchemy==2.0.23
geoalchemy2==0.14.7

# Google specific packages for this function
google-cloud-workflows
flask

# Configuration
python-dotenv==1.0.0

Lines changed: 4 additions & 0 deletions
Faker
pytest~=7.4.3
urllib3-mock
requests-mock
