Skip to content

Commit dcf5d95

Browse files
committed
updated code to use create_refresh_materialized_view()
1 parent 68ea8c2 commit dcf5d95

File tree

5 files changed

+62
-86
lines changed

5 files changed

+62
-86
lines changed
Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Batch Process Dataset
2+
23
Subscribed to the topic set in the `batch-datasets` function, `batch-process-dataset` is triggered for each message published. It handles the processing of each feed individually, ensuring data consistency and integrity. The function performs the following operations:
34

45
1. **Download Data**: It retrieves the feed data from the provided URL.
@@ -8,34 +9,56 @@ Subscribed to the topic set in the `batch-datasets` function, `batch-process-dat
89

910
The URL format for accessing these datasets is standardized as `<bucket-url>/<feed_stable_id>/<dataset_id>.zip`, ensuring a consistent and predictable path for data retrieval.
1011

11-
1212
# Message format
13+
1314
The function expects a Pub/Sub message with the following format:
15+
1416
```json
15-
{
16-
"message": {
17-
"data":
18-
{
19-
"execution_id": "execution_id",
20-
"producer_url": "producer_url",
21-
"feed_stable_id": "feed_stable_id",
22-
"feed_id": "feed_id",
23-
"dataset_id": "dataset_id",
24-
"dataset_hash": "dataset_hash",
25-
"authentication_type": "authentication_type",
26-
"authentication_info_url": "authentication_info_url",
27-
"api_key_parameter_name": "api_key_parameter_name"
28-
}
29-
}
17+
{
18+
"message": {
19+
"data": {
20+
"execution_id": "execution_id",
21+
"producer_url": "producer_url",
22+
"feed_stable_id": "feed_stable_id",
23+
"feed_id": "feed_id",
24+
"dataset_id": "dataset_id",
25+
"dataset_hash": "dataset_hash",
26+
"authentication_type": "authentication_type",
27+
"authentication_info_url": "authentication_info_url",
28+
"api_key_parameter_name": "api_key_parameter_name"
3029
}
30+
}
31+
}
32+
```
33+
34+
# Example
35+
36+
```json
37+
{
38+
"message": {
39+
"data": {
40+
"execution_id": "JLU_20250721A",
41+
"producer_url": "http://api.511.org/transit/datafeeds?operator_id=CE",
42+
"feed_stable_id": "mdb-2684",
43+
"feed_id": "2f5d7b4e-bb9b-49ae-a011-b61d7d9b53ff",
44+
"dataset_id": null,
45+
"dataset_hash": null,
46+
"authentication_type": "1",
47+
"authentication_info_url": "https://511.org/open-data/token",
48+
"api_key_parameter_name": "api_key"
49+
}
50+
}
51+
}
3152
```
3253

3354
# Function configuration
55+
3456
The function is configured using the following environment variables:
57+
3558
- `DATASETS_BUCKET_NAME`: The name of the bucket where the datasets are stored.
3659
- `FEEDS_DATABASE_URL`: The URL of the feeds database.
3760
- `MAXIMUM_EXECUTIONS`: [Optional] The maximum number of executions per dataset. This controls the number of times a dataset can be processed per execution id. By default, it is 1.
3861

39-
4062
# Local development
41-
The local development of this function follows the same steps as the other functions. Please refer to the [README.md](../README.md) file for more information.
63+
64+
The local development of this function follows the same steps as the other functions. Please refer to the [README.md](../README.md) file for more information.

functions-python/batch_process_dataset/src/main.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232

3333
from shared.database_gen.sqlacodegen_models import Gtfsdatasets
3434
from shared.dataset_service.main import DatasetTraceService, DatasetTrace, Status
35-
from shared.database.database import with_db_session
35+
from shared.database.database import (
36+
with_db_session,
37+
create_refresh_materialized_view_task,
38+
)
3639
import logging
3740

3841
from shared.helpers.logger import init_logger, get_logger
@@ -254,25 +257,7 @@ def create_dataset(self, dataset_file: DatasetFile, db_session: Session):
254257
db_session.commit()
255258
self.logger.info(f"[{self.feed_stable_id}] Dataset created successfully.")
256259

257-
# Replace direct call to refresh_materialized_view with HTTP request to the refresh function
258-
refresh_url = os.getenv("FUNCTION_URL_REFRESH_MV")
259-
if not refresh_url:
260-
raise ValueError(
261-
"FUNCTION_URL_REFRESH_MV environment variable is not set"
262-
)
263-
264-
# Create an authorized request
265-
auth_req = requests.Request()
266-
267-
# Get an identity token for the target URL
268-
token = id_token.fetch_id_token(auth_req, refresh_url)
269-
270-
# Make the HTTP request with the ID token
271-
headers = {"Authorization": f"Bearer {token}"}
272-
response = http_requests.get(refresh_url, headers=headers)
273-
274-
response.raise_for_status()
275-
self.logger.info("Materialized view refresh event triggered successfully.")
260+
create_refresh_materialized_view_task()
276261
except Exception as e:
277262
raise Exception(f"Error creating dataset: {e}")
278263

functions-python/helpers/feed_status.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
import logging
22
from datetime import datetime, timezone
33
from sqlalchemy import text
4-
import requests as http_requests
5-
import os
64
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed
75
from typing import TYPE_CHECKING
86

97
if TYPE_CHECKING:
108
from sqlalchemy.orm import Session
11-
12-
from google.auth.transport import requests
13-
from google.oauth2 import id_token
9+
from shared.database.database import (
10+
create_refresh_materialized_view_task,
11+
)
1412

1513

1614
# query to update the status of the feeds based on the service date range of the latest dataset
@@ -78,30 +76,10 @@ def get_filters(status: str):
7876
raise Exception(f"Error updating feed statuses: {e}")
7977

8078
try:
81-
session.commit()
82-
# Replace direct call to refresh_materialized_view with HTTP request to the refresh function
83-
refresh_url = os.getenv("FUNCTION_URL_REFRESH_MV")
84-
if not refresh_url:
85-
raise ValueError("FUNCTION_URL_REFRESH_MV environment variable is not set")
86-
87-
# Create an authorized request
88-
auth_req = requests.Request()
89-
90-
# Get an identity token for the target URL
91-
token = id_token.fetch_id_token(auth_req, refresh_url)
92-
93-
# Make the HTTP request with the ID token
94-
headers = {"Authorization": f"Bearer {token}"}
95-
response = http_requests.get(refresh_url, headers=headers)
96-
97-
response.raise_for_status()
98-
logging.info("Materialized view refresh event triggered successfully.")
79+
create_refresh_materialized_view_task()
9980
logging.info("Feed Database changes for status committed.")
10081
logging.info("Status Changes: %s", diff_counts)
101-
session.close()
10282
return diff_counts
10383
except Exception as e:
10484
logging.error("Error committing changes:", e)
105-
session.rollback()
106-
session.close()
10785
raise Exception(f"Error creating dataset: {e}")

functions-python/reverse_geolocation/src/reverse_geolocation_processor.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import flask
99
import pandas as pd
10-
import requests as http_requests
1110
import shapely.geometry
1211
from geoalchemy2 import WKTElement
1312
from geoalchemy2.shape import to_shape
@@ -24,7 +23,11 @@
2423
geopolygons_as_string,
2524
)
2625
from parse_request import parse_request_parameters
27-
from shared.database.database import with_db_session
26+
from shared.database.database import (
27+
with_db_session,
28+
create_refresh_materialized_view_task,
29+
)
30+
2831
from shared.database_gen.sqlacodegen_models import (
2932
Geopolygon,
3033
Feed,
@@ -37,9 +40,6 @@
3740
)
3841
from shared.helpers.logger import get_logger
3942

40-
from google.auth.transport import requests
41-
from google.oauth2 import id_token
42-
4343

4444
@with_db_session
4545
def get_cached_geopolygons(
@@ -379,23 +379,7 @@ def extract_location_aggregates(
379379
# Commit the changes to the database before refreshing the materialized view
380380
db_session.commit()
381381

382-
# Replace direct call to refresh_materialized_view with HTTP request to the refresh function
383-
refresh_url = os.getenv("FUNCTION_URL_REFRESH_MV")
384-
if not refresh_url:
385-
raise ValueError("FUNCTION_URL_REFRESH_MV environment variable is not set")
386-
387-
# Create an authorized request
388-
auth_req = requests.Request()
389-
390-
# Get an identity token for the target URL
391-
token = id_token.fetch_id_token(auth_req, refresh_url)
392-
393-
# Make the HTTP request with the ID token
394-
headers = {"Authorization": f"Bearer {token}"}
395-
response = http_requests.get(refresh_url, headers=headers)
396-
397-
response.raise_for_status()
398-
logger.info("Materialized view refresh event triggered successfully.")
382+
create_refresh_materialized_view_task()
399383

400384

401385
@with_db_session

functions-python/tasks_executor/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ Example:
3131
"after_date": "2025-06-01"
3232
}
3333
}
34+
{
35+
"task": "refresh_materialized_view",
36+
"payload": {
37+
"dry_run": true
38+
}
39+
}
3440
```
3541

3642
To get the list of supported tasks use:

0 commit comments

Comments
 (0)