
Commit f151323

test: add log

1 parent e1a6e0a
2 files changed (+41, -5)


services/staging/rs_server_staging/main.py

Lines changed: 12 additions & 2 deletions
@@ -23,6 +23,7 @@
 import yaml
 from dask.distributed import LocalCluster
 from fastapi import APIRouter, FastAPI, HTTPException, Path
+from fastapi.concurrency import run_in_threadpool
 from pygeoapi.api import API
 from pygeoapi.process.base import JobNotFoundError
 from pygeoapi.process.manager.postgresql import PostgreSQLManager
@@ -267,27 +268,36 @@ async def get_resource(resource: str):
 @router.post("/processes/{resource}/execution")
 async def execute_process(req: Request, resource: str, data: ProcessMetadataModel):
     """Used to execute processing jobs."""
+
+    logger.critical(f"start execute_process")
+
     if resource not in api.config["resources"]:
         raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=f"Process resource '{resource}' not found")

     processor_name = api.config["resources"][resource]["processor"]["name"]
     if processor_name in processors:
         processor = processors[processor_name]
-        _, status = await processor(
+        _, status = processor(
             req,
             data.outputs["result"].id,
             app.extra["process_manager"],
             app.extra["dask_cluster"],
         ).execute(data.inputs.dict())
+
+        logger.critical(f"end execute_process")
+
         return JSONResponse(status_code=HTTP_200_OK, content={"status": status})

     raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=f"Processor '{processor_name}' not found")


 # Endpoint to get the status of a job by job_id
 @router.get("/jobs/{job_id}")
-async def get_job_status_endpoint(job_id: str = Path(..., title="The ID of the job")):
+def get_job_status_endpoint(job_id: str = Path(..., title="The ID of the job")):
     """Used to get status of processing job."""
+
+    logger.critical("start get_job_status_endpoint")
+
     try:
         return app.extra["process_manager"].get_job(job_id)
     except JobNotFoundError as error:
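
Note: the hunk above adds the run_in_threadpool import, but no hunk in this commit actually calls it. Presumably it is meant to offload the now-synchronous, blocking processor(...).execute(...) call so the async endpoint does not stall the event loop. A minimal sketch of that usage, assuming that intent (hypothetical, not part of this commit):

    # Hypothetical follow-up inside execute_process(): run the blocking
    # execute() call in Starlette's thread pool instead of the event loop.
    _, status = await run_in_threadpool(
        processor(
            req,
            data.outputs["result"].id,
            app.extra["process_manager"],
            app.extra["dask_cluster"],
        ).execute,
        data.inputs.dict(),
    )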

services/staging/rs_server_staging/processors.py

Lines changed: 29 additions & 3 deletions
@@ -100,6 +100,8 @@ def streaming_task(product_url: str, trusted_domains: list[str], auth: str, buck
     Raises:
         ValueError: If the streaming process fails, raises a ValueError with details of the failure.
     """
+    logger = Logging.default(__name__)
+    logger.critical(f"start streaming_task")

     try:
         s3_handler = S3StorageHandler(
@@ -115,6 +117,7 @@ def streaming_task(product_url: str, trusted_domains: list[str], auth: str, buck
         ) from e
     except KeyError as exc:
         raise ValueError(f"Cannot create s3 connector object. Reason: {exc}") from exc
+    logger.critical(f"end streaming_task")
     return s3_file

@@ -211,7 +214,7 @@ def __init__(
         self.catalog_bucket = os.environ.get("RSPY_CATALOG_BUCKET", "rs-cluster-catalog")

     # Override from BaseProcessor, execute is async in RSPYProcessor
-    async def execute(
+    def execute(
         self,
         data: dict,
         outputs: dict | None = None,  # pylint: disable=unused-argument
@@ -270,7 +273,7 @@ async def execute(
         )

         # Execution section
-        if not await self.check_catalog(catalog_collection, item_collection.features):
+        if not self.check_catalog(catalog_collection, item_collection.features):
             return self.log_job_execution(
                 JobStatus.failed,
                 0,
@@ -357,7 +360,7 @@ def log_job_execution(
         self.db_process_manager.update_job(self.job_id, update_data)
         return self._get_execute_result()

-    async def check_catalog(self, catalog_collection: str, features: list[Feature]) -> bool:
+    def check_catalog(self, catalog_collection: str, features: list[Feature]) -> bool:
         """
         Method used to check RSPY catalog if a feature from input_collection is already published.

@@ -569,8 +572,10 @@ def manage_dask_tasks_results(self, client: Client, catalog_collection: str):
             catalog_collection (str): Name of the catalog collection.
         """
         self.logger.info("Tasks monitoring started")
+        self.logger.critical("start manage_dask_tasks_results")
         if not client:
             self.logger.error("The dask cluster client object is not created. Exiting")
+            self.logger.critical("end manage_dask_tasks_results 1")
             return
         for task in as_completed(self.tasks):
             try:
@@ -598,6 +603,7 @@ def manage_dask_tasks_results(self, client: Client, catalog_collection: str):
                 self.log_job_execution(JobStatus.failed, None, f"At least one of the tasks failed: {task_e}")
                 self.delete_files_from_bucket()
                 self.logger.error(f"Tasks monitoring finished with error. At least one of the tasks failed: {task_e}")
+                self.logger.critical("end manage_dask_tasks_results 2")
                 return
         # Publish all the features once processed
         published_featurs_ids: list[str] = []
@@ -613,11 +619,13 @@ def manage_dask_tasks_results(self, client: Client, catalog_collection: str):
                 self.delete_files_from_bucket()
                 # delete the published items
                 self.unpublish_rspy_features(catalog_collection, published_featurs_ids)
+                self.logger.critical("end manage_dask_tasks_results 3")
                 return
             published_featurs_ids.append(feature.id)
         # Update status once all features are processed
         self.log_job_execution(JobStatus.successful, 100, "Finished")
         self.logger.info("Tasks monitoring finished")
+        self.logger.critical("end manage_dask_tasks_results 5")

     def dask_cluster_connect(self) -> Client:
         """Connects a dask cluster scheduler
@@ -706,6 +714,8 @@ def dask_cluster_connect(self) -> Client:
             clusters = gateway.list_clusters()
             self.logger.debug(f"The list of clusters: {clusters}")

+            self.logger.critical(f"Cluster names: {[cluster.options.get('cluster_name') for cluster in clusters]}")
+
             # In local mode, get the first cluster from the gateway.
             cluster_id = None
             if local_mode:
@@ -727,7 +737,11 @@ def dask_cluster_connect(self) -> Client:
             if not cluster_id:
                 raise IndexError(f"No dask cluster named '{cluster_name}' was found.")

+            self.logger.critical(
+                f"Connect to gateway {os.environ['DASK_GATEWAY__ADDRESS']} cluster_id: {cluster_id}",
+            )
             self.cluster = gateway.connect(cluster_id)
+            self.logger.critical(f"Connected: {self.cluster}")

             self.logger.info(f"Successfully connected to the {cluster_name} dask cluster")
         except KeyError as e:
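
For context, the connection flow instrumented by the critical logs above follows the standard dask_gateway client API. A minimal self-contained sketch, assuming DASK_GATEWAY__ADDRESS is set and that 'cluster_name' is a deployment-specific cluster option (the name "my-cluster" is made up for illustration):

    # Minimal dask_gateway connection sketch (authentication omitted).
    import os
    from dask_gateway import Gateway

    gateway = Gateway(address=os.environ["DASK_GATEWAY__ADDRESS"])
    clusters = gateway.list_clusters()  # list of ClusterReport objects
    # Pick the cluster whose deployment-specific option matches our name.
    cluster_id = next(
        c.name for c in clusters if c.options.get("cluster_name") == "my-cluster"
    )
    cluster = gateway.connect(cluster_id)  # returns a GatewayCluster
    client = cluster.get_client()  # dask.distributed.Client for submitting tasks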
@@ -803,6 +817,9 @@ def submit_tasks_to_dask_cluster(self, token: str, trusted_domains: list[str], c
         """
         # empty the list
         self.tasks = []
+
+        self.logger.critical(f"start submit_tasks_to_dask_cluster")
+
         # Submit tasks
         try:
             for asset_info in self.assets_info:
@@ -820,6 +837,8 @@ def submit_tasks_to_dask_cluster(self, token: str, trusted_domains: list[str], c
                 self.logger.exception(f"Submitting task to dask cluster failed. Reason: {e}")
                 raise RuntimeError(f"Submitting task to dask cluster failed. Reason: {e}") from e

+        self.logger.critical(f"end submit_tasks_to_dask_cluster")
+
     async def process_rspy_features(self, catalog_collection: str) -> tuple[str, dict]:
         """
         Method used to trigger dask distributed streaming process.
@@ -836,19 +855,23 @@ async def process_rspy_features(self, catalog_collection: str) -> tuple[str, dic
             Example: ("application/json", {"running": <job_id>})
         """
         self.logger.debug("Starting main loop")
+        self.logger.critical(f"process_rspy_features")

         # Process each feature by initiating the streaming download of its assets to the final bucket.
         for feature in self.stream_list:
             if not self.prepare_streaming_tasks(catalog_collection, feature):
+                self.logger.critical(f"process_rspy_features 1")
                 return self.log_job_execution(JobStatus.failed, 0, "Unable to create tasks for the Dask cluster")
         if not self.assets_info:
             self.logger.info("There are no assets to stage. Exiting....")
+            self.logger.critical(f"process_rspy_features 2")
             return self.log_job_execution(JobStatus.successful, 100, "Finished without processing any tasks")

         # Determine the domain(s)
         domains = list({urlparse(asset[0]).hostname for asset in self.assets_info})
         self.logger.info("Staging from domain(s) {domains}")
         if len(domains) > 1:
+            self.logger.critical(f"process_rspy_features 3")
             return self.log_job_execution(JobStatus.failed, 0, "Staging from multiple domains is not supported yet")
         domain = domains[0]

@@ -860,6 +883,7 @@ async def process_rspy_features(self, catalog_collection: str) -> tuple[str, dic
             self.logger.error(
                 f"Failed to retrieve the token needed to connect to the external station: {http_exception}",
             )
+            self.logger.critical(f"process_rspy_features 4")
             return self.log_job_execution(
                 JobStatus.failed,
                 0,
@@ -872,6 +896,7 @@ async def process_rspy_features(self, catalog_collection: str) -> tuple[str, dic
             self.submit_tasks_to_dask_cluster(token, external_auth_config.trusted_domains, dask_client)
         except RuntimeError as re:
             self.logger.error("Failed to start the staging process")
+            self.logger.critical(f"process_rspy_features 5")
             return self.log_job_execution(JobStatus.failed, 0, f"{re}")

         # Set the status to running for the job
@@ -888,6 +913,7 @@ async def process_rspy_features(self, catalog_collection: str) -> tuple[str, dic
         self.assets_info = []
         dask_client.close()

+        self.logger.critical(f"process_rspy_features 6")
         return self._get_execute_result()

     def publish_rspy_feature(self, catalog_collection: str, feature: Feature):
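
The logger.critical calls added in this commit come in start/end pairs that bracket each step of the staging flow, so the logs show exactly where a request stalls. If the pattern were kept beyond this test, it could be factored into a small helper; a minimal sketch (hypothetical, not part of this commit):

    # Hypothetical helper: bracket any code section with start/end trace logs.
    from contextlib import contextmanager

    @contextmanager
    def trace(logger, name: str):
        logger.critical(f"start {name}")
        try:
            yield
        finally:
            logger.critical(f"end {name}")

Usage would then be, e.g., "with trace(self.logger, 'manage_dask_tasks_results'): ..." around the monitored block.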
