Commit c4d1fe4

Refactor python functions
1 parent b09e130 commit c4d1fe4

File tree: 23 files changed (+153 / -108 lines)


api/src/shared/common/db_utils.py

Lines changed: 2 additions & 2 deletions
@@ -170,7 +170,7 @@ def get_all_gtfs_feeds(
     feed_query = apply_most_common_location_filter(db_session.query(Gtfsfeed), db_session)
     yield from (
         feed_query.filter(Gtfsfeed.stable_id.in_(stable_ids)).options(
-            contains_eager(Gtfsfeed.latest_dataset)
+            joinedload(Gtfsfeed.latest_dataset)
             .joinedload(Gtfsdataset.validation_reports)
             .joinedload(Validationreport.features),
             *get_joinedload_options(include_extracted_location_entities=True),
@@ -182,7 +182,7 @@ def get_all_gtfs_feeds(
         .outerjoin(Gtfsfeed.gtfsdatasets)
         .filter(Gtfsfeed.stable_id.in_(stable_ids))
         .options(
-            contains_eager(Gtfsfeed.latest_dataset)
+            joinedload(Gtfsfeed.latest_dataset)
             .joinedload(Gtfsdataset.validation_reports)
             .joinedload(Validationreport.features),
             *get_joinedload_options(include_extracted_location_entities=False),
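
Note on the swap above: `contains_eager()` only populates a relationship from a JOIN that the query itself already spells out, while `joinedload()` emits its own LEFT OUTER JOIN for the relationship. A minimal, self-contained sketch of the difference, using toy models rather than the repository's generated schema:

```python
from typing import Optional

from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    contains_eager,
    joinedload,
    mapped_column,
    relationship,
)


class Base(DeclarativeBase):
    pass


class Dataset(Base):
    __tablename__ = "dataset"
    id: Mapped[int] = mapped_column(primary_key=True)


class Feed(Base):
    __tablename__ = "feed"
    id: Mapped[int] = mapped_column(primary_key=True)
    latest_dataset_id: Mapped[Optional[int]] = mapped_column(ForeignKey("dataset.id"))
    latest_dataset: Mapped[Optional[Dataset]] = relationship()


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    # joinedload: the ORM adds its own LEFT OUTER JOIN, so feeds without a
    # latest dataset still come back, with latest_dataset preloaded as None.
    session.query(Feed).options(joinedload(Feed.latest_dataset)).all()

    # contains_eager: only valid when the query itself joins the relationship;
    # the eager load reuses whatever join (inner or outer) was written here.
    session.query(Feed).outerjoin(Feed.latest_dataset).options(
        contains_eager(Feed.latest_dataset)
    ).all()
```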

functions-python/batch_datasets/src/main.py

Lines changed: 1 addition & 3 deletions
@@ -25,7 +25,6 @@
 from google.cloud import pubsub_v1
 from google.cloud.pubsub_v1 import PublisherClient
 from google.cloud.pubsub_v1.futures import Future
-from sqlalchemy import or_
 from sqlalchemy.orm import Session

 from shared.database_gen.sqlacodegen_models import Gtfsfeed, Gtfsdataset
@@ -87,9 +86,8 @@ def get_non_deprecated_feeds(
             Gtfsdataset.hash.label("dataset_hash"),
         )
         .select_from(Gtfsfeed)
-        .outerjoin(Gtfsdataset, (Gtfsdataset.feed_id == Gtfsfeed.id))
+        .outerjoin(Gtfsdataset, (Gtfsfeed.latest_dataset_id == Gtfsdataset.id))
         .filter(Gtfsfeed.status != "deprecated")
-        .filter(or_(Gtfsdataset.id.is_(None), Gtfsdataset.latest.is_(True)))
     )

     if feed_stable_ids:
         # If feed_stable_ids are provided, filter the query by stable IDs
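
The rewritten query joins datasets through the feed's latest_dataset_id foreign key, so the old `or_()` guard for feeds without datasets is gone. A self-contained sketch (toy tables, not the project's schema) of why that guard is no longer needed:

```python
# A LEFT OUTER JOIN through latest_dataset_id already yields NULL dataset
# columns for feeds with no latest dataset, so no extra IS NULL / latest
# filter is required to keep those feeds in the result.
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE dataset (id INTEGER PRIMARY KEY, hash TEXT)"))
    conn.execute(text(
        "CREATE TABLE feed (id INTEGER PRIMARY KEY, status TEXT, "
        "latest_dataset_id INTEGER REFERENCES dataset(id))"
    ))
    conn.execute(text("INSERT INTO dataset VALUES (1, 'abc123')"))
    conn.execute(text("INSERT INTO feed VALUES (1, 'active', 1), (2, 'active', NULL)"))

    rows = conn.execute(text(
        "SELECT feed.id, dataset.hash FROM feed "
        "LEFT OUTER JOIN dataset ON feed.latest_dataset_id = dataset.id "
        "WHERE feed.status != 'deprecated'"
    )).all()
    # Both feeds are returned; feed 2 simply has hash = None.
    print(rows)
```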

functions-python/batch_datasets/tests/conftest.py

Lines changed: 4 additions & 2 deletions
@@ -84,10 +84,10 @@ def populate_database(db_session: Session | None = None):
     # GTFS datasets leaving one active feed without a dataset
     active_gtfs_feeds = db_session.query(Gtfsfeed).all()
     for i in range(1, 9):
+        id = fake.uuid4()
         gtfs_dataset = Gtfsdataset(
-            id=fake.uuid4(),
+            id=id,
             feed_id=active_gtfs_feeds[i].id,
-            latest=True,
             bounding_box="POLYGON((-180 -90, -180 90, 180 90, 180 -90, -180 -90))",
             hosted_url=fake.url(),
             note=fake.sentence(),
@@ -96,6 +96,8 @@ def populate_database(db_session: Session | None = None):
             stable_id=fake.uuid4(),
         )
         db_session.add(gtfs_dataset)
+        db_session.flush()
+        active_gtfs_feeds[i].latest_dataset_id = id

     db_session.flush()
     # GTFS Realtime feeds
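
Several of the test fixtures in this commit adopt the same add / flush / assign pattern: the dataset row is flushed first so it exists before the feed's latest_dataset_id foreign key is pointed at it. A self-contained sketch of that pattern with toy models (not the generated schema):

```python
from typing import Optional

from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Dataset(Base):
    __tablename__ = "dataset"
    id: Mapped[str] = mapped_column(primary_key=True)


class Feed(Base):
    __tablename__ = "feed"
    id: Mapped[str] = mapped_column(primary_key=True)
    latest_dataset_id: Mapped[Optional[str]] = mapped_column(ForeignKey("dataset.id"))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    feed = Feed(id="feed-1")
    dataset = Dataset(id="dataset-1")
    session.add_all([feed, dataset])
    session.flush()                       # emits the INSERTs within the transaction
    feed.latest_dataset_id = dataset.id   # the FK target row now exists
    session.commit()
```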

functions-python/batch_process_dataset/src/main.py

Lines changed: 32 additions & 26 deletions
@@ -33,7 +33,7 @@

 from shared.common.gcp_utils import create_refresh_materialized_view_task
 from shared.database.database import with_db_session
-from shared.database_gen.sqlacodegen_models import Gtfsdataset, Gtfsfile
+from shared.database_gen.sqlacodegen_models import Gtfsdataset, Gtfsfile, Gtfsfeed
 from shared.dataset_service.main import DatasetTraceService, DatasetTrace, Status
 from shared.helpers.logger import init_logger, get_logger
 from shared.helpers.utils import (
@@ -84,7 +84,9 @@ def __init__(
         self.api_key_parameter_name = api_key_parameter_name
         self.date = datetime.now().strftime("%Y%m%d%H%M")
         if self.authentication_type != 0:
-            self.logger.info(f"Getting feed credentials for feed {self.feed_stable_id}")
+            self.logger.info(
+                "Getting feed credentials for feed %s", self.feed_stable_id
+            )
             self.feed_credentials = self.get_feed_credentials(self.feed_stable_id)
             if self.feed_credentials is None:
                 raise Exception(
@@ -135,7 +137,7 @@ def download_content(self, temporary_file_path, feed_id):
             credentials=self.feed_credentials,
             logger=self.logger,
         )
-        self.logger.info(f"hash is: {file_hash}")
+        self.logger.info("hash is: %s", file_hash)
         is_zip = zipfile.is_zipfile(temporary_file_path)
         return file_hash, is_zip

@@ -168,7 +170,7 @@ def upload_files_to_storage(
         extracted_files: List[Gtfsfile] = []
         if not extracted_files_path or not os.path.exists(extracted_files_path):
             self.logger.warning(
-                f"Extracted files path {extracted_files_path} does not exist."
+                "Extracted files path %s does not exist.", extracted_files_path
             )
             return blob, extracted_files
         self.logger.info("Processing extracted files from %s", extracted_files_path)
@@ -182,7 +184,7 @@ def upload_files_to_storage(
             if public:
                 file_blob.make_public()
             self.logger.info(
-                f"Uploaded extracted file {file_name} to {file_blob.public_url}"
+                "Uploaded extracted file %s to %s", file_name, file_blob.public_url
             )
             extracted_files.append(
                 Gtfsfile(
@@ -209,7 +211,8 @@ def upload_dataset(self, feed_id, public=True) -> DatasetFile or None:
         file_sha256_hash, is_zip = self.download_content(temp_file_path, feed_id)
         if not is_zip:
             self.logger.error(
-                f"[{self.feed_stable_id}] The downloaded file from {self.producer_url} is not a valid ZIP file."
+                "The downloaded file from %s is not a valid ZIP file.",
+                self.producer_url,
             )
             return None

@@ -299,17 +302,18 @@ def process_from_bucket(self, db_session, public=True) -> Optional[DatasetFile]:
                 else None
             ),
         )
-        dataset = self.create_dataset_entities(
+        dataset, latest = self.create_dataset_entities(
            dataset_file, skip_dataset_creation=True, db_session=db_session
        )
-        if dataset and dataset.latest:
+        if dataset and latest:
            self.logger.info(
-                f"Creating pipeline tasks for latest dataset {dataset.stable_id}"
+                "Creating pipeline tasks for latest dataset %s", dataset.stable_id
            )
            create_pipeline_tasks(dataset)
        elif dataset:
            self.logger.info(
-                f"Dataset {dataset.stable_id} is not the latest, skipping pipeline tasks creation."
+                "Dataset %s is not the latest, skipping pipeline tasks creation.",
+                dataset.stable_id,
            )
        else:
            raise ValueError("Dataset update failed, dataset is None.")
@@ -352,26 +356,24 @@ def create_dataset_entities(
         """
         try:
             # Check latest version of the dataset
-            latest_dataset = (
-                db_session.query(Gtfsdataset)
-                .filter_by(latest=True, feed_id=self.feed_id)
-                .one_or_none()
+            gtfs_feed: Gtfsfeed | None = (
+                db_session.query(Gtfsfeed).filter_by(id=self.feed_id).one_or_none()
             )
+            latest_dataset = gtfs_feed.latest_dataset
             if not latest_dataset:
-                self.logger.info(
-                    f"[{self.feed_stable_id}] No latest dataset found for feed."
-                )
+                self.logger.info("No latest dataset found for feed.")

             dataset = None
+            latest = True if latest_dataset is not None else False
             if not skip_dataset_creation:
                 self.logger.info(
-                    f"[{self.feed_stable_id}] Creating new dataset for feed with stable id {dataset_file.stable_id}."
+                    "Creating new dataset for feed with stable id %s.",
+                    dataset_file.stable_id,
                 )
                 dataset = Gtfsdataset(
                     id=str(uuid.uuid4()),
                     feed_id=self.feed_id,
                     stable_id=dataset_file.stable_id,
-                    latest=True,
                     bounding_box=None,
                     note=None,
                     hash=dataset_file.file_sha256_hash,
@@ -386,10 +388,14 @@ def create_dataset_entities(
                     unzipped_size_bytes=self._get_unzipped_size(dataset_file),
                 )
                 db_session.add(dataset)
+                # update the latest dataset relationship in the feed
+                db_session.flush()
+                gtfs_feed.latest_dataset = dataset
+                latest = True
             elif skip_dataset_creation and latest_dataset:
                 self.logger.info(
-                    f"[{self.feed_stable_id}] Updating latest dataset for feed with stable id "
-                    f"{latest_dataset.stable_id}."
+                    "Updating latest dataset for feed with stable id %s",
+                    latest_dataset.stable_id,
                 )
                 latest_dataset.gtfsfiles = (
                     dataset_file.extracted_files if dataset_file.extracted_files else []
@@ -400,13 +406,12 @@ def create_dataset_entities(
                 )

             if latest_dataset and not skip_dataset_creation:
-                latest_dataset.latest = False
                 db_session.add(latest_dataset)
             db_session.commit()
-            self.logger.info(f"[{self.feed_stable_id}] Dataset created successfully.")
+            self.logger.info("Dataset created successfully.")

             create_refresh_materialized_view_task()
-            return latest_dataset if skip_dataset_creation else dataset
+            return latest_dataset if skip_dataset_creation else dataset, latest
         except Exception as e:
             raise Exception(f"Error creating dataset: {e}")

@@ -431,7 +436,7 @@ def process_from_producer_url(
         if dataset_file is None:
             self.logger.info(f"[{self.feed_stable_id}] No database update required.")
             return None
-        dataset = self.create_dataset_entities(dataset_file, db_session=db_session)
+        dataset, _ = self.create_dataset_entities(dataset_file, db_session=db_session)
         create_pipeline_tasks(dataset)
         return dataset_file

@@ -577,7 +582,8 @@ def process_dataset(cloud_event: CloudEvent):
         )
         return f"Function completed with errors, missing stable={stable_id} or execution_id={execution_id}"
     logger.info(
-        f"Function %s in execution: [{execution_id}]",
+        "Function %s in execution: %s",
+        execution_id,
         "successfully completed" if not error_message else "Failed",
     )
     return "Completed." if error_message is None else error_message

functions-python/batch_process_dataset/tests/conftest.py

Lines changed: 2 additions & 1 deletion
@@ -64,7 +64,6 @@ def populate_database(db_session):
         gtfs_dataset = Gtfsdataset(
             id=fake.uuid4(),
             feed_id=active_gtfs_feeds[i].id,
-            latest=True,
             bounding_box="POLYGON((-180 -90, -180 90, 180 90, 180 -90, -180 -90))",
             hosted_url=fake.url(),
             note=fake.sentence(),
@@ -73,6 +72,8 @@ def populate_database(db_session):
             stable_id=fake.uuid4(),
         )
         db_session.add(gtfs_dataset)
+        db_session.flush()
+        active_gtfs_feeds[i].latest_gtfsdataset_id = gtfs_dataset.id

         db_session.flush()
         # GTFS Realtime feeds

functions-python/batch_process_dataset/tests/test_batch_process_dataset_main.py

Lines changed: 1 addition & 0 deletions
@@ -487,6 +487,7 @@ def test_process_from_bucket_latest_happy_path(
         dataset_stable_id="dataset-stable-id-123",  # REQUIRED for bucket-latest path
     )

+    mock_create_dataset_entities.return_value = Mock(), True
     # Act
     result = processor.process_from_bucket(public=True)

functions-python/export_csv/src/main.py

Lines changed: 1 addition & 8 deletions
@@ -214,14 +214,7 @@ def get_gtfs_feed_csv_data(
     data = get_feed_csv_data(feed, geopolygon_map)

     # Then supplement with the GTFS specific data
-    latest_dataset = next(
-        (
-            dataset
-            for dataset in (feed.gtfsdatasets or [])
-            if dataset and dataset.latest
-        ),
-        None,
-    )
+    latest_dataset = feed.latest_dataset
     if latest_dataset and latest_dataset.validation_reports:
         # Keep the report from the more recent validator version
         latest_report = max(

functions-python/export_csv/tests/conftest.py

Lines changed: 6 additions & 3 deletions
@@ -69,7 +69,7 @@ def populate_database(db_session):
             official=True,
         )
         feeds.append(feed)
-
+    db_session.flush()
     # Then fill the specific parameters for each feed
     target_feed = feeds[0]
     target_feed.id = "e3155a30-81d8-40bb-9e10-013a60436d86"  # Just an invented uuid
@@ -148,8 +148,7 @@ def populate_database(db_session):
         feed_stable_id = active_gtfs_feeds[feed_index].stable_id
         gtfs_dataset = Gtfsdataset(
             id=fake.uuid4(),
-            feed_id=feed_stable_id,
-            latest=True if i != 2 else False,
+            feed_id=active_gtfs_feeds[feed_index].id,
             bounding_box=wkt_element,
             # Use a url containing the stable id. The program should replace all the is after the feed stable id
             # by latest.zip
@@ -159,6 +158,8 @@ def populate_database(db_session):
             downloaded_at=datetime(2025, 1, 12),
             stable_id=f"dataset-{i}",
         )
+        db_session.add(gtfs_dataset)
+        db_session.flush()
         validation_report = Validationreport(
             id=fake.uuid4(),
             validator_version="6.0.1",
@@ -175,6 +176,8 @@ def populate_database(db_session):
         gtfs_dataset.locations = locations

         active_gtfs_feeds[feed_index].gtfsdatasets.append(gtfs_dataset)
+        if i != 2:
+            active_gtfs_feeds[feed_index].latest_dataset_id = gtfs_dataset.id
         db_session.flush()
         active_gtfs_feeds[feed_index].bounding_box = gtfs_dataset.bounding_box
         active_gtfs_feeds[feed_index].bounding_box_dataset_id = gtfs_dataset.id

functions-python/helpers/feed_status.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 import logging
 from datetime import datetime, timezone
 from sqlalchemy import text
-from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed
+from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed, Gtfsfeed
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
@@ -19,8 +19,8 @@ def update_feed_statuses_query(session: "Session", stable_feed_ids: list[str]):
             Gtfsdataset.service_date_range_start,
             Gtfsdataset.service_date_range_end,
         )
+        .join(Gtfsfeed, Gtfsfeed.latest_dataset_id == Gtfsdataset.id)
         .filter(
-            Gtfsdataset.latest.is_(True),
             Gtfsdataset.service_date_range_start.isnot(None),
             Gtfsdataset.service_date_range_end.isnot(None),
         )

functions-python/helpers/query_helper.py

Lines changed: 3 additions & 5 deletions
@@ -190,11 +190,9 @@ def get_feeds_with_missing_bounding_boxes_query(
     """
     query = (
         db_session.query(Gtfsfeed)
-        .join(Gtfsdataset, Gtfsdataset.feed_id == Gtfsfeed.id)
-        .filter(Gtfsdataset.latest.is_(True))
-        .filter(Gtfsdataset.bounding_box.is_(None))
+        .filter(Gtfsfeed.bounding_box.is_(None))
         .filter(~Gtfsfeed.feedlocationgrouppoints.any())
-        .distinct(Gtfsfeed.stable_id, Gtfsdataset.stable_id)
-        .order_by(Gtfsdataset.stable_id, Gtfsfeed.stable_id)
+        .distinct(Gtfsfeed.stable_id)
+        .order_by(Gtfsfeed.stable_id)
     )
     return query
