3737)
3838from shared .helpers .pub_sub import trigger_dataset_download
3939from tasks .data_import .data_import_utils import (
40- _get_or_create_feed ,
41- _get_or_create_entity_type ,
40+ get_or_create_feed ,
41+ get_or_create_entity_type ,
42+ get_license ,
4243)
4344
4445logger = logging .getLogger (__name__ )
5152GTFS_RT_FORMAT = "gtfs-rt"
5253
5354LICENSE_URL_MAP = {
54- "odc-odbl" : "https://opendatacommons.org/licenses/odbl/1.0/" ,
55- "mobility-licence" : "https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s" ,
56- "fr-lo" : "https://spdx.org/licenses/etalab-2.0.html" ,
57- "lov2" : "https://spdx.org/licenses/etalab-2.0.html" ,
55+ "odc-odbl" : {
56+ "url" : "https://opendatacommons.org/licenses/odbl/1.0/" ,
57+ "id" : "ODbL-1.0" ,
58+ },
59+ "mobility-licence" : {
60+ "url" : "https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s" ,
61+ },
62+ "fr-lo" : {
63+ "url" : "https://www.data.gouv.fr/pages/legal/licences/etalab-2.0" ,
64+ "id" : "etalab-2.0" ,
65+ },
66+ "lov2" : {
67+ "url" : "https://www.data.gouv.fr/pages/legal/licences/etalab-2.0" ,
68+ "id" : "etalab-2.0" ,
69+ },
5870}
5971
6072ENTITY_TYPES_MAP = {
@@ -75,7 +87,7 @@ def _get_license_url(license_id: Optional[str]) -> Optional[str]:
7587 """
7688 if not license_id :
7789 return None
78- return LICENSE_URL_MAP .get (license_id .lower ())
90+ return LICENSE_URL_MAP .get (license_id .lower (), {}). get ( "url" )
7991
8092
8193def _probe_head_format (
@@ -241,6 +253,7 @@ def _update_common_tdg_fields(
241253 resource : dict ,
242254 producer_url : str ,
243255 locations : List [Location ],
256+ db_session : Session ,
244257) -> None :
245258 """
246259 Update common fields for both schedule GTFS and RT from TDG dataset + resource.
@@ -254,7 +267,11 @@ def _update_common_tdg_fields(
254267 feed .operational_status = "wip"
255268
256269 feed .license_url = _get_license_url (dataset .get ("licence" ))
257-
270+ feed_license = get_license (
271+ db_session , LICENSE_URL_MAP .get (dataset .get ("licence" ), {}).get ("id" )
272+ )
273+ if feed_license :
274+ feed .license = feed_license
258275 # Use locations only if not already set
259276 if locations and (not feed .locations or len (feed .locations ) == 0 ):
260277 feed .locations = locations
@@ -338,6 +355,7 @@ def _process_tdg_dataset(
338355 db_session : Session ,
339356 session_http : requests .Session ,
340357 dataset : dict ,
358+ processed_stable_ids : Optional [set ] = None ,
341359) -> Tuple [dict , List [Feed ]]:
342360 """
343361 Process one TDG dataset:
@@ -391,8 +409,13 @@ def _process_tdg_dataset(
391409 # ---- STATIC GTFS ----
392410 if res_format == GTFS_FORMAT :
393411 stable_id = f"tdg-{ res_id } "
394- gtfs_feed , is_new = _get_or_create_feed (
395- db_session , Gtfsfeed , stable_id , "gtfs"
412+ processed_stable_ids .add (stable_id )
413+ gtfs_feed , is_new = get_or_create_feed (
414+ db_session ,
415+ Gtfsfeed ,
416+ stable_id ,
417+ "gtfs" ,
418+ official_notes = "Imported from Transport.data.gouv.fr as official feed." ,
396419 )
397420
398421 if not is_new :
@@ -406,7 +429,9 @@ def _process_tdg_dataset(
406429 stable_id ,
407430 )
408431 processed += 1
409- static_feeds_by_dataset_id [dataset_id ] = gtfs_feed
432+ if dataset_id not in static_feeds_by_dataset_id :
433+ static_feeds_by_dataset_id [dataset_id ] = []
434+ static_feeds_by_dataset_id [dataset_id ].append (gtfs_feed )
410435 continue
411436
412437 # Requirement: if GTFS url returns CSV, skip it (listing, not feed).
@@ -430,7 +455,9 @@ def _process_tdg_dataset(
430455 continue
431456
432457 # Apply changes
433- _update_common_tdg_fields (gtfs_feed , dataset , resource , res_url , locations )
458+ _update_common_tdg_fields (
459+ gtfs_feed , dataset , resource , res_url , locations , db_session
460+ )
434461 _ensure_tdg_external_id (gtfs_feed , res_id )
435462
436463 if dataset_id not in static_feeds_by_dataset_id :
@@ -457,7 +484,8 @@ def _process_tdg_dataset(
457484 )
458485
459486 rt_stable_id = f"tdg-{ res_id } "
460- rt_feed , is_new_rt = _get_or_create_feed (
487+ processed_stable_ids .add (rt_stable_id )
488+ rt_feed , is_new_rt = get_or_create_feed (
461489 db_session , Gtfsrealtimefeed , rt_stable_id , "gtfs_rt"
462490 )
463491
@@ -481,7 +509,9 @@ def _process_tdg_dataset(
481509 continue
482510
483511 # Apply changes
484- _update_common_tdg_fields (rt_feed , dataset , resource , res_url , locations )
512+ _update_common_tdg_fields (
513+ rt_feed , dataset , resource , res_url , locations , db_session
514+ )
485515 _ensure_tdg_external_id (rt_feed , res_id )
486516
487517 # Link RT → schedule
@@ -490,7 +520,7 @@ def _process_tdg_dataset(
490520 # Add entity types
491521 entity_types = _get_entity_types_from_resource (resource )
492522 rt_feed .entitytypes = [
493- _get_or_create_entity_type (db_session , et ) for et in entity_types
523+ get_or_create_entity_type (db_session , et ) for et in entity_types
494524 ]
495525
496526 if is_new_rt :
@@ -528,6 +558,28 @@ def _process_tdg_dataset(
528558 return deltas , feeds_to_publish
529559
530560
def _deprecate_stale_feeds(db_session, processed_stable_ids):
    """
    Deprecate TDG feeds not seen in this import run.

    Queries every ``Feed`` whose ``stable_id`` starts with ``tdg-`` and is not
    listed in ``processed_stable_ids``, then sets ``status = "deprecated"`` on
    each one that is not already deprecated. This function only mutates the
    loaded objects; it does not commit the session itself.

    Args:
        db_session: SQLAlchemy session used to query ``Feed`` rows.
        processed_stable_ids: Collection of stable ids (strings) that were
            encountered during the current import run.

    Returns:
        None.
    """
    # Safety guard: with nothing processed, the NOT IN filter below would match
    # every TDG feed and the whole catalogue would be deprecated in one pass
    # (e.g. after an empty or failed upstream fetch). Refuse to do that.
    if not processed_stable_ids:
        logger.warning(
            "No TDG stable_ids were processed in this run; "
            "skipping stale feed deprecation"
        )
        return

    logger.info("Deprecating stale TDG feeds not in processed_stable_ids")

    # Hand in_() a concrete list rather than a set so the bound parameters are
    # an ordinary sequence regardless of the SQLAlchemy version in use.
    seen_stable_ids = list(processed_stable_ids)
    stale_feeds = (
        db_session.query(Feed)
        .filter(Feed.stable_id.like("tdg-%"))
        .filter(~Feed.stable_id.in_(seen_stable_ids))
        .all()
    )
    logger.info("Found %d tdg_feeds stale stable_ids", len(stale_feeds))

    deprecated_count = 0
    for feed in stale_feeds:
        # Feeds already marked deprecated are left untouched and not counted.
        if feed.status == "deprecated":
            continue
        feed.status = "deprecated"
        deprecated_count += 1
        logger.info("Deprecated stale TDG feed stable_id=%s", feed.stable_id)

    logger.info("Total deprecated stale TDG feeds: %d", deprecated_count)
581+
582+
531583# ---------------------------------------------------------------------------
532584# Orchestrator & handler
533585# ---------------------------------------------------------------------------
@@ -567,10 +619,15 @@ def _import_tdg(db_session: Session, dry_run: bool = True) -> dict:
567619
568620 created_gtfs = updated_gtfs = created_rt = total_processed = 0
569621 feeds_to_publish : List [Feed ] = []
570-
622+ processed_stable_ids = set ()
571623 for idx , dataset in enumerate (datasets , start = 1 ):
572624 try :
573- deltas , new_feeds = _process_tdg_dataset (db_session , session_http , dataset )
625+ deltas , new_feeds = _process_tdg_dataset (
626+ db_session ,
627+ session_http ,
628+ dataset ,
629+ processed_stable_ids = processed_stable_ids ,
630+ )
574631
575632 created_gtfs += deltas ["created_gtfs" ]
576633 updated_gtfs += deltas ["updated_gtfs" ]
@@ -594,6 +651,8 @@ def _import_tdg(db_session: Session, dry_run: bool = True) -> dict:
594651 continue
595652
596653 if not dry_run :
654+ # Deprecate TDG feeds not seen in this import
655+ _deprecate_stale_feeds (db_session , processed_stable_ids )
597656 # Last commit for remaining feeds
598657 commit_changes (db_session , feeds_to_publish , total_processed )
599658
0 commit comments