@@ -527,13 +527,16 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
527527 stats = _get_samples (value , stats , hierarchy )
528528 break
529529
530+ # Track which components have already been found, so that each one is
531+ # counted only once, even in incorrectly named datasets
532+ found : Dict [str , str ] = {}
530533 for part in Path (assetmeta ["path" ]).name .split ("." )[0 ].split ("_" ):
531- if part .startswith ("sub-" ):
532- subject = part .replace ("sub-" , "" )
534+ if not found . get ( "subject" ) and part .startswith ("sub-" ):
535+ found [ " subject" ] = subject = part .split ("sub-" , 1 )[ 1 ]
533536 if subject not in stats ["subjects" ]:
534537 stats ["subjects" ].append (subject )
535- if part .startswith ("sample-" ):
536- sample = part .replace ("sample-" , "" )
538+ if not found . get ( "sample" ) and part .startswith ("sample-" ):
539+ found [ "sample" ] = sample = part .replace ("sample-" , "" )
537540 if sample not in stats ["tissuesample" ]:
538541 stats ["tissuesample" ].append (sample )
539542
@@ -559,10 +562,13 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
559562 stats : _stats_type = {}
560563 for meta in metadata :
561564 _add_asset_to_stats (meta , stats )
562-
563565 stats ["numberOfBytes" ] = stats .get ("numberOfBytes" , 0 )
564566 stats ["numberOfFiles" ] = stats .get ("numberOfFiles" , 0 )
565567 stats ["numberOfSubjects" ] = len (stats .pop ("subjects" , [])) or None
568+ if stats ["numberOfSubjects" ]:
569+ # Must not happen; if it does, it is a bug in the software
570+ assert stats ["numberOfFiles" ]
571+ assert stats ["numberOfSubjects" ] <= stats ["numberOfFiles" ]
566572 stats ["numberOfSamples" ] = (
567573 len (stats .pop ("tissuesample" , [])) + len (stats .pop ("slice" , []))
568574 ) or None
0 commit comments