Skip to content

Commit 4b89e4f

Browse files
authored
Merge pull request #173 from dandi/bf-robustify-nsubjects
Count only for the first hit for subject or tissue within filename
2 parents 6b7332c + b749796 commit 4b89e4f

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

dandischema/metadata.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -527,13 +527,16 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
527527
stats = _get_samples(value, stats, hierarchy)
528528
break
529529

530+
# which components were already found, so we do not count more than
531+
# once in some incorrectly named datasets
532+
found: Dict[str, str] = {}
530533
for part in Path(assetmeta["path"]).name.split(".")[0].split("_"):
531-
if part.startswith("sub-"):
532-
subject = part.replace("sub-", "")
534+
if not found.get("subject") and part.startswith("sub-"):
535+
found["subject"] = subject = part.split("sub-", 1)[1]
533536
if subject not in stats["subjects"]:
534537
stats["subjects"].append(subject)
535-
if part.startswith("sample-"):
536-
sample = part.replace("sample-", "")
538+
if not found.get("sample") and part.startswith("sample-"):
539+
found["sample"] = sample = part.replace("sample-", "")
537540
if sample not in stats["tissuesample"]:
538541
stats["tissuesample"].append(sample)
539542

@@ -559,10 +562,13 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
559562
stats: _stats_type = {}
560563
for meta in metadata:
561564
_add_asset_to_stats(meta, stats)
562-
563565
stats["numberOfBytes"] = stats.get("numberOfBytes", 0)
564566
stats["numberOfFiles"] = stats.get("numberOfFiles", 0)
565567
stats["numberOfSubjects"] = len(stats.pop("subjects", [])) or None
568+
if stats["numberOfSubjects"]:
569+
# Must not happen. If it does, it is a bug in the software
570+
assert stats["numberOfFiles"]
571+
assert stats["numberOfSubjects"] <= stats["numberOfFiles"]
566572
stats["numberOfSamples"] = (
567573
len(stats.pop("tissuesample", [])) + len(stats.pop("slice", []))
568574
) or None

0 commit comments

Comments
 (0)