@@ -400,7 +400,7 @@ task build_vcf_shard_mt {
400400 }
401401
402402 runtime {
403- docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.0 "
403+ docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.1 "
404404 memory : memory_gb + " GB"
405405 cpu : cpu
406406 disks : "local-disk " + disk_gb + " " + disk_type
@@ -599,7 +599,7 @@ task merge_mt_shards {
599599 }
600600
601601 runtime {
602- docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.0 "
602+ docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.1 "
603603 memory : memory_gb + " GB"
604604 cpu : cpu
605605 disks : "local-disk " + disk_gb + " " + disk_type
@@ -699,7 +699,7 @@ task finalize_mt_with_covdb {
699699 }
700700
701701 runtime {
702- docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.0 "
702+ docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.1 "
703703 memory : memory_gb + " GB"
704704 cpu : cpu
705705 disks : "local-disk " + disk_gb + " " + disk_type
@@ -843,17 +843,17 @@ task process_tsv_files {
843843 if filtered_df.shape[0] != df.shape[0]:
844844 raise ValueError("Filtered DataFrame does not have the same number of samples as the original.")
845845
846- # Calculate age
847- filtered_df['date_of_birth'] = pd.to_datetime(filtered_df['date_of_birth'])
848- filtered_df['biosample_collection_date'] = pd.to_datetime(filtered_df['biosample_collection_date'])
846+ # Calculate age (allow missing/invalid dates to yield NaN)
847+ filtered_df['date_of_birth'] = pd.to_datetime(filtered_df['date_of_birth'], errors="coerce")
848+ filtered_df['biosample_collection_date'] = pd.to_datetime(
849+ filtered_df['biosample_collection_date'], errors="coerce"
850+ )
849851 filtered_df['age'] = pd.to_numeric(
850- np.floor((filtered_df['biosample_collection_date'] - filtered_df['date_of_birth']).dt.days / 365)
852+ np.floor((filtered_df['biosample_collection_date'] - filtered_df['date_of_birth']).dt.days / 365),
853+ errors="coerce"
851854 )
852-
853- # Age must be an int and must be present
854- filtered_df['age'] = filtered_df['age'].astype(int)
855- if filtered_df['age'].isna().any():
856- raise ValueError("Unexpected missing ages detected.")
855+ # Use pandas nullable integer dtype so NaN values are preserved.
856+ filtered_df['age'] = filtered_df['age'].astype("Int64")
857857
858858 # Rename columns for compatibility
859859 filtered_df.rename(columns={"mean_coverage": "wgs_mean_coverage"}, inplace=True)
@@ -1013,7 +1013,7 @@ task combine_vcfs_and_homref_from_covdb {
10131013
10141014 runtime {
10151015 # NOTE: This must be a Hail-capable image with mtSwirl code baked in at /opt/mtSwirl.
1016- docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.0 "
1016+ docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.1 "
10171017 memory : memory_gb + " GB"
10181018 cpu : cpu
10191019 disks : "local-disk " + disk_gb + " " + disk_type
@@ -1104,7 +1104,7 @@ task add_annotations {
11041104 }
11051105
11061106 runtime {
1107- docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.0 "
1107+ docker : "us.gcr.io/broad-gotc-prod/aou-mitochondrial-combine-vcfs-covdb:1.0.1 "
11081108 memory : memory_gb + " GB"
11091109 cpu : cpu
11101110 disks : "local-disk " + disk_gb + " " + disk_type
0 commit comments