@@ -166,6 +166,7 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
166166 scrub_tbl = {}
167167 h5ad_tbl = {}
168168 bad = []
169+ var_same = None
169170 with sw ("Reading H5AD files" ):
170171 for sample in tqdm (metadata .index .values ):
171172 gc .collect ()
@@ -183,6 +184,15 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
183184 if adata is None :
184185 bad .append (sample )
185186 else :
187+ if var_same is None :
188+ var_same = adata .var .copy ()
189+ else :
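190+ ### Idea: keep one running .var (var_same) holding only the columns whose values
191+ ### are identical in every sample read so far (anndata.concat with merge="same").
192+ ### Those columns are deleted from the current sample's .var and copied back into adata.var further down.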
193+ var_same = anndata .concat ([anndata .AnnData (var = var_same ), anndata .AnnData (var = adata .var )], merge = "same" ).var
194+ for cn in var_same .columns :
195+ del adata .var [cn ]
186196 adata_tbl [sample ] = adata
187197 fname_tbl [sample ] = fname
188198 if "H5AD" in adata .uns and adata .uns ["H5AD" ].get ("sample_key" , "" ) == sample_key :
@@ -208,6 +218,8 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
208218 tk = list (adata_tbl .keys ())
209219 del adata_tbl
210220 gc .collect ()
221+ for vn in var_same .columns :
222+ adata .var [vn ] = var_same [vn ]
211223 if len (tk ) == len (scrub_tbl .keys ()):
212224 adata .uns ["scrublet" ] = {"batches" : scrub_tbl ,
213225 "batched_by" : sample_key }
0 commit comments