Skip to content

Commit c7af345

Browse files
Use anndata.concat to keep shared columns; delete rest
1 parent 9820f38 commit c7af345

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

benj/aggregate.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
166166
scrub_tbl = {}
167167
h5ad_tbl = {}
168168
bad = []
169+
var_same = None
169170
with sw("Reading H5AD files"):
170171
for sample in tqdm(metadata.index.values):
171172
gc.collect()
@@ -183,6 +184,15 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
183184
if adata is None:
184185
bad.append(sample)
185186
else:
187+
if var_same is None:
188+
var_same = adata.var.copy()
189+
else:
190+
### Idea: keep one running .var that holds only the columns whose values are identical in every sample read so far
191+
### If a column's values are the same everywhere, save it once here and delete it from each per-sample AnnData
192+
###
193+
var_same = anndata.concat([anndata.AnnData(var=var_same), anndata.AnnData(var=adata.var)], merge="same").var
194+
for cn in var_same.columns:
195+
del adata.var[cn]
186196
adata_tbl[sample] = adata
187197
fname_tbl[sample] = fname
188198
if "H5AD" in adata.uns and adata.uns["H5AD"].get("sample_key", "") == sample_key:
@@ -208,6 +218,8 @@ def aggregate_concat(metadata=None, directory:Union[_PathLike, List[_PathLike]]=
208218
tk = list(adata_tbl.keys())
209219
del adata_tbl
210220
gc.collect()
221+
for vn in var_same.columns:
222+
adata.var[vn] = var_same[vn]
211223
if len(tk) == len(scrub_tbl.keys()):
212224
adata.uns["scrublet"] = {"batches": scrub_tbl,
213225
"batched_by": sample_key}

0 commit comments

Comments
 (0)