-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Labels
enhancementNew feature or requestNew feature or request
Description
import numpy as np
import anndata as ad
import polars as pl
from glob import glob
from geomux import gaussian_mixture
def process_mixture(crispr_path: str, gex_path: str):
# Load anndata
cr_adata = ad.read_h5ad(crispr_path)
# Process mixture
experiments = cr_adata.obs.experiment.unique()
barcodes = cr_adata.obs.bc_idx.unique()
lanes = cr_adata.obs.lane_id.unique()
assignments = []
for exp in experiments:
for bc_idx in barcodes:
for lane_id in lanes:
subset = cr_adata[
(cr_adata.obs.bc_idx == bc_idx)
& (cr_adata.obs.lane_id == lane_id)
& (cr_adata.obs.experiment == exp)
]
subset_assignment = gaussian_mixture(subset).with_columns(
bc_idx=pl.lit(bc_idx).str.replace("CR", "BC"),
lane_id=pl.lit(lane_id),
experiment=pl.lit(exp),
)
assignments.append(subset_assignment)
assignments = pl.concat(assignments)
# load gex as backed
gex_adata = ad.read_h5ad(gex_path, backed="r")
gex_adata.obs["cell"] = gex_adata.obs.index.values
# merge the two
df = assignments.drop(["cell_id", "guide_ids_original"]).to_pandas()
gex_adata.obs = gex_adata.obs.merge(
df,
how="left",
on=["cell", "bc_idx", "lane_id", "experiment"],
suffixes=["_geomux", "_mixture"],
)
gex_adata.obs["assignment"] = gex_adata.obs["assignment_mixture"]
gex_adata.obs["moi"] = gex_adata.obs["moi_mixture"]
gex_adata.obs["umis"] = gex_adata.obs["umis_mixture"]
# write gex out
gex_adata.write_h5ad(
gex_path.replace("_gex.h5ad", "_gex-mixture.h5ad")
)
for crispr_path in glob("./aggr/*/*_crispr.h5ad"):
gex_path = crispr_path.replace("_crispr.h5ad", "_gex.h5ad")
print(f"Processing: {crispr_path} and {gex_path}")
process_mixture(crispr_path, gex_path)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request