Skip to content

include process mixture (GMM assignment) as a standalone function #34

@noamteyssier

Description

@noamteyssier
import numpy as np
import anndata as ad
import polars as pl
from glob import glob

from geomux import gaussian_mixture

def process_mixture(crispr_path: str, gex_path: str):
    # Load anndata
    cr_adata = ad.read_h5ad(crispr_path)

    # Process mixture
    experiments = cr_adata.obs.experiment.unique()
    barcodes = cr_adata.obs.bc_idx.unique()
    lanes = cr_adata.obs.lane_id.unique()

    assignments = []
    for exp in experiments:
        for bc_idx in barcodes:
            for lane_id in lanes:
                subset = cr_adata[
                    (cr_adata.obs.bc_idx == bc_idx)
                    & (cr_adata.obs.lane_id == lane_id)
                    & (cr_adata.obs.experiment == exp)
                ]
                subset_assignment = gaussian_mixture(subset).with_columns(
                    bc_idx=pl.lit(bc_idx).str.replace("CR", "BC"),
                    lane_id=pl.lit(lane_id),
                    experiment=pl.lit(exp),
                )
                assignments.append(subset_assignment)
    assignments = pl.concat(assignments)

    # load gex as backed
    gex_adata = ad.read_h5ad(gex_path, backed="r")
    gex_adata.obs["cell"] = gex_adata.obs.index.values

    # merge the two
    df = assignments.drop(["cell_id", "guide_ids_original"]).to_pandas()
    gex_adata.obs = gex_adata.obs.merge(
        df,
        how="left",
        on=["cell", "bc_idx", "lane_id", "experiment"],
        suffixes=["_geomux", "_mixture"],
    )
    gex_adata.obs["assignment"] = gex_adata.obs["assignment_mixture"]
    gex_adata.obs["moi"] = gex_adata.obs["moi_mixture"]
    gex_adata.obs["umis"] = gex_adata.obs["umis_mixture"]

    # write gex out
    gex_adata.write_h5ad(
        gex_path.replace("_gex.h5ad", "_gex-mixture.h5ad")
    )



for crispr_path in glob("./aggr/*/*_crispr.h5ad"):
    gex_path = crispr_path.replace("_crispr.h5ad", "_gex.h5ad")
    print(f"Processing: {crispr_path} and {gex_path}")
    process_mixture(crispr_path, gex_path)

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions