Skip to content

Commit d156d39

Browse files
authored
Base proportion of assigned reads on counts instead of transcripts table (#123)
1 parent a00150f commit d156d39

File tree

1 file changed

+50
-6
lines changed

1 file changed

+50
-6
lines changed

src/metrics/quality/quality_metrics.py

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
import numpy as np
12
import pandas as pd
23
import spatialdata as sd
34

45

5-
66
def proportion_of_assigned_reads(
77
sdata: sd.SpatialData,
88
) -> [float, pd.Series]:
@@ -22,10 +22,8 @@ def proportion_of_assigned_reads(
2222
2323
"""
2424

25-
sdata['transcripts']['assigned'] = sdata['transcripts']['cell_id'] != 0
26-
2725
# Proportion of assigned reads
28-
prop_of_assigned_reads = float(((sdata['transcripts']['assigned']).sum() / len(sdata['transcripts'])).compute())
26+
prop_of_assigned_reads = float(sdata["counts"].layers["counts"].sum() / len(sdata['transcripts']))
2927

3028
# Proportion of assigned reads per gene
3129
if prop_of_assigned_reads == 1.0:
@@ -39,10 +37,56 @@ def proportion_of_assigned_reads(
3937
data=0.0
4038
)
4139
else:
42-
df = pd.crosstab(sdata['transcripts']['feature_name'], sdata['transcripts']['assigned'])
43-
prop_of_assigned_reads_per_gene = df[True] / (df[False] + df[True])
40+
genes, counts = np.unique(sdata['transcripts']['feature_name'], return_counts=True)
41+
df = pd.DataFrame(index=genes, data = {"fraction":0, "count":counts, "count_assigned":0})
42+
df.loc[sdata["counts"].var_names, "count_assigned"] = np.array(sdata["counts"].layers["counts"].sum(axis=0))[0,:]
43+
df["fraction"] = df["count_assigned"] / df["count"]
44+
prop_of_assigned_reads_per_gene = df["fraction"]
4445

4546
return prop_of_assigned_reads, prop_of_assigned_reads_per_gene
47+
48+
# Previous version, based only on the transcripts table.
49+
#
50+
#def proportion_of_assigned_reads(
51+
# sdata: sd.SpatialData,
52+
#) -> [float, pd.Series]:
53+
# """ Calculate the proportion of assigned reads
54+
#
55+
# Parameters
56+
# ----------
57+
# sdata : sd.SpatialData
58+
# SpatialData object with sdata['transcripts'] including the column 'cell_id'
59+
#
60+
# Returns
61+
# -------
62+
# float
63+
# Proportion of assigned reads
64+
# pd.Series
65+
# Proportion of assigned reads per gene
66+
#
67+
# """
68+
#
69+
# sdata['transcripts']['assigned'] = sdata['transcripts']['cell_id'] != 0
70+
#
71+
# # Proportion of assigned reads
72+
# prop_of_assigned_reads = float(((sdata['transcripts']['assigned']).sum() / len(sdata['transcripts'])).compute())
73+
#
74+
# # Proportion of assigned reads per gene
75+
# if prop_of_assigned_reads == 1.0:
76+
# prop_of_assigned_reads_per_gene = pd.Series(
77+
# index=sdata['transcripts']['feature_name'].unique().compute().values,
78+
# data=1.0
79+
# )
80+
# elif prop_of_assigned_reads == 0.0:
81+
# prop_of_assigned_reads_per_gene = pd.Series(
82+
# index=sdata['transcripts']['feature_name'].unique().compute().values,
83+
# data=0.0
84+
# )
85+
# else:
86+
# df = pd.crosstab(sdata['transcripts']['feature_name'], sdata['transcripts']['assigned'])
87+
# prop_of_assigned_reads_per_gene = df[True] / (df[False] + df[True])
88+
#
89+
# return prop_of_assigned_reads, prop_of_assigned_reads_per_gene
4690

4791

4892

0 commit comments

Comments
 (0)