1+ import numpy as np
12import pandas as pd
23import spatialdata as sd
34
45
5-
66def proportion_of_assigned_reads (
77 sdata : sd .SpatialData ,
88) -> [float , pd .Series ]:
@@ -22,10 +22,8 @@ def proportion_of_assigned_reads(
2222
2323 """
2424
25- sdata ['transcripts' ]['assigned' ] = sdata ['transcripts' ]['cell_id' ] != 0
26-
2725 # Proportion of assigned reads
28- prop_of_assigned_reads = float ((( sdata ['transcripts' ][ 'assigned' ]) .sum () / len (sdata ['transcripts' ])). compute ( ))
26+ prop_of_assigned_reads = float (sdata ["counts" ]. layers [ "counts" ] .sum () / len (sdata ['transcripts' ]))
2927
3028 # Proportion of assigned reads per gene
3129 if prop_of_assigned_reads == 1.0 :
@@ -39,10 +37,56 @@ def proportion_of_assigned_reads(
3937 data = 0.0
4038 )
4139 else :
42- df = pd .crosstab (sdata ['transcripts' ]['feature_name' ], sdata ['transcripts' ]['assigned' ])
43- prop_of_assigned_reads_per_gene = df [True ] / (df [False ] + df [True ])
40+ genes , counts = np .unique (sdata ['transcripts' ]['feature_name' ], return_counts = True )
41+ df = pd .DataFrame (index = genes , data = {"fraction" :0 , "count" :counts , "count_assigned" :0 })
42+ df .loc [sdata ["counts" ].var_names , "count_assigned" ] = np .array (sdata ["counts" ].layers ["counts" ].sum (axis = 0 ))[0 ,:]
43+ df ["fraction" ] = df ["count_assigned" ] / df ["count" ]
44+ prop_of_assigned_reads_per_gene = df ["fraction" ]
4445
4546 return prop_of_assigned_reads , prop_of_assigned_reads_per_gene
47+
48+ # Previous version, based only on the transcripts table.
49+ #
50+ #def proportion_of_assigned_reads(
51+ # sdata: sd.SpatialData,
52+ #) -> [float, pd.Series]:
53+ # """ Calculate the proportion of assigned reads
54+ #
55+ # Parameters
56+ # ----------
57+ # sdata : sd.SpatialData
58+ # SpatialData object with sdata['transcripts'] including the column 'cell_id'
59+ #
60+ # Returns
61+ # -------
62+ # float
63+ # Proportion of assigned reads
64+ # pd.Series
65+ # Proportion of assigned reads per gene
66+ #
67+ # """
68+ #
69+ # sdata['transcripts']['assigned'] = sdata['transcripts']['cell_id'] != 0
70+ #
71+ # # Proportion of assigned reads
72+ # prop_of_assigned_reads = float(((sdata['transcripts']['assigned']).sum() / len(sdata['transcripts'])).compute())
73+ #
74+ # # Proportion of assigned reads per gene
75+ # if prop_of_assigned_reads == 1.0:
76+ # prop_of_assigned_reads_per_gene = pd.Series(
77+ # index=sdata['transcripts']['feature_name'].unique().compute().values,
78+ # data=1.0
79+ # )
80+ # elif prop_of_assigned_reads == 0.0:
81+ # prop_of_assigned_reads_per_gene = pd.Series(
82+ # index=sdata['transcripts']['feature_name'].unique().compute().values,
83+ # data=0.0
84+ # )
85+ # else:
86+ # df = pd.crosstab(sdata['transcripts']['feature_name'], sdata['transcripts']['assigned'])
87+ # prop_of_assigned_reads_per_gene = df[True] / (df[False] + df[True])
88+ #
89+ # return prop_of_assigned_reads, prop_of_assigned_reads_per_gene
4690
4791
4892
0 commit comments