Skip to content

Commit 57d7f5f

Browse files
authored
Merge branch 'main' into Adding-mouse-immune-dictionary-ANOVA
2 parents 35b15d8 + 77661f8 commit 57d7f5f

File tree

3 files changed

+152
-128
lines changed

3 files changed

+152
-128
lines changed

pf2rnaseq/factorization.py

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
import anndata
22
import cupy
33
import numpy as np
4+
import pandas as pd
5+
import scanpy as sc
46
import scipy.sparse as sps
57
from pacmap import PaCMAP
68
from parafac2.parafac2 import parafac2_nd, store_pf2
79
from scipy.stats import gmean
810
from sklearn.decomposition import PCA
911
from sklearn.linear_model import LinearRegression
12+
from tensorly.cp_tensor import CPTensor
13+
from tlviz.factor_tools import factor_match_score as fms
1014
from tqdm import tqdm
1115

1216

@@ -41,7 +45,13 @@ def pf2(
4145
):
4246
cupy.cuda.Device(1).use()
4347
pf_out, R2X = parafac2_nd(
44-
X, rank=rank, random_state=random_state, tol=tolerance, n_iter_max=500
48+
49+
X,
50+
rank=rank,
51+
random_state=random_state,
52+
tol=tolerance,
53+
n_iter_max=500,
54+
4555
)
4656

4757
X = store_pf2(X, pf_out)
@@ -70,3 +80,115 @@ def pf2_pca_r2x(X: anndata.AnnData, ranks):
7080
r2x_pca = np.cumsum(pca.explained_variance_ratio_)
7181

7282
return r2x_pf2, r2x_pca[np.array(ranks) - 1]
83+
84+
85+
def calculateFMS(A: anndata.AnnData, B: anndata.AnnData):
    """Return the factor match score between two stored Pf2 decompositions.

    Each AnnData is wrapped as a ``CPTensor`` built from its
    ``uns["Pf2_weights"]`` and the factor list
    ``[uns["Pf2_A"], uns["Pf2_B"], varm["Pf2_C"]]``. The score is computed
    with ``consider_weights=False`` and ``skip_mode=1``, so mode 1 (the
    ``Pf2_B`` factor) is the mode handed to the scorer as the one to skip.
    """
    # Build both CP tensors the same way; A is constructed before B.
    cp_a, cp_b = (
        CPTensor(
            (
                adata.uns["Pf2_weights"],
                [adata.uns["Pf2_A"], adata.uns["Pf2_B"], adata.varm["Pf2_C"]],
            )
        )
        for adata in (A, B)
    )

    return fms(cp_a, cp_b, consider_weights=False, skip_mode=1)  # type: ignore
104+
105+
106+
def fms_percent_drop(
    X: anndata.AnnData,
    percentList: np.ndarray,
    runs: int,
    rank: int = 30,
):
    """Compute FMS between a full-data Pf2 fit and fits on subsampled data.

    The full dataset is factorized once as the reference. For every run and
    every entry of ``percentList``, that percentage of the data is dropped
    with ``sc.pp.subsample``, Pf2 is refit, and the factor match score
    against the reference is recorded. No plotting happens here; the result
    is returned for the caller to plot.

    Args:
        X: Dataset to factorize.
        percentList: Percentages (on a 0-100 scale) of the data to drop.
        runs: Number of repeats. The run index seeds the subsampling
            (``random_state=run``) and the refit (``random_state=run + 2``).
        rank: Number of Pf2 components used for every fit.

    Returns:
        A long-format DataFrame with columns ``"Run"``,
        ``"Percentage of Data Dropped"`` and ``"FMS"``, one row per
        (run, percentage) pair, ordered run-major.
    """
    reference = pf2(X, rank, doEmbedding=False)

    rows = []
    for run in range(runs):
        for position, percent in enumerate(percentList):
            if position == 0 and percent == 0:
                # A leading 0 % entry means "nothing dropped"; it is scored
                # as a perfect match without refitting. Any other leading
                # value is now actually evaluated instead of being
                # labelled 1.0.
                score = 1.0
            else:
                subsampled: anndata.AnnData = sc.pp.subsample(
                    X, fraction=1 - (percent / 100), random_state=run, copy=True
                )  # type: ignore
                refit = pf2(
                    subsampled, rank, random_state=run + 2, doEmbedding=False
                )
                score = calculateFMS(reference, refit)

            rows.append(
                {
                    "Run": run,
                    "Percentage of Data Dropped": percent,
                    "FMS": score,
                }
            )

    # Explicit columns keep the schema stable even when there are no rows
    # (runs == 0 or an empty percentList).
    return pd.DataFrame(
        rows, columns=["Run", "Percentage of Data Dropped", "FMS"]
    )
151+
152+
153+
def resample(data: anndata.AnnData, random_state=None) -> anndata.AnnData:
    """Return a bootstrap resample of ``data`` along its first axis.

    ``data.shape[0]`` indices are drawn with replacement from
    ``range(data.shape[0])`` and used to index ``data``; the indexed result
    is copied before being returned, so the input is left untouched.

    Args:
        data: Dataset to resample.
        random_state: Optional seed for a reproducible draw. When ``None``
            (the default) the indices come from NumPy's global random state,
            exactly as before, so existing ``np.random.seed`` usage keeps
            working. Otherwise a dedicated ``np.random.default_rng`` seeded
            with this value is used.

    Returns:
        A copy of ``data`` with the same first-axis length as the input.
    """
    n_obs = data.shape[0]

    if random_state is None:
        indices = np.random.randint(0, n_obs, size=(n_obs,))
    else:
        rng = np.random.default_rng(random_state)
        indices = rng.integers(0, n_obs, size=(n_obs,))

    return data[indices].copy()
158+
159+
160+
def fms_diff_ranks(
    X: anndata.AnnData,
    ranksList: list[int],
    runs: int,
):
    """Compute FMS between Pf2 fits of the data and of a bootstrap, per rank.

    For every run and every rank in ``ranksList``, Pf2 is fit once on ``X``
    and once on ``resample(X)``, both with ``random_state`` set to the run
    index, and the factor match score between the two fits is recorded.
    No plotting happens here; the result is returned for the caller to plot.

    Args:
        X: Dataset to factorize.
        ranksList: Pf2 ranks (numbers of components) to evaluate.
        runs: Number of repeats; the run index seeds both Pf2 fits.

    Returns:
        A long-format DataFrame with columns ``"Run"``, ``"Component"`` and
        ``"FMS"``, one row per (run, rank) pair, ordered run-major.
    """
    rows = []
    for run in range(runs):
        for rank in ranksList:
            # Keep this order: fit the full data first, then draw the
            # bootstrap and fit it, matching the original sequence of calls.
            full_fit = pf2(X, rank=rank, random_state=run, doEmbedding=False)
            # NOTE: resample() draws from NumPy's global random state, so the
            # bootstrap itself is not tied to the run index.
            boot_fit = pf2(
                resample(X), rank=rank, random_state=run, doEmbedding=False
            )

            rows.append(
                {
                    "Run": run,
                    "Component": rank,
                    "FMS": calculateFMS(full_fit, boot_fit),
                }
            )

    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(rows, columns=["Run", "Component", "FMS"])

pf2rnaseq/figures/commonFuncs/plotGeneral.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import seaborn as sns
77
from matplotlib.axes import Axes
88

9-
from ...factorization import pf2_pca_r2x
9+
from ...factorization import fms_percent_drop, pf2_pca_r2x, fms_diff_ranks
1010

1111

1212
def plot_r2x(data, rank_vec, ax: Axes):
@@ -439,3 +439,24 @@ def plot_boxplot_gene_celltype(
439439
ax.set(title=gene)
440440
ax.set_xticks(ax.get_xticks())
441441
ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45)
442+
443+
444+
def plot_fms_diff_ranks(
    X: anndata.AnnData,
    ax: Axes,
    ranksList: list[int],
    runs=3,
):
    """Draw FMS against the number of Pf2 components on ``ax``.

    The scores come from ``fms_diff_ranks(X, ranksList, runs)``; they are
    drawn as a seaborn line plot of "FMS" versus "Component", and the y-axis
    is fixed to the range 0 to 1.
    """
    scores_by_rank = fms_diff_ranks(X, ranksList, runs)

    sns.lineplot(x="Component", y="FMS", data=scores_by_rank, ax=ax)
    ax.set_ylim(0, 1)
454+
455+
456+
def plot_fms_percent_drop(
    X: anndata.AnnData, ax: Axes, percentList: np.ndarray, runs=3, rank: int = 30
):
    """Draw FMS against the percentage of data dropped on ``ax``.

    The scores come from ``fms_percent_drop(X, percentList, runs, rank)``;
    they are drawn as a seaborn line plot of "FMS" versus
    "Percentage of Data Dropped", and the y-axis is fixed to the range 0 to 1.
    """
    scores_by_drop = fms_percent_drop(X, percentList, runs, rank)

    sns.lineplot(
        x="Percentage of Data Dropped",
        y="FMS",
        data=scores_by_drop,
        ax=ax,
    )
    ax.set_ylim(0, 1)
Lines changed: 7 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
11
"""
22
factorization score
3+
34
"""
45

5-
import anndata
66
import numpy as np
7-
import pandas as pd
8-
import scanpy as sc
9-
import seaborn as sns
10-
from matplotlib.axes import Axes
11-
from tensorly.cp_tensor import CPTensor
12-
from tlviz.factor_tools import factor_match_score as fms
137

14-
from ..factorization import pf2
158
from ..imports import import_Heiser
169
from .common import getSetup, subplotLabel
10+
from .commonFuncs.plotGeneral import (
11+
plot_fms_diff_ranks,
12+
plot_fms_percent_drop,
13+
)
1714

1815

1916
def makeFigure():
@@ -22,125 +19,9 @@ def makeFigure():
2219

2320
X = import_Heiser()
2421
percentList = np.arange(0.0, 55.0, 5.0)
25-
# plot_fms_percent_drop(X, ax[0], percentList=percentList, runs=2)
22+
plot_fms_percent_drop(X, ax[0], percentList=percentList, runs=2, rank=30)
2623

27-
ranks = list(range(30, 51))
24+
ranks = list(range(1, 31))
2825
plot_fms_diff_ranks(X, ax[1], ranksList=ranks, runs=2)
2926

3027
return f
31-
32-
33-
def calculateFMS(A: anndata.AnnData, B: anndata.AnnData):
34-
"""Calculates FMS between 2 factors"""
35-
factors = [A.uns["Pf2_A"], A.uns["Pf2_B"], A.varm["Pf2_C"]]
36-
A_CP = CPTensor(
37-
(
38-
A.uns["Pf2_weights"],
39-
factors,
40-
)
41-
)
42-
43-
factors = [B.uns["Pf2_A"], B.uns["Pf2_B"], B.varm["Pf2_C"]]
44-
B_CP = CPTensor(
45-
(
46-
B.uns["Pf2_weights"],
47-
factors,
48-
)
49-
)
50-
51-
return fms(A_CP, B_CP, consider_weights=False, skip_mode=1) # type: ignore
52-
53-
54-
def plot_fms_percent_drop(
55-
X: anndata.AnnData,
56-
ax: Axes,
57-
percentList: np.ndarray,
58-
runs: int,
59-
rank: int = 30,
60-
):
61-
# Plots FMS score when percentage is removed from data
62-
dataX = pf2(X, rank, doEmbedding=False)
63-
64-
fmsLists = []
65-
66-
for j in range(0, runs, 1):
67-
scores = [1.0]
68-
69-
for i in percentList[1:]:
70-
sampled_data: anndata.AnnData = sc.pp.subsample(
71-
X, fraction=1 - (i / 100), random_state=j, copy=True
72-
) # type: ignore
73-
sampledX = pf2(sampled_data, rank, random_state=j + 2, doEmbedding=False)
74-
75-
fmsScore = calculateFMS(dataX, sampledX)
76-
scores.append(fmsScore)
77-
78-
fmsLists.append(scores)
79-
80-
runsList_df = []
81-
for i in range(0, runs):
82-
for j in range(0, len(percentList)):
83-
runsList_df.append(i)
84-
percentList_df = []
85-
for i in range(0, runs):
86-
for j in range(0, len(percentList)):
87-
percentList_df.append(percentList[j])
88-
fmsList_df = []
89-
for sublist in fmsLists:
90-
fmsList_df += sublist
91-
df = pd.DataFrame(
92-
{
93-
"Run": runsList_df,
94-
"Percentage of Data Dropped": percentList_df,
95-
"FMS": fmsList_df,
96-
}
97-
)
98-
99-
sns.lineplot(data=df, x="Percentage of Data Dropped", y="FMS", ax=ax)
100-
ax.set_ylim(0, 1)
101-
102-
103-
def resample(data: anndata.AnnData) -> anndata.AnnData:
104-
"""Bootstrapping dataset"""
105-
indices = np.random.randint(0, data.shape[0], size=(data.shape[0],))
106-
data = data[indices].copy()
107-
return data
108-
109-
110-
def plot_fms_diff_ranks(
111-
X: anndata.AnnData,
112-
ax: Axes,
113-
ranksList: list[int],
114-
runs: int,
115-
):
116-
# Plots FMS when using different Pf2 components
117-
fmsLists = []
118-
119-
for j in range(0, runs, 1):
120-
scores = []
121-
for i in ranksList:
122-
dataX = pf2(X, rank=i, random_state=j, doEmbedding=False)
123-
124-
sampledX = pf2(resample(X), rank=i, random_state=j, doEmbedding=False)
125-
126-
fmsScore = calculateFMS(dataX, sampledX)
127-
scores.append(fmsScore)
128-
fmsLists.append(scores)
129-
130-
runsList_df = []
131-
for i in range(0, runs):
132-
for j in range(0, len(ranksList)):
133-
runsList_df.append(i)
134-
ranksList_df = []
135-
for i in range(0, runs):
136-
for j in range(0, len(ranksList)):
137-
ranksList_df.append(ranksList[j])
138-
fmsList_df = []
139-
for sublist in fmsLists:
140-
fmsList_df += sublist
141-
df = pd.DataFrame(
142-
{"Run": runsList_df, "Component": ranksList_df, "FMS": fmsList_df}
143-
)
144-
145-
sns.lineplot(data=df, x="Component", y="FMS", ax=ax)
146-
ax.set_ylim(0, 1)

0 commit comments

Comments
 (0)