@@ -1373,14 +1373,20 @@ def _prep_volcano_data(
13731373
13741374 if p_thresh is not None :
13751375 # (2) significant by score
1376+ # in contrast to typical applications, we now want to find values below the p_threshold
13761377 df , _ , _ , _ , p_sig_idx = _prep_ratio_data (
1377- df , p_colname , None , p_thresh , signficance_label = "p-value"
1378+ df ,
1379+ p_colname ,
1380+ None ,
1381+ p_thresh ,
1382+ significance_label = "p-value" ,
1383+ direction = "inside" ,
13781384 )
13791385
13801386 if log_fc_thresh is not None :
13811387 # (3) significant above or below fc-thresh
13821388 df , _ , _ , _ , logfc_sig_idx = _prep_ratio_data (
1383- df , log_fc_colname , None , log_fc_thresh , signficance_label = "log2FC"
1389+ df , log_fc_colname , None , log_fc_thresh , significance_label = "log2FC"
13841390 )
13851391
13861392 if p_thresh is not None and log_fc_thresh is not None :
@@ -1398,7 +1404,7 @@ def volcano(
13981404 log_fc_colname : str ,
13991405 p_colname : str = None ,
14001406 score_colname : str = None ,
1401- p_thresh : float | None = 0.05 ,
1407+ p_thresh : float | None | tuple [ float | None ] = 0.05 ,
14021408 log_fc_thresh : float | None = np .log2 (2 ),
14031409 pointsize_colname : str | float = None ,
14041410 pointsize_scaler : float = 1 ,
@@ -1759,7 +1765,21 @@ def volcano(
17591765 ax , ratio_thresh_x = log_fc_thresh , ratio_thresh_y = None
17601766 )
17611767 if p_thresh is not None :
1762- _ratio_plot_style_axes (ax , ratio_thresh_x = None , ratio_thresh_y = p_thresh )
1768+ # convert p value to score
1769+ if isinstance (p_thresh , (int , float )):
1770+ score = - np .log10 (p_thresh )
1771+ elif isinstance (p_thresh , (list , tuple )):
1772+ score = tuple (
1773+ (
1774+ - np .log10 (x ) if isinstance (x , (float , int )) else None
1775+ for x in p_thresh
1776+ )
1777+ )
1778+ else :
1779+ raise ValueError (
1780+ f"[volcano] Cannot convert p_thresh { p_thresh } to score"
1781+ )
1782+ _ratio_plot_style_axes (ax , ratio_thresh_x = None , ratio_thresh_y = score )
17631783
17641784 if ret_fig :
17651785 return fig
@@ -1935,7 +1955,8 @@ def _prep_ratio_data(
19351955 col_name1 : str ,
19361956 col_name2 : str | None ,
19371957 ratio_thresh : tuple [float | None , float | None ] | float | None ,
1938- signficance_label : str = "ratio_thresh" ,
1958+ significance_label : str = "ratio_thresh" ,
1959+ direction : Literal ["inside" , "outside" ] = "outside" ,
19391960) -> tuple [pd .DataFrame , str , str , pd .Index , pd .Index ]:
19401961 """
19411962 Prepare ratio data for analysis.
@@ -1959,8 +1980,11 @@ def _prep_ratio_data(
19591980 lower and upper bounds for the ratio threshold. If a single float is provided,
19601981 it is treated as both the lower and upper bound. If None is included in the tuple,
19611982 that bound is ignored.
1962- signficance_label : str
1983+ significance_label : str
19631984 The label to assign to significant ratios in the 'SigCat' column.
1985+ direction : Literal['inside', 'outside']
1986+ Whether values inside the lower, upper boundary or outside of it are considered significant.
1987+ If a single number is given as threshold, the boundaries are calculated as (-value, value).
19641988
19651989 Returns
19661990 -------
@@ -1983,7 +2007,7 @@ def _prep_ratio_data(
19832007 pass
19842008 else :
19852009 raise ValueError (
1986- f"[prep_ratio_data] The ratio threshold must be a number or a tuple of axactly two numbers but was { type (ratio_thresh )} : { ratio_thresh } ."
2010+ f"[prep_ratio_data] The ratio threshold must be a number or a tuple of exactly two numbers but was { type (ratio_thresh )} : { ratio_thresh } ."
19872011 )
19882012
19892013 if not isinstance (col_name1 , str ):
@@ -2007,42 +2031,66 @@ def _prep_ratio_data(
20072031
20082032 df ["SigCat" ] = "not significant" # default value
20092033 if ratio_thresh is not None :
2010- if not isinstance (ratio_thresh , tuple ):
2034+ if isinstance (ratio_thresh , ( int , float , None ) ):
20112035 ratio_thresh = (- ratio_thresh , ratio_thresh )
2036+ print (
2037+ f"[prep_ratio_data] Setting threshold for { significance_label } to { ratio_thresh } ."
2038+ )
20122039
2013- if ratio_thresh [1 ] is None :
2014- sig_up = pd .Series ([False ] * len (df ), index = df .index )
2040+ if ratio_thresh [0 ] is None :
2041+ sig_lower = pd .Series ([False ] * len (df ), index = df .index )
20152042 else :
2016- sig_up = df [col_name1 ] > ratio_thresh [1 ]
2043+ if direction == "inside" :
2044+ sig_lower = df [col_name1 ] > ratio_thresh [0 ]
2045+ else :
2046+ sig_lower = df [col_name1 ] < ratio_thresh [0 ]
20172047
2018- if ratio_thresh [0 ] is None :
2019- sig_down = pd .Series ([False ] * len (df ), index = df .index )
2048+ if ratio_thresh [1 ] is None :
2049+ sig_upper = pd .Series ([False ] * len (df ), index = df .index )
20202050 else :
2021- sig_down = df [col_name1 ] < ratio_thresh [0 ]
2051+ if direction == "inside" :
2052+ sig_upper = df [col_name1 ] < ratio_thresh [1 ]
2053+ else :
2054+ sig_upper = df [col_name1 ] > ratio_thresh [1 ]
20222055
20232056 if col_name2 is None :
20242057 # significantly up or down
2025- df .loc [sig_up | sig_down , "SigCat" ] = signficance_label
2058+ if direction == "inside" :
2059+ # when the value is inside the interval, both tests must be true
2060+ df .loc [sig_upper & sig_lower , "SigCat" ] = significance_label
2061+ else :
2062+ df .loc [sig_upper | sig_lower , "SigCat" ] = significance_label
20262063 else :
2027-
2028- if ratio_thresh [1 ] is None :
2029- sig_up_2 = pd .Series ([False ] * len (df ), index = df .index )
2064+ if ratio_thresh [0 ] is None :
2065+ sig_lower2 = pd .Series ([False ] * len (df ), index = df .index )
20302066 else :
2031- sig_up_2 = df [col_name2 ] > ratio_thresh [1 ]
2067+ if direction == "inside" :
2068+ sig_lower2 = df [col_name2 ] > ratio_thresh [0 ]
2069+ else :
2070+ sig_lower2 = df [col_name2 ] < ratio_thresh [0 ]
20322071
2033- if ratio_thresh [0 ] is None :
2034- sig_down_2 = pd .Series ([False ] * len (df ), index = df .index )
2072+ if ratio_thresh [1 ] is None :
2073+ sig_upper2 = pd .Series ([False ] * len (df ), index = df .index )
20352074 else :
2036- sig_down_2 = df [col_name2 ] < ratio_thresh [0 ]
2075+ if direction == "inside" :
2076+ sig_upper2 = df [col_name2 ] < ratio_thresh [1 ]
2077+ else :
2078+ sig_upper2 = df [col_name2 ] > ratio_thresh [1 ]
20372079
20382080 # significantly up or down in both
2039- df .loc [
2040- (sig_up & sig_up_2 ) | (sig_down & sig_down_2 ),
2041- "SigCat" ,
2042- ] = signficance_label
2081+ if direction == "inside" :
2082+ df .loc [
2083+ (sig_upper & sig_lower ) & (sig_upper2 & sig_lower2 ),
2084+ "SigCat" ,
2085+ ] = significance_label
2086+ else :
2087+ df .loc [
2088+ (sig_upper & sig_upper2 ) | (sig_lower & sig_lower2 ),
2089+ "SigCat" ,
2090+ ] = significance_label
20432091
20442092 non_sig_idx = df [df ["SigCat" ] == "not significant" ].index
2045- sig_idx = df [df ["SigCat" ] == signficance_label ].index
2093+ sig_idx = df [df ["SigCat" ] == significance_label ].index
20462094
20472095 return df , col_name1 , col_name2 , non_sig_idx , sig_idx
20482096
0 commit comments