# BHscore/bhscore.py at master · KamitaniLab/BHscore · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'''A Module to compute the BH score.

Author: Souma Nonaka, Shuntaro C. Aoki
'''


import os

import numpy as np
from scipy.stats import spearmanr, t


def compute_bhscore(predacc_list, pval=0.05, return_top_rois=False,
                    n_samples=50):
    """Compute a BH score of a given DNN.

    For every unit, the ROI (row) with the highest prediction accuracy is
    taken as the unit's top ROI. Units whose best accuracy is positive and
    significant are kept, and the BH score is Spearman's rank correlation
    between the 1-based position of each kept unit's layer in
    `predacc_list` and the row index of its top ROI.

    Parameters
    ----------
    predacc_list : list of arrays
        List of prediction accuracies for a DNN. Each array contains
        prediction accuracies of individual units in a layer, formed as an
        array of ROIs x units. Accuracies are treated as correlation
        coefficients when testing significance. NaN entries are counted as
        zero; the input arrays themselves are left unmodified.
    pval : float, default = 0.05
        P-value threshold in unit selection.
    return_top_rois : bool, default = False
        Returns top ROIs if True.
    n_samples : int, default = 50
        Number of samples each prediction accuracy was computed from. It
        sets the degrees of freedom (`n_samples - 2`) of the t-test used in
        unit selection.

    Returns
    -------
    bhscore : float
        Spearman's rank correlation as returned by `scipy.stats.spearmanr`.
        May be NaN when the correlation is undefined (e.g. no unit passes
        the selection).
    top_rois: list of arrays
        Only returned if `return_top_rois` is True. One integer array per
        layer holding the top-ROI row index of every selected unit.

    Raises
    ------
    ValueError
        If `n_samples` is smaller than 3 (no degrees of freedom left).
    """

    if n_samples < 3:
        raise ValueError(
            'n_samples must be at least 3, got {}'.format(n_samples))
    dof = n_samples - 2

    top_rois = []
    for predacc in predacc_list:
        acc = np.asarray(predacc)
        # NaN accuracies count as zero. np.where builds a new array, so the
        # caller's data is never overwritten.
        acc = np.where(np.isnan(acc), 0, acc)

        # for each unit, find the ROI with the highest prediction accuracy
        best_acc = np.max(acc, axis=0)
        best_roi = np.argmax(acc, axis=0)

        # t statistic of a correlation coefficient r from n samples:
        #   t = r * sqrt((n - 2) / (1 - r^2))
        # |r| == 1 yields +/-inf, which maps to a p-value of 0 (or 2) below;
        # the clip guards against r marginally exceeding 1 by rounding.
        with np.errstate(divide='ignore', invalid='ignore'):
            t_stat = best_acc * np.sqrt(
                dof / np.clip(1.0 - best_acc ** 2, 0.0, None))

        # two-sided p-value; sf() avoids the precision loss of 1 - cdf()
        pvals = 2 * t.sf(t_stat, df=dof)

        # keep units with p value < threshold and acc > 0
        selected = np.logical_and(pvals < pval, best_acc > 0)
        top_rois.append(best_roi[selected])

    # pair every selected unit's 1-based layer number with its top ROI
    layer_numbers = []
    best_roi_flatten = []
    for layer_no, rois in enumerate(top_rois, start=1):
        layer_numbers.extend([layer_no] * len(rois))
        best_roi_flatten.extend(rois.tolist())

    # compute Spearman's rank correlation
    bhscore, _ = spearmanr(layer_numbers, best_roi_flatten)

    if return_top_rois:
        return bhscore, top_rois
    return bhscore


def compute_bhscore_layerselect(predacc_list, pval=0.05, n_layers=5,
                                n_repeat=100, return_top_rois=False):
    """Compute a BH score of a given DNN, random layer selection version.

    The BH score is computed `n_repeat` times, each time on a random subset
    of `n_layers` layers kept in their original order. Layers are drawn with
    NumPy's global random state (`np.random.choice`), so seed it with
    `np.random.seed` beforehand if reproducible results are needed.

    Parameters
    ----------
    predacc_list : list of arrays
        List of prediction accuracies for a DNN. Each array contains
        prediction accuracies of individual units in a layer, formed as an
        array of ROIs x units.
    pval : float, default = 0.05
        P-value threshold in unit selection.
    n_layers : int, default = 5
        The number of layers used to compute the BH score. Note that the first
        and last layers are always included in the computation. Thus,
        (n_layers - 2) layers are randomly selected from the representative
        layers except the first and last ones. Must satisfy
        2 <= n_layers <= len(predacc_list).
    n_repeat : int, default = 100
        The number of random layer selection.
    return_top_rois : bool, default = False
        Returns top ROIs if True.

    Returns
    -------
    bhscore_list : array of float
        One BH score per repetition.
    top_rois_list : list of list of arrays
        Only returned if `return_top_rois` is True. The top ROIs reported by
        `compute_bhscore` for each repetition.

    Raises
    ------
    ValueError
        If `n_layers` is smaller than 2 or larger than the number of layers
        in `predacc_list`.
    """

    n_available = len(predacc_list)
    if not 2 <= n_layers <= n_available:
        raise ValueError(
            'n_layers must be between 2 and the number of layers ({}), '
            'got {}'.format(n_available, n_layers))

    # candidate indices: every layer except the first and the last
    inner_index = np.arange(1, n_available - 1)
    n_inner = n_layers - 2

    bhscore_list = np.zeros(n_repeat)
    top_rois_list = []
    for i_s in range(n_repeat):
        # sample intermediate layers without replacement; sorting keeps the
        # sampled layers in their original order
        sample_index = np.sort(
            np.random.choice(inner_index, size=n_inner, replace=False))
        predacc_list_sampled = (
            [predacc_list[0]]
            + [predacc_list[i] for i in sample_index]
            + [predacc_list[-1]]
        )

        bhscore, top_rois = compute_bhscore(predacc_list_sampled, pval,
                                            return_top_rois=True)
        bhscore_list[i_s] = bhscore
        top_rois_list.append(top_rois)

    if return_top_rois:
        return bhscore_list, top_rois_list
    return bhscore_list


# Running this file as a script does nothing; the guard body is a no-op.
if __name__ == '__main__':
    pass