function B = baitsample(A, bait, prey)
% BAITSAMPLE   bait and prey subsampling
%
%   Input   A: n by n adjacency matrix
%        bait: proportion of nodes to sample, a scalar in [0,1].
%              Defaults to 0.5 (also when omitted or passed as []).
%        prey: proportion of edges to retain from each "bait" node,
%              a non-negative scalar; values above 1 behave like 1.
%              Defaults to 0.5 (also when omitted or passed as []).
%
%   Output  B: adjacency matrix with the attribute sparse.
%              Dimension of B cannot be predicted.
%
%   Description:    ceil(bait*n) nodes are drawn uniformly at random as
%                   "baits". Baits are processed one after another: the
%                   bait's row and column are copied from A into B, and
%                   then only ceil(prey*k) of the k nonzero entries of
%                   that row are retained (chosen uniformly at random);
%                   the others are set to zero together with their
%                   mirrored entries in the bait's column. Because the
%                   copy from A happens per bait, an edge between two
%                   baits is decided by whichever of the two is processed
%                   last. All other entries of B stay zero.
%                   Nodes with neither incoming nor outgoing edges in B
%                   are then removed.
%
%   Errors: baitsample:notSquare    A is not a square 2-D matrix
%           baitsample:badBait      bait is not a scalar in [0,1]
%           baitsample:badPrey      prey is not a non-negative scalar
%
%   Reference:  J. Han, D. Dupuy, N. Bertin, M. Cusick, M. Vidal,
%               Effect of sampling on topology predictions of
%               protein-protein interaction networks,
%               Nature Biotechnology 23 (2005), pp. 839-844.
%
%   Example: B = baitsample(A,0.1,0.4);

% Defaults: a missing or empty argument selects the documented default.
if nargin < 2 || isempty(bait)
    bait = 0.5;
end
if nargin < 3 || isempty(prey)
    prey = 0.5;
end

% Validate up front so bad input fails with a clear message instead of an
% index error (bait > 1) or an endless loop (prey < 0).
if ndims(A) ~= 2 || size(A, 1) ~= size(A, 2)
    error('baitsample:notSquare', 'A must be a square adjacency matrix.');
end
if ~isscalar(bait) || ~(bait >= 0 && bait <= 1)
    error('baitsample:badBait', 'bait must be a scalar in [0,1].');
end
if ~isscalar(prey) || ~(prey >= 0)
    error('baitsample:badPrey', 'prey must be a non-negative scalar.');
end

n = size(A, 1);
B = sparse(n, n);

rp = randperm(n);
ibait = rp(1 : ceil(bait * n));     % bait rows/columns to be retained

for i = 1 : length(ibait)

    b = ibait(i);

    % Copy the bait's full neighbourhood from A (this overwrites whatever
    % an earlier bait decided about edges touching b).
    B(b, :) = A(b, :);
    B(:, b) = A(:, b);

    iprey = find(B(b, :));
    k = length(iprey);
    nkeep = min(k, ceil(prey * k)); % number of prey nodes for ith bait

    if k > nkeep
        % Drop a uniformly random subset of size k-nkeep in one shot;
        % same distribution as removing random edges one at a time.
        order = randperm(k);
        drop = iprey(order(1 : k - nkeep));
        B(b, drop) = 0;
        B(drop, b) = 0;
    end

end

% Trim isolated nodes: keep a node if its row or its column has any entry,
% so nodes with only outgoing edges (directed input) are not discarded.
keep = full(any(B, 1)) | full(any(B, 2))';
B = B(keep, keep);
0 commit comments