4
4
#-----------------------------------------------------------------------------
5
5
# Imports
6
6
#-----------------------------------------------------------------------------
7
- from __future__ import print_function
8
7
9
8
import warnings
10
-
11
9
import numpy as np
12
10
import networkx as nx
13
11
@@ -40,15 +38,22 @@ def slice_data(data, sub, block, subcond=None):
40
38
41
39
42
40
def format_matrix (data ,s ,b ,lk ,co ,idc = [],costlist = [],nouptri = False ):
43
- """ Function which formats matrix for a particular subject and particular block (thresholds, upper-tris it) so that we can make a graph object out of it
41
+ """ Function which thresholds the adjacency matrix for a particular
42
+ subject and particular block, using lookuptable to find thresholds,
43
+ cost value to find threshold, costlist
44
+ (thresholds, upper-tris it) so that we can use it with simulated annealing
44
45
45
- Parameters:
46
+ Parameters
46
47
-----------
47
- data = full data array
48
+ data : full data array 4D (block, sub, node, node)
48
49
s = subject
49
50
b = block
50
51
lk = lookup table for study
51
52
co = cost value to threshold at
53
+ idc = index of ideal cost
54
+ costlist = list (size num_edges) with ordered values used to find
55
+ threshold to control number of edges
56
+ nouptri = if False only keeps upper tri, True yields symmetric matrix
52
57
"""
53
58
54
59
cmat = data [b ,s ]
@@ -65,15 +70,28 @@ def format_matrix(data,s,b,lk,co,idc = [],costlist=[],nouptri = False):
65
70
def format_matrix2 (data ,s ,sc ,c ,lk ,co ,idc = [],costlist = [],nouptri = False ):
66
71
""" Function which formats matrix for a particular subject and particular block (thresholds, upper-tris it) so that we can make a graph object out of it
67
72
68
- Parameters:
69
- -----------
70
- data = full data array
71
- s = subject
72
- b = block
73
- lk = lookup table for study
74
- co = cost value to threshold at
73
+ Parameters
74
+ ----------
75
+ data : numpy array
76
+ full data array 5D (subcondition, condition, subject, node, node)
77
+ s : int
78
+ index of subject
79
+ sc : int
80
+ index of sub condition
81
+ c : int
82
+ index of condition
83
+ lk : numpy array
84
+ lookup table for thresholds at each possible cost
85
+ co : float
86
+ cost value to threshold at
87
+ idc : float
88
+ ideal cost
89
+ costlist : list
90
+ list of possible costs
91
+ nouptri : bool
92
+ False zeros out diag and below, True returns symmetric matrix
75
93
"""
76
-
94
+
77
95
cmat = data [sc ,c ,s ]
78
96
th = cost2thresh2 (co ,s ,sc ,c ,lk ,[],idc ,costlist ) #get the right threshold
79
97
@@ -85,6 +103,46 @@ def format_matrix2(data,s,sc,c,lk,co,idc = [],costlist=[],nouptri = False):
85
103
86
104
return cmat
87
105
106
def threshold_adjacency_matrix(adj_matrix, cost):
    """Placeholder stub: not implemented yet.

    Parameters
    ----------
    adj_matrix : unused
    cost : unused

    Returns
    -------
    None
        Both arguments are currently ignored.
    """
    return None
110
+
111
+
112
def all_positive(adjacency_matrix):
    """Check whether every edge value in an adjacency matrix is non-negative.

    Parameters
    ----------
    adjacency_matrix : array-like
        Edge values; any dimensionality is accepted.

    Returns
    -------
    all_positive : bool
        True if all values are >= 0
        False if any value is < 0 (NaN compares False against 0, so an
        input containing NaN also returns False)
    """
    # Element-wise comparison works for any number of dimensions; building
    # a set from the rows of a 2-D array fails because rows are unhashable.
    return bool(np.all(np.asarray(adjacency_matrix) >= 0))
124
+
125
+
126
def make_cost_thresh_lookup(adjacency_matrix):
    """Build a threshold/cost lookup table from an adjacency matrix.

    Only the entries strictly above the diagonal (offset 1) are used,
    which for a symmetric matrix is one value per edge. These values
    are sorted from lowest to highest.

    Returns
    -------
    lookup : numpy array
        3 X number_of_edges, numpy array
        row 0 is the sorted edge values (thresholds)
        row 1 is position / number_of_edges for each sorted value
        row 2 is row 1 rounded to one decimal point
    """
    upper = np.triu_indices_from(adjacency_matrix, k=1)
    edge_values = np.sort(adjacency_matrix[upper])
    n_edges = edge_values.shape[0]
    fraction = np.arange(n_edges) / float(n_edges)

    table = np.empty((3, n_edges))
    table[0] = edge_values
    table[1] = fraction
    table[2] = np.round(fraction, decimals=1)
    return table
88
146
89
147
def cost_size (nnodes ):
90
148
warnings .warn ('deprecated: use make_cost_array' , DeprecationWarning )
@@ -125,9 +183,17 @@ def make_cost_array(n_nodes, cost=0.5):
125
183
costs = np .array (range (int (tot_edges * cost )), dtype = float ) / tot_edges
126
184
return costs , tot_edges
127
185
186
def metrics_to_pandas():
    """Placeholder stub: not implemented yet.

    Returns
    -------
    None
    """
    return None
128
189
129
190
def store_metrics (b , s , co , metd , arr ):
130
- """Store a set of metrics into a structured array"""
191
+ """Store a set of metrics into a structured array
192
+ b = block
193
+ s = subject
194
+ co = cost? float
195
+ metd = dict of metrics
196
+ arr : array?"""
131
197
132
198
if arr .ndim == 3 :
133
199
idx = b ,s ,co
@@ -148,6 +214,8 @@ def regular_lattice(n,k):
148
214
149
215
This type of graph is the starting point for the Watts-Strogatz small-world
150
216
model, where connections are then rewired in a second phase.
217
+
218
+ XXX TODO Use as comparison, check networkx to see if its update worth redundancy
151
219
"""
152
220
# Code simplified from the networkx.watts_strogatz_graph one
153
221
G = nx .Graph ()
@@ -282,18 +350,23 @@ def normalize(arr,mode='direct',folding_edges=None):
282
350
"""Normalize an array to [0,1] range.
283
351
284
352
By default, this simply rescales the input array to [0,1]. But it has a
285
- special 'folding' mode that allows for the normalization of an array with
286
- negative and positive values by mapping the negative values to their
287
- flipped sign
353
+ special 'folding' mode that takes the absolute value of all values; in addition,
354
+ values between the folding_edges (low_cutoff, high_cutoff) will be zeroed.
288
355
289
356
Parameters
290
357
----------
291
358
arr : 1d array
292
-
359
+ assumes dtype == float, if int32, will raise ValueError
360
+
293
361
mode : string, one of ['direct','folding']
362
+ if direct rescale all values (pos and neg) between 0,1
363
+ if folding, zeros out values between folding_values (inclusive)
364
+ and normalizes absolute value of remaining values
294
365
295
366
folding_edges : (float,float)
296
- Only needed for folding mode, ignored in 'direct' mode.
367
+ (low_cutoff, high_cutoff) lower and upper values to zero out
368
+ (values are inclusive)
369
+ Only needed for folding mode, ignored in 'direct' mode.
297
370
298
371
Examples
299
372
--------
@@ -315,37 +388,23 @@ def normalize(arr,mode='direct',folding_edges=None):
315
388
>>> c
316
389
array([-0.8 , -0.6333, -0.4667, -0.3 , 0.3 , 0.4333, 0.5667, 0.7 ])
317
390
>>> normalize(c,'folding',[-0.3,0.3])
318
- array([ 1. , 0.6667 , 0.3333 , 0. , 0. , 0.2667 , 0.5333 , 0.8 ])
391
+ array([ 1. , 0.7917 , 0.5833 , 0. , 0. , 0.5417 , 0.7083 , 0.875 ])
319
392
"""
320
393
if mode == 'direct' :
321
394
return rescale_arr (arr ,0 ,1 )
322
- else :
323
- fa , fb = folding_edges
395
+ elif mode == 'folding' :
396
+ # cast folding_edges to floats in case inputs are ints
397
+ low_cutoff , high_cutoff = [float (x ) for x in folding_edges ]
324
398
amin , amax = arr .min (), arr .max ()
325
- ra , rb = float ( fa - amin ), float ( amax - fb ) # in case inputs are ints
326
- if ra < 0 or rb < 0 :
399
+ low_diff , high_diff = low_cutoff - amin , amax - high_cutoff
400
+ if low_diff < 0 or high_diff < 0 :
327
401
raise ValueError ("folding edges must be within array range" )
328
- greater = arr >= fb
329
- upper_idx = greater .nonzero ()
330
- lower_idx = (~ greater ).nonzero ()
331
- # Two folding scenarios, we map the thresholds to zero but the upper
332
- # ranges must retain comparability.
333
- if ra > rb :
334
- lower = 1.0 - rescale_arr (arr [lower_idx ],0 ,1.0 )
335
- upper = rescale_arr (arr [upper_idx ],0 ,float (rb )/ ra )
336
- else :
337
- upper = rescale_arr (arr [upper_idx ],0 ,1 )
338
- # The lower range is trickier: we need to rescale it and then flip
339
- # it, so the edge goes to 0.
340
- resc_a = float (ra )/ rb
341
- lower = rescale_arr (arr [lower_idx ],0 ,resc_a )
342
- lower = resc_a - lower
343
- # Now, make output array
344
- out = np .empty_like (arr )
345
- out [lower_idx ] = lower
346
- out [upper_idx ] = upper
347
- return out
348
-
402
+ mask = np .logical_and ( arr >= low_cutoff , arr <= high_cutoff )
403
+ out = arr .copy ()
404
+ out [mask ] = 0
405
+ return rescale_arr (np .abs (out ), 0 , 1 )
406
+ else :
407
+ raise ValueError ('Unknown mode %s: valid options("direct", "folding")' )
349
408
350
409
def mat2graph (cmat ,threshold = 0.0 ,threshold2 = None ):
351
410
"""Make a weighted graph object out of an adjacency matrix.
@@ -559,45 +618,59 @@ def cost2thresh(cost, sub, bl, lk, idc=[], costlist=[]):
559
618
return th
560
619
561
620
562
def cost2thresh2(cost, sub, sc, c, lk, last=None, idc=[], costlist=[]):
    """Find the threshold associated with a particular cost for a
    particular subject in a particular block of data.

    The subject's lookup entry is obtained with
    ``slice_data(lk, sub, c, subcond=sc)``. Row 1 of that entry is
    searched for an exact match of ``cost`` and the corresponding value
    in row 0 is returned.

    Inputs
    ------
    cost : float
        cost value for which we need the associated threshold
    sub : int
        subject number
    sc : int
        sub condition index, passed to slice_data as ``subcond``
    c : int
        condition (or block) index, passed to slice_data as ``block``
    lk : numpy array
        lookup table; the sliced entry must hold thresholds in row 0
        and costs in row 1
    last : None
        NOT USED last threshold value
    idc : int or empty list
        Index in costlist corresponding to cost currently being
        processed. By default, idc is an empty list.
    costlist : array-like
        List of costs that are being queried with the current function
        in order.

    Returns
    -------
    threshold : float
        threshold value for this cost

    Raises
    ------
    TypeError
        if ``cost`` has no exact match and ``idc`` was left as its
        default empty list (no fallback cost can be computed)
    """
    subject_lookup = slice_data(lk, sub, c, subcond=sc)
    matches = np.where(subject_lookup[1] == cost)
    # Use the indices computed just above (they were previously read
    # from an undefined name, which raised NameError on every call).
    threshold = subject_lookup[0][matches]

    if len(threshold) > 1:
        # if there are multiple thresholds, go down to the lower cost
        # NOTE(review): the original author flagged this choice as
        # unverified ("Is this right?") -- confirm.
        threshold = threshold[0]
        print('Subject %s has multiple thresholds at cost %s' % (sub, cost))
        print('index 1: %s, index 2: %s' % (c, sc))
    elif len(threshold) < 1:
        # No exact match: retry with the previous entry of costlist.
        # NOTE(review): once idc reaches 0 the next step uses index -1,
        # which wraps to the END of costlist -- confirm this is intended.
        idc = idc - 1
        newcost = costlist[idc]
        threshold = cost2thresh2(newcost, sub, sc, c, lk,
                                 idc=idc, costlist=costlist)
        print(' '.join(['Subject %s does not have cost at %s' % (sub, cost),
                        'index 1: %s, index 2: %s' % (c, sc),
                        'nearest cost %s being used' % (newcost)]))
    else:
        threshold = threshold[0]

    return threshold
601
674
602
675
603
676
def apply_cost (corr_mat , cost , tot_edges ):
0 commit comments