22import pandas as pd
33import random
44import matplotlib .pyplot as plt
5+ from sklearn .preprocessing import StandardScaler
6+ from sklearn .decomposition import PCA
7+ from sklearn .preprocessing import LabelEncoder
8+
# Feature columns expected in the ARCADE metric CSV: the sample key,
# six vessel-geometry measures, and thirteen graph-topology statistics.
column_names = [
    "KEY",
    # vessel geometry
    "RADIUS", "LENGTH", "WALL", "SHEAR", "CIRCUM", "FLOW",
    # graph topology
    "NODES", "EDGES", "GRADIUS", "GDIAMETER", "AVG_ECCENTRICITY",
    "AVG_SHORTEST_PATH", "AVG_IN_DEGREES", "AVG_OUT_DEGREES",
    "AVG_DEGREE", "AVG_CLUSTERING", "AVG_CLOSENESS",
    "AVG_BETWEENNESS", "AVG_CORENESS",
]
517
618# Define distance function based on the paper
719def distance_function (y_obs , y_sim , weight = 1.0 ):
@@ -61,22 +73,27 @@ def mcmc(data, y_sims, y_obs, n_iterations, proposal_std=1.0):
6173 samples .append (np .append (current_theta , proposal_y_sim ))
6274 # Remove duplicates in the samples
6375 #samples = list(set(tuple(row) for row in samples))
64- return pd .DataFrame (samples , columns = [ "NODES" , "EDGES" , "GRADIUS" , "ACTIVITY" ])
76+ return pd .DataFrame (samples , columns = column_names + [ "ACTIVITY" ])
6577
6678def main ():
6779 # Load ABM data
68- data_path = "../../data/ARCADE/C-feature_0 .0_metric_15-04032023.csv"
80+ data_path = "../../data/ARCADE/C-feature_15 .0_metric_15-04032023.csv"
6981 data = pd .read_csv (data_path )
82+ data = data [data ["COMPONENTS" ] == 1 ]
83+ threshold = 0.2
84+ columns_to_drop = [col for col in data .columns if ((data [col ] == np .inf ) | (data [col ] == - np .inf )).mean () >= threshold ]
85+ data = data .drop (columns = columns_to_drop )
7086
7187 # Extract inputs (theta) and outputs (y)
72- input_feature_names = ["NODES" , "EDGES" , "GRADIUS" ]
88+ input_feature_names = column_names # ["NODES", "EDGES", "GRADIUS"]
7389 # input_feature_names = ["ACTIVITY"]
7490 predicted_output = ["ACTIVITY" ]#, "GROWTH", "SYMMETRY"]
7591 input_features = data [input_feature_names ].values
92+
7693 y_sims = data [predicted_output ].values
7794
7895 # Observed value
79- y_obs = [1 ]#, -10, 0]
96+ y_obs = [0.25 ]#, -10, 0]
8097
8198 # Run MCMC
8299 n_iterations = 10000
@@ -89,12 +106,64 @@ def main():
89106 print (f"Number of samples: { len (posterior_samples )} " )
90107 print (posterior_samples .describe ())
91108 # Plot the accepted samples activity
92- fig , ax = plt .subplots (1 , 2 , figsize = (10 , 5 ))
109+ fig , ax = plt .subplots (1 , 3 , figsize = (15 , 5 ))
93110 _ , bins , patch = ax [0 ].hist (y_sims , bins = 20 )
94111 ax [0 ].set_title ("Prior - Activity" )
95-
112+ ax [0 ].set_xlim ([- 1 , 1 ])
113+ ax [0 ].set_xlabel ("Activity" )
114+ ax [0 ].set_ylabel ("Number of samples" )
96115 ax [1 ].hist (posterior_samples ["ACTIVITY" ], bins = bins )
97- ax [1 ].set_title ("Posterior - Activity" )
116+ ax [1 ].set_title ("Posterior - Activity (MCMC)" )
117+ ax [1 ].set_xlim ([- 1 , 1 ])
118+ ax [1 ].set_xlabel ("Activity" )
119+ ax [1 ].axvline (y_obs [0 ], color = "red" , linestyle = "--" , label = "Target activity" )
120+ ax [1 ].legend ()
121+
122+ pca = PCA (n_components = 2 )
123+ scaler = StandardScaler ()
124+ features = scaler .fit_transform (input_features [:, 1 :])
125+ label_encoder = LabelEncoder ()
126+ labels = label_encoder .fit_transform (input_features [:, 0 ])
127+ reduced_features = pca .fit_transform (features )
128+ categories = label_encoder .classes_
129+ markers = ['o' , 's' , 'D' , '^' , 'v' , '<' , '>' , 'p' , '*' , 'h' , 'H' , '+' , 'x' , 'd' , '|' , '_' ]
130+ unique_labels = np .unique (labels )
131+ cmap = plt .cm .viridis
132+ # drop duplicates
133+ posterior_samples = posterior_samples .drop_duplicates (subset = input_feature_names )
134+ posterior_reduced_features = pca .transform (scaler .transform (posterior_samples [input_feature_names ].values [:, 1 :]))
135+ posterior_labels = label_encoder .transform (posterior_samples [input_feature_names ].values [:, 0 ])
136+
137+ for i , label in enumerate (unique_labels ):
138+ ax [2 ].scatter (reduced_features [labels == label , 0 ],
139+ reduced_features [labels == label , 1 ],
140+ marker = markers [i % len (markers )],
141+ label = f"{ categories [label ]} " ,
142+ facecolors = 'none' ,
143+ edgecolors = cmap (i / len (unique_labels ))
144+ )
145+ ax [2 ].scatter (posterior_reduced_features [posterior_labels == label , 0 ],
146+ posterior_reduced_features [posterior_labels == label , 1 ],
147+ marker = markers [i % len (markers )],
148+ facecolors = cmap (i / len (unique_labels )),
149+ edgecolors = 'none' , alpha = 0.8
150+ )
151+
152+ # Create custom legends
153+ handles1 = [plt .Line2D ([0 ], [0 ], marker = markers [i % len (markers )], color = 'w' , label = categories [label ],
154+ markerfacecolor = 'none' , markeredgecolor = cmap (i / len (unique_labels )))
155+ for i , label in enumerate (unique_labels )]
156+ handles2 = [plt .Line2D ([0 ], [0 ], marker = 'o' , color = 'w' , label = 'Prior' , markerfacecolor = 'none' , markeredgecolor = 'k' ),
157+ plt .Line2D ([0 ], [0 ], marker = 'o' , color = 'w' , label = 'Posterior' , markerfacecolor = 'k' , markeredgecolor = 'none' , alpha = 0.5 )]
158+
159+ legend1 = ax [2 ].legend (handles = handles1 , title = "Vasculature type" , loc = 'upper right' )
160+ ax [2 ].add_artist (legend1 )
161+ ax [2 ].legend (handles = handles2 , title = "Distribution" , loc = 'lower right' )
162+ ax [2 ].set_title ("PCA - Vasculature distribution" )
163+ ax [2 ].set_xlabel ("PC1" )
164+ ax [2 ].set_ylabel ("PC2" )
165+ plt .tight_layout ()
166+
98167 plt .savefig ("posterior_mcmc.png" )
99168
100169if __name__ == "__main__" :
0 commit comments