@@ -123,7 +123,86 @@ def load_graphs(fileName,TAILLE):
123123 dicoNodes [j ]= labelVertices [i ][j ]
124124 for j in range (int (edges [i ])):
125125 graphes [i ].add_edge (labelEdges [i ][j ][0 ],labelEdges [i ][j ][1 ],feature = labelEdges [i ][j ][2 ])
126- graphes [i ].add_nodes_from ([(node , {'feature' : attr }) for (node , attr ) in dicoNodes .items ()])
126+ graphes [i ].add_nodes_from ([(node , {'feature' :attr }) for (node , attr ) in dicoNodes .items ()])
127+ return graphes ,numbers ,noms
def load_graphs_DGCNN(fileName, TAILLE):
    """Load graphs from a text file, storing node labels as 1-element lists.

    The file is read line by line and split on single spaces; the first
    token selects the record type:
      * ``t``: starts a new graph; the third token is its integer id.
      * ``v``: declares a vertex; the third token is its integer label.
      * ``e``: declares an edge ``e <src> <dst> <label>``.
      * ``x``: integers attached to the current graph id (``#`` tokens and
        the last token of the line are ignored).

    Unlike ``load_graphs``, each node's ``feature`` attribute is wrapped in a
    list (``[label]``) rather than stored as a bare value.

    args: fileName (string) : path of the file to read
          TAILLE (int) : the number of graphs expected in the file

    return: graphes (list of networkx graphs) : ``TAILLE`` graphs; graphs
                missing from the file are left empty
            numbers (list of list of int) : for each graph id, the integers
                read from its ``x`` line (empty list if none)
            noms (list of string) : for each graph, the concatenated raw
                ``v`` and ``e`` lines that describe it
    """
    numbers = [[] for _ in range(TAILLE)]
    noms = []

    # Per-graph counters and labels; `compteur` is the position of the graph
    # currently being read (-1 until the first "t" line is seen).
    vertices = np.zeros(TAILLE)
    labelVertices = []
    edges = np.zeros(TAILLE)
    labelEdges = []
    compteur = -1
    numero = 0
    temptre = ""

    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName, "r") as file:
        for line in file:
            b = line.split(" ")
            kind = b[0]
            if kind == "t":
                compteur += 1
                if compteur > 0:
                    # Flush the textual description of the previous graph.
                    noms.append(temptre)
                labelVertices.append([])
                labelEdges.append([])
                # int() ignores surrounding whitespace, including the newline.
                numero = int(b[2])
                temptre = ""
            elif kind == "v":
                vertices[compteur] += 1
                labelVertices[compteur].append(int(b[2]))
                temptre += line
            elif kind == "e":
                edges[compteur] += 1
                labelEdges[compteur].append((int(b[1]), int(b[2]), int(b[3])))
                temptre += line
            elif kind == "x":
                # The last token of the line is deliberately not parsed.
                numbers[numero] = [int(tok) for tok in b[1:-1] if tok != "#"]

    # Flush the description of the last graph (nothing to flush if the file
    # contained no "t" line at all).
    if compteur >= 0:
        noms.append(temptre)

    graphes = []
    for i in range(len(vertices)):
        graphe = nx.Graph()
        # Edges are inserted first, then node attributes, as in load_graphs.
        for j in range(int(edges[i])):
            source, target, label = labelEdges[i][j]
            graphe.add_edge(source, target, feature=label)
        graphe.add_nodes_from(
            [(j, {'feature': [labelVertices[i][j]]}) for j in range(int(vertices[i]))]
        )
        graphes.append(graphe)
    return graphes, numbers, noms
128207
129208def load_patterns (fileName ,TAILLE ):
@@ -419,6 +498,127 @@ def pangProcessing(Ks,keep,labels,id_graphs_mono,id_graphs_iso,occurences_mono,o
419498from grakel import graph_from_networkx
420499from grakel .datasets import fetch_dataset
421500from grakel .kernels import WeisfeilerLehman , VertexHistogram , WeisfeilerLehmanOptimalAssignment
501+ from stellargraph .mapper import PaddedGraphGenerator
502+ from stellargraph .layer import DeepGraphCNN
503+ from stellargraph import StellarGraph
504+ from tensorflow .keras import Model
505+ from tensorflow .keras .optimizers import Adam
506+ from tensorflow .keras .layers import Dense , Conv1D , MaxPool1D , Dropout , Flatten
507+ from tensorflow .keras .losses import binary_crossentropy
508+ import tensorflow as tf
509+
510+ import keras .backend as K
511+
def f1_score(y_true, y_pred):
    """Compute the F1 score of rounded predictions with Keras backend ops.

    Both inputs are clipped to [0, 1] and rounded before counting, so
    ``y_pred`` may hold probabilities.

    args: y_true : tensor of ground-truth labels
          y_pred : tensor of predictions, same shape as ``y_true``

    return: scalar tensor holding the F1 score; it is 0 when there are no
            true positives (including when there are no positive samples)
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))

    # K.epsilon() keeps every division finite: without it, a batch with no
    # predicted positives (or no actual positives) yields NaN. It also
    # removes the need for a Python-level `if` on a tensor value.
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (actual_positives + K.epsilon())

    return 2 * (precision * recall) / (precision + recall + K.epsilon())
532+
def modelGen(generator):
    """Build and compile a DGCNN binary classifier for the given generator.

    The network is a DeepGraphCNN stack followed by two 1D convolutions,
    a dense layer with dropout, and a single sigmoid output unit.

    args: generator : the graph generator passed to ``DeepGraphCNN``

    return: a compiled Keras ``Model`` trained with binary cross-entropy and
            reporting the metrics ``acc`` and ``f1_score`` (in that order)
    """
    k = 35  # the number of rows for the output tensor
    layer_sizes = [32, 32, 32, 1]

    dgcnn_model = DeepGraphCNN(
        layer_sizes=layer_sizes,
        activations=["tanh", "tanh", "tanh", "tanh"],
        k=k,
        bias=False,
        generator=generator,
    )
    x_inp, x_out = dgcnn_model.in_out_tensors()

    # Kernel size and stride both equal the summed layer sizes.
    # NOTE(review): presumably one window per sorted node - confirm.
    node_width = sum(layer_sizes)
    x_out = Conv1D(filters=16, kernel_size=node_width, strides=node_width)(x_out)
    x_out = MaxPool1D(pool_size=2)(x_out)
    x_out = Conv1D(filters=32, kernel_size=5, strides=1)(x_out)
    x_out = Flatten()(x_out)

    x_out = Dense(units=128, activation="relu")(x_out)
    x_out = Dropout(rate=0.5)(x_out)
    predictions = Dense(units=1, activation="sigmoid")(x_out)

    model = Model(inputs=x_inp, outputs=predictions)
    # `learning_rate` replaces the deprecated `lr` keyword of Adam.
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=binary_crossentropy,
        metrics=["acc", f1_score],
    )
    return model
563+
564+ from sklearn import model_selection
def DGCNN(index, keep, graphs, labels, cv, results):
    """Cross-validate the DGCNN baseline and store its F1 mean/std in results.

    args: index (int) : position of the dataset in ``results``
          keep (iterable of int) : indices of the graphs to use
          graphs (list of networkx graphs) : graphs whose nodes carry a
              ``feature`` attribute
          labels (sequence) : class label of each graph, indexed like ``graphs``
          cv : splitter exposing ``split(X, y)`` that yields
              ``(train_index, test_index)`` pairs
          results (array) : result table, updated in place

    return: ``results`` with ``results[index][9][1][0]`` set to the mean and
            ``results[index][9][1][1]`` to the standard deviation of the
            per-fold test F1 scores
    """
    kept = set(keep)  # O(1) membership tests
    stellarColl = []
    lab = []
    for j, graph in enumerate(graphs):
        if j in kept:
            stellarColl.append(StellarGraph(graph, node_features="feature"))
            lab.append(labels[j])

    gen = PaddedGraphGenerator(graphs=stellarColl)
    labelss = pd.get_dummies(lab, drop_first=True)

    epochs = 10
    fold_scores = []
    for train_index, test_index in cv.split(stellarColl, lab):
        y_train = [labelss.iloc[l] for l in train_index]
        y_test = [labelss.iloc[l] for l in test_index]

        # The split indices are 0-based positions in `stellarColl`, so they
        # are passed unchanged: shifting them by -1 would pair each target
        # with the wrong graph and leak test graphs into the training set.
        train_gen = gen.flow(
            list(train_index),
            targets=y_train,
            batch_size=50,
            symmetric_normalization=False,
        )

        # NOTE(review): validation uses the test fold, so the validation
        # curves are not an independent estimate.
        valid_gen = gen.flow(
            list(test_index),
            targets=y_test,
            batch_size=1,
            symmetric_normalization=False,
        )

        test_gen = gen.flow(
            list(test_index),
            targets=y_test,
            batch_size=len(test_index),
            symmetric_normalization=False,
        )

        model = modelGen(gen)
        model.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=True)
        test_metrics = model.evaluate(test_gen)
        print("\n Test Set Metrics:")
        # Index 2 is expected to be the F1 score, after loss and accuracy,
        # given the metrics modelGen compiles the model with.
        fold_scores.append(test_metrics[2])

    # Aggregate over the folds actually run instead of a fixed-size buffer.
    results[index][9][1][0] = np.mean(fold_scores)
    results[index][9][1][1] = np.std(fold_scores)
    return results
619+
620+
621+
422622
423623def Baselines (index ,DATASET ,Graphes ,cv ,labels ,results ):
424624 """ this function computes the baseline results for the graph classification task
@@ -517,7 +717,7 @@ def Baselines(index,DATASET,Graphes,cv,labels,results):
517717def Table2 ():
518718 """ this function computes the results of the table 1 of the paper
519719 results are saved in a csv file in the folder results"""
520- DATASETS = ["MUTAG" ]
720+ DATASETS = ["MUTAG" , "PTC" , "FOPPA" ]
521721 Ks = {"MUTAG" : 150 , "NCI1" : 3 , "DD" : 3 , "PTC" : 150 , "FOPPA" : 500 }
522722 results = np .zeros ((len (DATASETS ),10 ,2 ,2 ))
523723 for DATASET in DATASETS :
@@ -536,6 +736,7 @@ def Table2():
536736 print ("DATASET : " + str (arg ))
537737
538738 Graphes ,useless_var ,PatternsRed = load_graphs (FILEGRAPHS ,GRAPHLENGTH )
739+ DGCNN_graphs ,XX ,XX = load_graphs_DGCNN (FILEGRAPHS ,GRAPHLENGTH )
539740 Subgraphs ,id_graphs ,noms = load_graphs (FILESUBGRAPHS ,PATTERNLENGTH )
540741 xx ,id_graphs_mono ,occurences_mono = load_patterns (FILEMONOSET ,PATTERNLENGTH )
541742 xx ,id_graphs_iso ,occurences_iso = load_patterns (FILEISOSET ,PATTERNLENGTH )
@@ -567,6 +768,7 @@ def Table2():
567768 #keep only graphs which are in keep
568769 Graphs = [Graphes [i ] for i in keep ]
569770 results = Baselines (DATASETS .index (DATASET ),DATASET ,Graphs ,cv ,Y ,results )
771+ results = DGCNN (DATASETS .index (DATASET ),keep ,DGCNN_graphs ,labels ,cv ,results )
570772 print (results )
571773 data = pd .DataFrame (index = range (len (results [0 ])),columns = DATASETS )
572774 for i in range (len (results [0 ])):
0 commit comments