Skip to content

Commit c454c6d

Browse files
committed
BashModif
1 parent 0aefa88 commit c454c6d

File tree

3 files changed

+215
-5
lines changed

3 files changed

+215
-5
lines changed

src/ECML.py

Lines changed: 204 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,86 @@ def load_graphs(fileName,TAILLE):
123123
dicoNodes[j]=labelVertices[i][j]
124124
for j in range(int(edges[i])):
125125
graphes[i].add_edge(labelEdges[i][j][0],labelEdges[i][j][1],feature=labelEdges[i][j][2])
126-
graphes[i].add_nodes_from([(node, {'feature': attr}) for (node, attr) in dicoNodes.items()])
126+
graphes[i].add_nodes_from([(node, {'feature':attr}) for (node, attr) in dicoNodes.items()])
127+
return graphes,numbers,noms
128+
def load_graphs_DGCNN(fileName,TAILLE):
    """Load graphs from a text file, storing node labels as 1-element lists.

    The file is read line by line and split on single spaces. The first
    token selects the record type:

    * ``t``: starts a new graph; the third token is an integer id that is
      later used as the index into ``numbers`` for the graph's ``x`` line.
    * ``v``: a vertex; the third token is its integer label.
    * ``e``: an edge; tokens 1 and 2 are the endpoints and token 3 is the
      integer edge label.
    * ``x``: a list of integers; every token between the first and the
      last one that is not ``#`` is parsed as an int.

    Args:
        fileName (str): path of the file to read.
        TAILLE (int): the number of graphs expected in the file; exactly
            this many graphs are returned.

    Returns:
        tuple: ``(graphes, numbers, noms)`` where
            graphes (list of networkx.Graph): one graph per slot; each node
                carries ``feature=[label]`` and each edge ``feature=label``.
            numbers (list of list of int): the integers of the ``x`` line of
                each graph, indexed by the graph id read on its ``t`` line.
            noms (list of str): for each graph, the concatenation of its raw
                ``v`` and ``e`` lines.
    """
    numbers = [[] for _ in range(TAILLE)]
    noms = []
    # Per-graph vertex/edge counters, sized by the expected number of graphs.
    vertices = np.zeros(TAILLE)
    edges = np.zeros(TAILLE)
    labelVertices = []
    labelEdges = []
    compteur = -1   # index of the graph currently being read
    numero = 0      # id read on the current graph's "t" line
    temptre = ""    # raw "v"/"e" lines of the current graph

    # The context manager guarantees the file is closed, even on a parse error.
    with open(fileName, "r") as file:
        for line in file:
            b = line.split(" ")
            tag = b[0]
            if tag == "t":
                compteur += 1
                if compteur > 0:
                    # Flush the textual description of the previous graph.
                    noms.append(temptre)
                labelVertices.append([])
                labelEdges.append([])
                # int() ignores the trailing newline of the last token.
                numero = int(b[2])
                temptre = ""
            elif tag == "v":
                vertices[compteur] += 1
                labelVertices[compteur].append(int(b[2]))
                temptre += line
            elif tag == "e":
                edges[compteur] += 1
                labelEdges[compteur].append((int(b[1]), int(b[2]), int(b[3])))
                temptre += line
            elif tag == "x":
                # The last token is deliberately skipped, as are "#" markers.
                numbers[numero] = [int(tok) for tok in b[1:len(b) - 1] if tok != "#"]

    # Flush the last graph; nothing to flush when the file held no graph.
    if compteur >= 0:
        noms.append(temptre)

    graphes = []
    for i in range(TAILLE):
        graph = nx.Graph()
        # Edges are added first, then node attributes, so node insertion
        # order follows the edge list.
        for j in range(int(edges[i])):
            num1, num2, val = labelEdges[i][j]
            graph.add_edge(num1, num2, feature=val)
        # Labels are wrapped in a list so each node has a feature vector.
        graph.add_nodes_from(
            [(j, {'feature': [labelVertices[i][j]]}) for j in range(int(vertices[i]))]
        )
        graphes.append(graph)
    return graphes,numbers,noms
128207

129208
def load_patterns(fileName,TAILLE):
@@ -419,6 +498,127 @@ def pangProcessing(Ks,keep,labels,id_graphs_mono,id_graphs_iso,occurences_mono,o
419498
from grakel import graph_from_networkx
420499
from grakel.datasets import fetch_dataset
421500
from grakel.kernels import WeisfeilerLehman, VertexHistogram, WeisfeilerLehmanOptimalAssignment
501+
from stellargraph.mapper import PaddedGraphGenerator
502+
from stellargraph.layer import DeepGraphCNN
503+
from stellargraph import StellarGraph
504+
from tensorflow.keras import Model
505+
from tensorflow.keras.optimizers import Adam
506+
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, Dropout, Flatten
507+
from tensorflow.keras.losses import binary_crossentropy
508+
import tensorflow as tf
509+
510+
import keras.backend as K
511+
512+
def f1_score(y_true, y_pred):
    """Compute the F1 score of a batch as a Keras metric.

    Values are clipped to [0, 1] and rounded, so ``y_pred`` is thresholded
    at 0.5 before counting.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        A scalar tensor: 2 * precision * recall / (precision + recall).
        It evaluates to 0 when there are no true positives (including the
        case where the batch holds no positive sample).
    """
    # Count true positives, predicted positives and actual positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))

    # A small epsilon in every denominator replaces the Python-level
    # `if c3 == 0` test (which cannot branch on a symbolic tensor) and also
    # avoids NaN when nothing is predicted positive or when both precision
    # and recall are zero.
    eps = K.epsilon()

    # How many selected items are relevant?
    precision = true_positives / (predicted_positives + eps)

    # How many relevant items are selected?
    recall = true_positives / (actual_positives + eps)

    return 2 * (precision * recall) / (precision + recall + eps)
532+
533+
def modelGen(generator, k=35, layer_sizes=None, learning_rate=0.0001):
    """Build and compile a DGCNN binary graph classifier.

    The network is a DeepGraphCNN stack followed by two 1D convolutions
    (with a max-pooling in between), a 128-unit dense layer with dropout
    and a single sigmoid output unit.

    Args:
        generator: the graph generator handed to ``DeepGraphCNN``.
        k (int): the number of rows for the DeepGraphCNN output tensor.
        layer_sizes (list of int, optional): sizes of the graph convolution
            layers; defaults to ``[32, 32, 32, 1]``.
        learning_rate (float): learning rate of the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` trained with binary cross-entropy and
        reporting accuracy and ``f1_score``.
    """
    if layer_sizes is None:
        layer_sizes = [32, 32, 32, 1]
    layer_sizes = list(layer_sizes)

    dgcnn_model = DeepGraphCNN(
        layer_sizes=layer_sizes,
        # One tanh activation per graph convolution layer.
        activations=["tanh"] * len(layer_sizes),
        k=k,
        bias=False,
        generator=generator,
    )
    x_inp, x_out = dgcnn_model.in_out_tensors()

    # Kernel size and stride both equal the summed layer sizes.
    total_size = sum(layer_sizes)
    x_out = Conv1D(filters=16, kernel_size=total_size, strides=total_size)(x_out)
    x_out = MaxPool1D(pool_size=2)(x_out)
    x_out = Conv1D(filters=32, kernel_size=5, strides=1)(x_out)
    x_out = Flatten()(x_out)

    x_out = Dense(units=128, activation="relu")(x_out)
    x_out = Dropout(rate=0.5)(x_out)

    predictions = Dense(units=1, activation="sigmoid")(x_out)
    model = Model(inputs=x_inp, outputs=predictions)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss=binary_crossentropy,
        metrics=["acc", f1_score],
    )
    return model
563+
564+
from sklearn import model_selection
565+
def DGCNN(index,keep,graphs,labels,cv,results):
    """Cross-validate the DGCNN model and store its F1 mean/std in ``results``.

    Args:
        index (int): first-axis index of ``results`` to write to.
        keep: collection of positions in ``graphs`` to use; other graphs
            are ignored.
        graphs (list of networkx.Graph): graphs whose nodes carry a
            ``"feature"`` attribute.
        labels: label of each graph, indexed like ``graphs``.
        cv: splitter exposing ``split(X, y)`` that yields
            ``(train_index, test_index)`` pairs of 0-based positions.
        results: nested array; ``results[index][9][1][0]`` receives the mean
            and ``results[index][9][1][1]`` the standard deviation of the
            per-fold F1 scores.

    Returns:
        The ``results`` object, updated in place.
    """
    kept = set(keep)  # O(1) membership tests
    stellarColl = []
    lab = []
    for j, graph in enumerate(graphs):
        if j in kept:
            stellarColl.append(StellarGraph(graph, node_features="feature"))
            lab.append(labels[j])

    gen = PaddedGraphGenerator(graphs=stellarColl)
    # One-hot encode and drop the first column, leaving the remaining
    # indicator column(s) as targets.
    targets = pd.get_dummies(lab, drop_first=True)

    epochs = 10
    # Collected per fold, so any number of folds is handled correctly.
    fold_scores = []
    for train_index, test_index in cv.split(stellarColl, lab):
        y_train = targets.iloc[train_index].values
        y_test = targets.iloc[test_index].values

        # The split indices are already 0-based positions in `stellarColl`,
        # so they are passed unshifted to stay aligned with the targets.
        train_gen = gen.flow(
            list(train_index),
            targets=y_train,
            batch_size=50,
            symmetric_normalization=False,
        )

        # NOTE(review): validation reuses the test fold; confirm this is intended.
        valid_gen = gen.flow(
            list(test_index),
            targets=y_test,
            batch_size=1,
            symmetric_normalization=False,
        )

        # Single batch holding the whole test fold.
        test_gen = gen.flow(
            list(test_index),
            targets=y_test,
            batch_size=len(test_index),
            symmetric_normalization=False,
        )

        model = modelGen(gen)
        model.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=True)
        test_metrics = model.evaluate(test_gen)
        print("\nTest Set Metrics:")
        # evaluate() returns [loss, acc, f1_score]; keep the F1 score.
        fold_scores.append(test_metrics[2])

    results[index][9][1][0] = np.mean(fold_scores)
    results[index][9][1][1] = np.std(fold_scores)
    return results
619+
620+
621+
422622

423623
def Baselines(index,DATASET,Graphes,cv,labels,results):
424624
""" this function computes the baseline results for the graph classification task
@@ -517,7 +717,7 @@ def Baselines(index,DATASET,Graphes,cv,labels,results):
517717
def Table2():
518718
""" this function computes the results of the table 1 of the paper
519719
results are saved in a csv file in the folder results"""
520-
DATASETS = ["MUTAG"]
720+
DATASETS = ["MUTAG","PTC","FOPPA"]
521721
Ks = {"MUTAG": 150, "NCI1": 3, "DD": 3, "PTC": 150, "FOPPA": 500}
522722
results = np.zeros((len(DATASETS),10,2,2))
523723
for DATASET in DATASETS:
@@ -536,6 +736,7 @@ def Table2():
536736
print("DATASET : "+str(arg))
537737

538738
Graphes,useless_var,PatternsRed= load_graphs(FILEGRAPHS,GRAPHLENGTH)
739+
DGCNN_graphs,XX,XX= load_graphs_DGCNN(FILEGRAPHS,GRAPHLENGTH)
539740
Subgraphs,id_graphs,noms = load_graphs(FILESUBGRAPHS,PATTERNLENGTH)
540741
xx,id_graphs_mono,occurences_mono = load_patterns(FILEMONOSET,PATTERNLENGTH)
541742
xx,id_graphs_iso,occurences_iso = load_patterns(FILEISOSET,PATTERNLENGTH)
@@ -567,6 +768,7 @@ def Table2():
567768
#keep only graphs which are in keep
568769
Graphs = [Graphes[i] for i in keep]
569770
results = Baselines(DATASETS.index(DATASET),DATASET,Graphs,cv,Y,results)
771+
results = DGCNN(DATASETS.index(DATASET),keep,DGCNN_graphs,labels,cv,results)
570772
print(results)
571773
data = pd.DataFrame(index=range(len(results[0])),columns=DATASETS)
572774
for i in range(len(results[0])):

src/Pattern.sh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/bin/bash
22

3-
java -Xmx8192m -jar spmf.jar run GSPAN ../data/MUTAG/MUTAG_graph.txt ../data/MUTAG/MUTAG_pattern.txt 0.00001 8 true false true
4-
java -Xmx8192m -jar spmf.jar run CGSPANSupport ../data/MUTAG/MUTAG_graph.txt ../data/MUTAG/MUTAG_CG.txt 0.00001 8 true false true
5-
python3 ProcessingPatterns.py -d MUTAG
3+
# Usage: Pattern.sh <dataset-name>
# Runs the SPMF GSPAN miner on ../data/<name>/<name>_graph.txt and writes
# the patterns to ../data/<name>/<name>_pattern.txt.
if [ "$#" -lt 1 ]; then
    echo "usage: $0 <dataset-name>" >&2
    exit 1
fi
name=$1
input="../data/${name}/${name}_graph.txt"
outputGSPAN="../data/${name}/${name}_pattern.txt"
outputCGSPAN="../data/${name}/${name}_CG.txt"
echo "$input"
# Paths are quoted so dataset names containing spaces are passed intact.
java -Xmx8192m -jar spmf.jar run GSPAN "$input" "$outputGSPAN" 0.1 10 true false true
#java -Xmx8192m -jar spmf.jar run CGSPANMNI "$input" "$outputCGSPAN" 1 1 true false true

src/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# This file may be used to create an environment using:
2+
# $ conda create --name <env> --file <this file>
3+
# platform: win-64
4+
Terminer le programme de commandes (O/N) ? Terminer le programme de commandes (O/N) ?

0 commit comments

Comments
 (0)