Skip to content

Commit 05664f7

Browse files
authored
Merge pull request #1 from CompNet/dev
Version Rebuttal paper
2 parents 794f657 + 038d24a commit 05664f7

File tree

4 files changed

+141
-58
lines changed

4 files changed

+141
-58
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Comparison of Graph Pattern Quality Measures v1.0.0
1+
Comparison of Graph Pattern Quality Measures v1.0.1
22
-------------------------------------------------------------------------
33

44
# Description
@@ -16,7 +16,7 @@ This repository is composed of the following elements:
1616

1717
* `requirements.txt`: List of required Python packages.
1818
* `src`: folder containing the source code
19-
* `ClusteringComparison.py`: script that reproduces the experiments of Section 5.2.
19+
* `ClusteringComparison.py`: script that reproduces the experiments of Section 5.2.1 and Section 5.2.3.
2020
* `KendallTauHistogram.py`: script that reproduces the experiments of Section 5.2.2.
2121
* `PairwiseComparisons.py`: script that reproduces the experiments of Section 5.3.
2222
* `GoldStandardComparison.py`: script that reproduces the experiments of Section 5.4.

src/GoldStandardComparison.py

Lines changed: 78 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -542,9 +542,35 @@ def FPR(discriminationScore):
542542
result = np.where(discriminationScore.pCassumingnotP == 0, float('inf'), 1/discriminationScore.pCassumingnotP)
543543
return result
544544

545+
def Gini(discriminationScore):
    """Return the inverse of the Gini index of a pattern.

    The index is 1 minus the sum of the squares of ``pCassumingP`` and
    ``pnotCassumingP`` (presumably the class proportions among the
    pattern's occurrences -- confirm against the caller).  A 1e-10 offset
    keeps the division finite when the index is zero.
    """
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP
    impurity = 1 - (p_class ** 2 + p_other ** 2)
    return 1 / (impurity + 1e-10)
548+
549+
def Gini2(discriminationScore):
    """Return the inverse of a weighted sum of squared pattern probabilities.

    The sum is ``pPassumingC**2 * pCassumingP`` plus
    ``pPassumingnotC**2 * pnotCassumingP``.  A 1e-10 offset keeps the
    division finite when that sum is zero.
    """
    class_term = (discriminationScore.pPassumingC ** 2) * discriminationScore.pCassumingP
    other_term = (discriminationScore.pPassumingnotC ** 2) * discriminationScore.pnotCassumingP
    weighted = class_term + other_term
    return 1 / (weighted + 1e-10)
552+
553+
def Entropy(discriminationScore):
    """Return the inverse of the base-2 entropy of a pattern.

    The entropy is computed from ``pnotCassumingP`` and ``pCassumingP``
    (presumably the class proportions among the pattern's occurrences --
    confirm against the caller).  Lower entropy gives a higher score.

    ``epsilon`` is added inside the logarithm to avoid log(0) and to the
    denominator to avoid a division by zero.
    """
    epsilon = 1e-10  # Avoid log(0) and division by zero
    p0 = discriminationScore.pnotCassumingP
    p1 = discriminationScore.pCassumingP

    entropy = - (p0 * np.log2(p0 + epsilon) + p1 * np.log2(p1 + epsilon))
    # When a probability is exactly 1, log2(1 + epsilon) > 0 makes the
    # computed entropy about -1.44e-10, so entropy + epsilon went negative
    # and the purest patterns received a huge negative score.  Entropy is
    # non-negative by definition, so clip it before inverting.
    entropy = np.maximum(entropy, 0.0)
    return 1 / (entropy + epsilon)
560+
561+
def Fisher(discriminationScore):
    """Return a Fisher-style ratio for a pattern.

    The numerator is the squared difference between ``pCassumingP`` and
    ``pnotCassumingP``; the denominator is the sum of ``p * (1 - p)`` for
    both values, plus a small epsilon so the division stays finite.
    """
    epsilon = 1e-10  # Avoid division by zero
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP

    squared_gap = (p_class - p_other) ** 2
    spread = p_class * (1 - p_class) + p_other * (1 - p_other)
    return squared_gap / (spread + epsilon)
568+
569+
545570

546571
def CertaintyFactor(discriminationScore):
    """Return ``(pCassumingP - pC) / (1 - pC)`` for a pattern.

    No guard is applied: the division is by zero when ``pC`` equals 1.
    """
    prior = discriminationScore.pC
    gain = discriminationScore.pCassumingP - prior
    return gain / (1 - prior)
573+
548574
def creationDictionnaryScores():
549575
dico = {
550576
"Acc": Acc,
@@ -556,8 +582,11 @@ def creationDictionnaryScores():
556582
"Cos": Cos,
557583
"Cover": Cover,
558584
"Dep": Dep,
585+
"Entropy": Entropy,
559586
"Excex": Excex,
587+
"Fisher": Fisher,
560588
"Gain": Gain,
589+
"Gini": Gini,
561590
"GR": GR,
562591
"InfGain": InfGain,
563592
"Jacc": Jacc,
@@ -657,19 +686,21 @@ def graphKeep(Graphes,labels):
657686
minority =0
658687
NbMino=len(labels)-sum(labels)
659688
keep = []
689+
NbMino = 0
660690
count=0
661691
graphs=[]
662692
for i in range(len(labels)):
663693
if labels[i]==minority:
694+
NbMino=NbMino+1
664695
keep.append(i)
665696
complete=NbMino
666697
for i in range(len(labels)):
667698
if labels[i]!=minority:
668699
if count<complete:
669700
count=count+1
670701
keep.append(i)
671-
return keep
672702

703+
return keep
673704

674705

675706
def cross_validation(X,Y,cv,classifier):
@@ -1079,11 +1110,11 @@ def GoldStandardComparison(arg,mode,id_graphsMono,labels,keep,TAILLEGRAPHE):
10791110
if arg=="PTC":
10801111
Range = [129]
10811112
if arg=="FOPPA":
1082-
Range = [33]
1113+
Range = [66]
10831114
if arg=="AIDS":
10841115
Range = [19]
10851116
if arg=="NCI1":
1086-
Range = [20]
1117+
Range = [39]
10871118
if arg=="DD":
10881119
Range = [353]
10891120
if arg=="IMDB":
@@ -1280,43 +1311,45 @@ def plot_fig(arg):
12801311
NAMESORTIEF1= "../results/"+str(arg)+"/"+ str(arg)+"ShapleyF1"
12811312

12821313
dicoNumeroNom = {
1283-
0 : "Acc",
1284-
1 : "Brins",
1285-
2 : "CConf",
1286-
3 : "CFactor",
1287-
4 : "ColStr",
1288-
5 : "Cole",
1289-
6 : "Conf",
1290-
7 : "Cos",
1291-
8 : "Cover",
1292-
9 : "Dep",
1293-
10 : "Excex",
1294-
11 : "FPR",
1295-
12 : "GR",
1296-
13 : "Gain",
1297-
14 : "InfGain",
1298-
15 : "Jacc",
1299-
16 : "Klos",
1300-
17 : "Lap",
1301-
18 : "Lever",
1302-
19 : "Lift",
1303-
20 : "MDisc",
1304-
21 : "MutInf",
1305-
22 : "NetConf",
1306-
23 : "OddsR",
1307-
24 : "Pearson",
1308-
25 : "RelRisk",
1309-
26 : "Sebag",
1310-
27 : "Spec",
1311-
28 : "Str",
1312-
29 : "Sup",
1313-
30 : "SupDif",
1314-
31 : "AbsSupDif",
1315-
32 : "WRACC",
1316-
33 : "Zhang",
1317-
34 : "chiTwo",
1318-
1319-
}
1314+
0: "Acc",
1315+
1: "Brins",
1316+
2: "CConf",
1317+
3: "CFactor",
1318+
4: "ColStr",
1319+
5: "Cole",
1320+
6: "Conf",
1321+
7: "Cos",
1322+
8: "Cover",
1323+
9: "Dep",
1324+
10: "Entropy",
1325+
11: "Excex",
1326+
12: "FPR",
1327+
13: "Fisher",
1328+
14: "GR",
1329+
15: "Gain",
1330+
16: "Gini",
1331+
17: "InfGain",
1332+
18: "Jacc",
1333+
19: "Klos",
1334+
20: "Lap",
1335+
21: "Lever",
1336+
22: "Lift",
1337+
23: "MDisc",
1338+
24: "MutInf",
1339+
25: "NetConf",
1340+
26: "OddsR",
1341+
27: "Pearson",
1342+
28: "RelRisk",
1343+
29: "Sebag",
1344+
30: "Spec",
1345+
31: "Str",
1346+
32: "Sup",
1347+
33: "SupDif",
1348+
34: "AbsSupDif",
1349+
35: "WRACC",
1350+
36: "Zhang",
1351+
37: "chiTwo",
1352+
}
13201353

13211354
datas = pd.read_csv(NOMDATA)
13221355
print(datas)
@@ -1347,14 +1380,14 @@ def plot_fig(arg):
13471380
print(datas)
13481381

13491382

1350-
TOKEEP1 = [0,4,10,12,21,27,29,31]
1383+
TOKEEP1 = [0,4,11,14,24,30,32,34]
13511384
TOKEEP2 = [16,17,18,20,21,22,23,24]
13521385
TOKEEP3 = [25,27,28,29,31,33,34]
13531386

1354-
TOKEEPA = [0,4,7,9,10]
1355-
TOKEEPB = [12,13,15,16,17]
1356-
TOKEEPC = [20,21,22,23,24]
1357-
TOKEEPD = [25,27,29,31,34]
1387+
TOKEEPA = [0,4,7,9,10,11]
1388+
TOKEEPB = [14,15,18,19,20]
1389+
TOKEEPC = [23,24,25,26,27]
1390+
TOKEEPD = [28,30,32,33,37]
13581391

13591392
maxVal = 0
13601393
for i in range(0, nBScore):

src/KendallTauHistogram.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,27 @@ def FPR(discriminationScore):
602602
def CertaintyFactor(discriminationScore):
    """Return ``(pCassumingP - pC) / (1 - pC)`` for a pattern.

    No guard is applied: the division is by zero when ``pC`` equals 1.
    """
    prior = discriminationScore.pC
    gain = discriminationScore.pCassumingP - prior
    return gain / (1 - prior)
604604

605+
# Measures to be added:
606+
def Gini(discriminationScore):
    """Return the inverse of the Gini index of a pattern.

    The index is 1 minus the sum of the squares of ``pCassumingP`` and
    ``pnotCassumingP`` (presumably the class proportions among the
    pattern's occurrences -- confirm against the caller).  A 1e-10 offset
    keeps the division finite when the index is zero.
    """
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP
    impurity = 1 - (p_class ** 2 + p_other ** 2)
    return 1 / (impurity + 1e-10)
609+
610+
def Entropy(discriminationScore):
    """Return the inverse of the base-2 entropy of a pattern.

    The entropy is computed from ``pnotCassumingP`` and ``pCassumingP``
    (presumably the class proportions among the pattern's occurrences --
    confirm against the caller).  Lower entropy gives a higher score.

    ``epsilon`` is added inside the logarithm to avoid log(0) and to the
    denominator to avoid a division by zero.
    """
    epsilon = 1e-10  # Avoid log(0) and division by zero
    p0 = discriminationScore.pnotCassumingP
    p1 = discriminationScore.pCassumingP

    entropy = - (p0 * np.log2(p0 + epsilon) + p1 * np.log2(p1 + epsilon))
    # When a probability is exactly 1, log2(1 + epsilon) > 0 makes the
    # computed entropy about -1.44e-10, so entropy + epsilon went negative
    # and the purest patterns received a huge negative score.  Entropy is
    # non-negative by definition, so clip it before inverting.
    entropy = np.maximum(entropy, 0.0)
    return 1 / (entropy + epsilon)
617+
618+
def Fisher(discriminationScore):
    """Return a Fisher-style ratio for a pattern.

    The numerator is the squared difference between ``pCassumingP`` and
    ``pnotCassumingP``; the denominator is the sum of ``p * (1 - p)`` for
    both values, plus a small epsilon so the division stays finite.
    """
    epsilon = 1e-10  # Avoid division by zero
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP

    squared_gap = (p_class - p_other) ** 2
    spread = p_class * (1 - p_class) + p_other * (1 - p_other)
    return squared_gap / (spread + epsilon)
625+
605626
def creationDictionnaryScores():
606627
dico = {
607628
"Acc": Acc,
@@ -613,8 +634,11 @@ def creationDictionnaryScores():
613634
"Cos": Cos,
614635
"Cover": Cover,
615636
"Dep": Dep,
637+
"Entropy": Entropy,
616638
"Excex": Excex,
639+
"Fisher": Fisher,
617640
"Gain": Gain,
641+
"Gini": Gini,
618642
"GR": GR,
619643
"InfGain": InfGain,
620644
"Jacc": Jacc,
@@ -1241,7 +1265,7 @@ def KendallTauHistograms(argu,mode,id_graphsMono,labelss,keep,TAILLEGRAPHE):
12411265
res0.append(res[0])
12421266
res20.append(res[20])
12431267
res40.append(res[40])
1244-
res60.append(res[60])
1268+
res60.append(res[50])
12451269
res80.append(res[80])
12461270
bins = np.linspace(-1,1,100)
12471271
histo0 = np.histogram(res0,bins=bins)

src/PairwiseComparisons.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def load_patterns(fileName,TAILLE):
215215
temp= []
216216
tempOccu = []
217217
tempCoverage = []
218-
for j in range(1,len(b)-1):
218+
for j in range(1,len(b)):
219219
val = b[j]
220220
val = re.sub("\n","",val)
221221
if not(val=="#" or val==""):
@@ -341,8 +341,8 @@ def patternMeasures(keep,labels,id_graphs,TAILLEPATTERN):
341341
pnotCassumingP[i]= t_Neg/(t_Pos+t_Neg)
342342

343343
if t_Pos+t_Neg==lenALL:
344-
pCassumingnotP[i]= 0
345-
pnotCassumingnotP[i]= 0
344+
pCassumingnotP[i]= 0.5
345+
pnotCassumingnotP[i]= 0.5
346346
else:
347347
pCassumingnotP[i]= (lenC-t_Pos)/(lenALL-t_Pos-t_Neg)
348348
pnotCassumingnotP[i]= (lennotC-t_Neg)/(lenALL-t_Pos-t_Neg)
@@ -584,13 +584,33 @@ def TPR(discriminationScore):
584584
return discriminationScore.pCassumingP
585585

586586
def FPR(discriminationScore):
587-
result = np.where(discriminationScore.pCassumingnotP == 0, float('inf'), 1/discriminationScore.pCassumingnotP)
587+
result = 1/(discriminationScore.pCassumingnotP+0.0000000000001)
588588
return result
589589

590590

591591
def CertaintyFactor(discriminationScore):
    """Return ``(pCassumingP - pC) / (1 - pC)`` for a pattern.

    No guard is applied: the division is by zero when ``pC`` equals 1.
    """
    prior = discriminationScore.pC
    gain = discriminationScore.pCassumingP - prior
    return gain / (1 - prior)
593593

594+
def Gini(discriminationScore):
    """Return the inverse of the Gini index of a pattern.

    The index is 1 minus the sum of the squares of ``pCassumingP`` and
    ``pnotCassumingP`` (presumably the class proportions among the
    pattern's occurrences -- confirm against the caller).  A 1e-10 offset
    keeps the division finite when the index is zero.
    """
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP
    impurity = 1 - (p_class ** 2 + p_other ** 2)
    return 1 / (impurity + 1e-10)
597+
598+
def Entropy(discriminationScore):
    """Return the inverse of the base-2 entropy of a pattern.

    The entropy is computed from ``pnotCassumingP`` and ``pCassumingP``
    (presumably the class proportions among the pattern's occurrences --
    confirm against the caller).  Lower entropy gives a higher score.

    ``epsilon`` is added inside the logarithm to avoid log(0) and to the
    denominator to avoid a division by zero.
    """
    epsilon = 1e-10  # Avoid log(0) and division by zero
    p0 = discriminationScore.pnotCassumingP
    p1 = discriminationScore.pCassumingP

    entropy = - (p0 * np.log2(p0 + epsilon) + p1 * np.log2(p1 + epsilon))
    # When a probability is exactly 1, log2(1 + epsilon) > 0 makes the
    # computed entropy about -1.44e-10, so entropy + epsilon went negative
    # and the purest patterns received a huge negative score.  Entropy is
    # non-negative by definition, so clip it before inverting.
    entropy = np.maximum(entropy, 0.0)
    return 1 / (entropy + epsilon)
605+
606+
def Fisher(discriminationScore):
    """Return a Fisher-style ratio for a pattern.

    The numerator is the squared difference between ``pCassumingP`` and
    ``pnotCassumingP``; the denominator is the sum of ``p * (1 - p)`` for
    both values, plus a small epsilon so the division stays finite.
    """
    epsilon = 1e-10  # Avoid division by zero
    p_class = discriminationScore.pCassumingP
    p_other = discriminationScore.pnotCassumingP

    squared_gap = (p_class - p_other) ** 2
    spread = p_class * (1 - p_class) + p_other * (1 - p_other)
    return squared_gap / (spread + epsilon)
613+
594614

595615
def creationDictionnaryScores():
596616
dico = {
@@ -614,8 +634,10 @@ def creationDictionnaryScores():
614634
"Sup": Supp,
615635
"Spec": Spec,
616636
"FPR": FPR,
617-
"ColStr": ColStr,
618637
"Dep": Dep,
638+
"Gini": Gini,
639+
"Fisher": Fisher,
640+
"ColStr": ColStr,
619641
"Excex": Excex,
620642
"Gain": Gain,
621643
"Jacc": Jacc,
@@ -628,7 +650,9 @@ def creationDictionnaryScores():
628650
"Pearson": Pearson,
629651
"RelRisk": RelRisk,
630652
"AbsSupDif": SuppDifAbs,
631-
"chiTwo": chiTwo}
653+
"chiTwo": chiTwo,
654+
"Entropy": Entropy,
655+
}
632656
#don't sort the dictionnary
633657
return dico
634658

@@ -705,12 +729,11 @@ def graphKeep(Graphes,labels):
705729
minority =0
706730
NbMino=len(labels)-sum(labels)
707731
keep = []
732+
NbMino = 0
708733
count=0
709-
NbMino=0
710-
threshold = 1000
711734
graphs=[]
712735
for i in range(len(labels)):
713-
if labels[i]==minority and NbMino<threshold:
736+
if labels[i]==minority:
714737
NbMino=NbMino+1
715738
keep.append(i)
716739
complete=NbMino
@@ -719,6 +742,7 @@ def graphKeep(Graphes,labels):
719742
if count<complete:
720743
count=count+1
721744
keep.append(i)
745+
722746
return keep
723747

724748

@@ -1069,7 +1093,6 @@ def PairwiseComparisons(arg,mode,id_graphsMono,labelss,keep,TAILLEGRAPHE):
10691093
delete = np.count_nonzero(scoresValues == -1000000)
10701094
dicoRankings[compteur]=np.argsort(scoresValues,kind='mergesort')[::-1]
10711095
dicoRankings[compteur] = dicoRankings[compteur][0:len(dicoRankings[compteur])-delete]
1072-
10731096
compteur=compteur+1
10741097
dicoFinal[VALUECLUSTER]=dicoRankings
10751098
import scipy
@@ -1101,6 +1124,9 @@ def PairwiseComparisons(arg,mode,id_graphsMono,labelss,keep,TAILLEGRAPHE):
11011124
if mode == "i":
11021125
nameSortie = NAMEBASE+"PairwiseComparisons"+str(METHOD)+"Induced"+".pdf"
11031126
plt.savefig(nameSortie)
1127+
#Save the dataframe
1128+
nameSortie = NAMEBASE+"PairwiseComparisons"+str(METHOD)+".csv"
1129+
df.to_csv(nameSortie)
11041130

11051131

11061132

0 commit comments

Comments
 (0)