-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDecomposingTest_1D.py
More file actions
108 lines (81 loc) · 3.24 KB
/
DecomposingTest_1D.py
File metadata and controls
108 lines (81 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
'''
This Python script reproduces the results on 1D distributions from the
article "Experiments using machine learning to approximate likelihood ratios
for mixture models" (ACAT 2016).
'''
__author__ = "Pavez J. <juan.pavezs@alumnos.usm.cl>"
import ROOT
import numpy as np
from sklearn import svm, linear_model
from sklearn.externals import joblib
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import GradientBoostingClassifier
import sys
import os.path
from mlp import MLPTrainer
from make_data import makeData, makeModelND, makeModelPrivateND,\
makeModel
from utils import printMultiFrame, printFrame, saveFig, loadData,\
makeROC, makeSigBkg, makePlotName
from train_classifiers import trainClassifiers, predict
from decomposed_test import DecomposedTest
from xgboost_wrapper import XGBoostClassifier
def signalCoefficients(signal_fraction=0.1, background=(.3, .7)):
    '''
    Build the coefficients of the mixture that contains the signal component.

    signal_fraction: weight of the first component in the returned mixture.
        Anything float() accepts (e.g. a command-line string), in [0, 1).
    background: relative weights of the remaining components.

    Returns (coefficients, label): the normalized coefficient array, whose
    first entry equals signal_fraction, and the fraction formatted with three
    decimals when it is below 0.01 and with two decimals otherwise.

    Raises ValueError if signal_fraction is not a number in [0, 1).
    '''
    fraction = float(signal_fraction)
    # A fraction of 1 would divide by zero below and anything outside [0, 1)
    # gives negative or NaN weights, so reject it here (NaN fails this test too).
    if not 0. <= fraction < 1.:
        raise ValueError(
            'signal fraction must be in [0, 1), got %r' % (signal_fraction,))
    label = "%.3f" % fraction if fraction < 0.01 else "%.2f" % fraction
    coefficients = np.array((fraction,) + tuple(background), dtype=float)
    # Rescale the first weight so that it equals `fraction` once the whole
    # vector is normalized to sum to one.
    coefficients[0] = fraction * coefficients[1:].sum() / (1. - fraction)
    return coefficients / coefficients.sum(), label


if __name__ == '__main__':
    # Command line: [classifier name] [signal fraction] [random seed]
    # Every argument is optional; missing ones fall back to the defaults below.

    # Setting the classifier to use
    classifiers = {
        'svc': svm.NuSVC(probability=True),
        'svr': svm.NuSVR(),
        'logistic': linear_model.LogisticRegression(),
        'bdt': GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                          max_depth=5, random_state=0),
        'mlp': MLPTrainer(n_hidden=4, L2_reg=0),
        'xgboost': XGBoostClassifier(num_class=2, nthread=4, silent=1,
                                     num_boost_round=100, eta=0.5,
                                     max_depth=4),
    }
    model_g = sys.argv[1] if len(sys.argv) > 1 else None
    clf = classifiers.get(model_g)
    if clf is None:
        # Missing or unknown classifier name: fall back to logistic regression
        model_g = 'logistic'
        clf = classifiers['logistic']
        print('Not found classifier, Using logistic instead')

    # parameters of the mixture model
    c0 = np.array([.0, .3, .7])
    c0 = c0 / c0.sum()
    # The signal fraction used to be read unconditionally from sys.argv[2],
    # which raised IndexError when it was omitted; default to 0.1 instead.
    # c1_g is the printable form of the fraction handed to the helpers below.
    c1, c1_g = signalCoefficients(sys.argv[2] if len(sys.argv) > 2 else 0.1)

    # Set this value to False if only final plots are needed
    verbose_printing = True
    work_dir = '.'
    workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root'

    # features
    vars_g = ['x']

    # Run ROOT in batch mode and tighten the relative precision of the
    # default RooFit integrator
    ROOT.gROOT.SetBatch(ROOT.kTRUE)
    ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15)

    if len(sys.argv) > 3:
        print('Setting seed: {0} '.format(sys.argv[3]))
        seed = int(sys.argv[3])
        # Seed both generators so that ROOT sampling and numpy agree on a run
        ROOT.RooRandom.randomGenerator().SetSeed(seed)
        np.random.seed(seed)

    # Create models to sample from
    makeModel(c0=c0, c1=c1, workspace=workspace_file, dir=work_dir,
              verbose_printing=verbose_printing)

    # make synthetic data to train the classifiers
    # NOTE(review): model_g is hard-coded to 'mlp' here while every other call
    # receives the selected model_g - confirm this is intended.
    makeData(vars_g=vars_g, c0=c0, c1=c1, num_train=100000, num_test=50000,
             workspace=workspace_file, dir=work_dir, c1_g=c1_g, model_g='mlp')

    # train the pairwise classifiers
    # NOTE(review): the 3 presumably is the number of mixture components
    # (len(c0)) - confirm against trainClassifiers.
    trainClassifiers(clf, 3, dir=work_dir, model_g=model_g,
                     c1_g=c1_g, model_file='adaptive')

    # class which implements the decomposed method; the classifier instance is
    # only handed over when the 'mlp' model was selected
    test = DecomposedTest(c0, c1, dir=work_dir, c1_g=c1_g, model_g=model_g,
                          input_workspace=workspace_file,
                          verbose_printing=verbose_printing,
                          dataset_names=['0', '1', '2'],
                          clf=clf if model_g == 'mlp' else None)
    test.fit(data_file='test', importance_sampling=False, true_dist=True,
             vars_g=vars_g)
    test.computeRatios(true_dist=True, vars_g=vars_g, use_log=False)