Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
import logging
logger = logging.getLogger(__name__)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from toolkit import KerasROOTClassification
"""
A few functions to compare different setups
"""
def overlay_ROC(filename, *projects):
    """Overlay the ROC curves of several projects in one figure and save it.

    For every project the curve is computed with ``roc_curve`` from the
    project's ``y_test``, ``scores_test`` and ``w_test`` (used as sample
    weights) and drawn as background rejection (1 - false positive rate)
    versus signal efficiency (true positive rate). A dashed black diagonal
    from (0, 1) to (1, 0) is added as the "Luck" reference.

    Arguments:
    filename -- output path handed to ``plt.savefig``
    projects -- any number of objects providing ``name``, ``y_test``,
                ``scores_test`` and ``w_test``

    Drawing happens on the current pyplot figure, which is cleared after
    saving.
    """
    logger.info("Overlay ROC curves for {}".format([p.name for p in projects]))

    # The grid settings do not depend on the project, so set them only once.
    plt.grid(color='gray', linestyle='--', linewidth=1)

    for p in projects:
        # The thresholds returned by roc_curve are not needed for the plot.
        fpr, tpr, _ = roc_curve(p.y_test, p.scores_test, sample_weight=p.w_test)
        background_rejection = 1.0 - fpr
        plt.plot(tpr, background_rejection, label=p.name)

    # Reference diagonal; drawn before the legend is built so that its
    # 'Luck' label actually shows up in the legend.
    plt.plot([0, 1], [1, 0], linestyle='--', color='black', label='Luck')

    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.xticks(np.arange(0, 1, 0.1))
    plt.yticks(np.arange(0, 1, 0.1))
    plt.title('Receiver operating characteristic')
    plt.ylabel("Background rejection")
    plt.xlabel("Signal efficiency")
    # legend() only picks up artists that exist at call time, so it has to
    # come after all labelled plot calls.
    plt.legend(loc='lower left', framealpha=1.0)
    plt.savefig(filename)
    plt.clf()
def overlay_loss(filename, *projects):
    """Overlay training and validation loss curves of several projects.

    For every project the ``'loss'`` and ``'val_loss'`` entries of
    ``project.history.history`` are drawn with a logarithmic y axis; the
    training loss is dashed, the validation loss uses the default line
    style. The figure is written to ``filename`` and the current pyplot
    figure is cleared afterwards.

    Arguments:
    filename -- output path handed to ``plt.savefig``
    projects -- any number of objects providing ``name`` and ``history``
    """
    project_names = [project.name for project in projects]
    logger.info("Overlay loss curves for {}".format(project_names))

    # (history key, legend prefix, extra line options) for each curve,
    # in the order they are drawn per project
    curve_specs = (
        ('loss', "Training Loss ", {'linestyle': '--'}),
        ('val_loss', "Validation Loss ", {}),
    )

    for project in projects:
        for key, prefix, line_opts in curve_specs:
            plt.semilogy(
                project.history.history[key],
                label=prefix + project.name,
                **line_opts
            )

    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(loc='upper right')
    plt.savefig(filename)
    plt.clf()
if __name__ == "__main__":
    # Example usage: set up two projects on the same data that differ only
    # in the optimizer, train them if needed and overlay their ROC and loss
    # curves.

    import os

    logging.basicConfig()
    # DEBUG output for the toolkit logger; use logging.INFO for less detail
    logging.getLogger("KerasROOTClassification").setLevel(logging.DEBUG)

    filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"

    # Data-related options shared by both example projects
    data_options = {
        "signal_trees": [(filename, "GG_oneStep_1705_1105_505_NoSys")],
        "bkg_trees": [
            (filename, "ttbar_NoSys"),
            (filename, "wjets_Sherpa221_NoSys"),
        ],
        "selection": "lep1Pt<5000",  # cut out a few very weird outliers
        "branches": ["met", "mt"],
        "weight_expr": "eventWeight*genWeight",
        "identifiers": ["DatasetNumber", "EventNumber"],
        "step_bkg": 100,
    }

    # NOTE(review): lr=1000. is unusually large - presumably a deliberately
    # bad setup to contrast with Adam; confirm.
    sgd_options = {"lr": 1000., "decay": 1e-6, "momentum": 0.9}
    example1 = KerasROOTClassification(
        "test_sgd",
        optimizer="SGD",
        optimizer_opts=sgd_options,
        **data_options
    )

    example2 = KerasROOTClassification(
        "test_adam",
        optimizer="Adam",
        **data_options
    )

    # Only train a project when its test-score file is not there yet
    # (presumably left behind by an earlier run - verify against toolkit).
    for project, scores_file in (
        (example1, "outputs/test_sgd/scores_test.h5"),
        (example2, "outputs/test_adam/scores_test.h5"),
    ):
        if os.path.exists(scores_file):
            continue
        project.train(epochs=20)

    overlay_ROC("overlay_ROC.pdf", example1, example2)
    overlay_loss("overlay_loss.pdf", example1, example2)