diff --git a/.gitignore b/.gitignore index 17aa483ab4ec10e27cd1e78e657e547b5772f345..c192842d5b5201bca59cdaf155384b56bd5d5150 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ outputs/ +*.swp diff --git a/toolkit.py b/toolkit.py index 0432498e2d800a2cf22c0990ff753d2fcbd9c39b..3df27c69f9ca2ba8042d4657a6ab64376790866e 100755 --- a/toolkit.py +++ b/toolkit.py @@ -13,11 +13,15 @@ import pandas as pd import h5py from sklearn.preprocessing import StandardScaler from sklearn.externals import joblib +from sklearn.metrics import roc_curve + from keras.models import Sequential from keras.layers import Dense from keras.models import model_from_json import matplotlib.pyplot as plt +import matplotlib.pyplot as plt + # configure number of cores # this doesn't seem to work, but at least with these settings keras only uses 4 processes import tensorflow as tf @@ -79,6 +83,9 @@ class KerasROOTClassification: self._sig_weights = None self._model = None + self.score_train = None + self.score_test = None + # track the number of epochs this model has been trained self.total_epochs = 0 @@ -176,6 +183,8 @@ class KerasROOTClassification: self._scaler = StandardScaler() logger.info("Fitting StandardScaler to training data") self._scaler.fit(self.x_train) + logger.info("Fitting StandardScaler to test data") + self._scaler.fit(self.x_test) joblib.dump(self._scaler, filename) return self._scaler @@ -226,7 +235,8 @@ class KerasROOTClassification: self._model.add(Dense(self.nodes, activation=self.activation_function)) # last layer is one neuron (binary classification) self._model.add(Dense(1, activation='sigmoid')) - + + logger.info("Compile model") self._model.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['accuracy']) @@ -251,6 +261,8 @@ class KerasROOTClassification: if not self.data_loaded: self._load_data() + + self.scaler if not self.data_transformed: self._transform_data() @@ -267,6 +279,7 @@ class KerasROOTClassification: self.total_epochs = self._read_info("epochs", 0) + logger.info("Train model") self.model.fit(self.x_train, # the reshape might be unnescessary here self.y_train.reshape(-1, 1), @@ -274,11 +287,18 @@ class KerasROOTClassification: class_weight=self.class_weight, shuffle=True, batch_size=self.batch_size) - + + logger.info("Save weights") self.model.save_weights(os.path.join(self.project_dir, "weights.h5")) self.total_epochs += epochs self._write_info("epochs", self.total_epochs) + + logger.info("Create scores for ROC curve") + self.scores_test = self.model.predict(self.x_test) + self.scores_train = self.model.predict(self.x_train) + + def evaluate(self): pass @@ -333,7 +353,25 @@ class KerasROOTClassification: def plotROC(self): - pass + + logger.info("Plot ROC curve") + fpr, tpr, threshold = roc_curve(self.y_test, self.scores_test, sample_weight = self.w_test) + + plt.grid(color='gray', linestyle='--', linewidth=1) + plt.plot(fpr, tpr, label='NN') + plt.plot([0,1],[0,1], linestyle='--', color='black', label='Luck') + plt.xlabel("False positive rate (background rejection)") + plt.ylabel("True positive rate (signal efficiency)") + plt.title('Receiver operating characteristic') + plt.xlim(0,1) + plt.ylim(0,1) + plt.xticks(np.arange(0,1,0.1)) + plt.yticks(np.arange(0,1,0.1)) + plt.legend(loc='lower left', framealpha=1.0) + + plt.savefig(os.path.join(self.project_dir, "ROC.pdf")) + plt.clf() + def plotScore(self): pass @@ -358,3 +396,4 @@ if __name__ == "__main__": identifiers = ["DatasetNumber", "EventNumber"]) c.train(epochs=1) + c.plotROC()