From cc9e0663353ccfc1ce1b436d43f644452147dbd4 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann <Nikolai.Hartmann@physik.uni-muenchen.de> Date: Tue, 12 Jun 2018 17:54:14 +0200 Subject: [PATCH] Normalise training and test sample correctly for significance scan --- toolkit.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/toolkit.py b/toolkit.py index b8ae993..df89ef3 100755 --- a/toolkit.py +++ b/toolkit.py @@ -921,15 +921,18 @@ class ClassificationProject(object): significances_train = [] significances_test = [] - for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances in [ - (hist_sig_train, hist_bkg_train, rel_errors_bkg_train, rel_errors_sig_train, significances_train), - (hist_sig_test, hist_bkg_test, rel_errors_bkg_test, rel_errors_sig_test, significances_test), + for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances, w, y in [ + (hist_sig_train, hist_bkg_train, rel_errors_bkg_train, rel_errors_sig_train, significances_train, self.w_train, self.y_train), + (hist_sig_test, hist_bkg_test, rel_errors_bkg_test, rel_errors_sig_test, significances_test, self.w_test, self.y_test), ]: + # factor to rescale due to using only a fraction of events (training and test samples) + normfactor_sig = (np.sum(self.w_train[self.y_train==1])+np.sum(self.w_test[self.y_test==1]))/np.sum(w[y==1]) + normfactor_bkg = (np.sum(self.w_train[self.y_train==0])+np.sum(self.w_test[self.y_test==0]))/np.sum(w[y==0]) # first set nan values to 0 and multiply by lumi for arr in hist_sig, hist_bkg, rel_errors_bkg: arr[np.isnan(arr)] = 0 - hist_sig *= lumifactor - hist_bkg *= lumifactor + hist_sig *= lumifactor*normfactor_sig + hist_bkg *= lumifactor*normfactor_bkg for i in range(len(hist_sig)): s = sum(hist_sig[i:]) b = sum(hist_bkg[i:]) @@ -941,6 +944,8 @@ class ClassificationProject(object): z = 0 else: z = significanceFunction(s, b, db) + if z == float('inf'): + z = 0 logger.debug("s, b, db, z = {}, {}, {}, {}".format(s, b, db, z)) significances.append(z) -- GitLab