diff --git a/plotting.py b/plotting.py index 74de03259cc92dad8fc13d7cba09e587c37f4bf3..f13a461a0030b66d1b9d20544fbd61facbfdf636 100644 --- a/plotting.py +++ b/plotting.py @@ -273,117 +273,198 @@ def plot_cond_avg_actmax_2D(plotname, model, layer, neuron, ranges, plot_hist_2D(plotname, xedges, yedges, hist, zlabel="Neuron output", **kwargs) +def plot_profile_2D(plotname, valsx, valsy, scores, + nbinsx, xmin, xmax, + nbinsy, ymin, ymax, + metric=np.mean, + **kwargs): + kwargs["zlabel"] = kwargs.get("zlabel", "Profile") -if __name__ == "__main__": - - from .toolkit import ClassificationProject + xedges = np.linspace(xmin, xmax, nbinsx) + yedges = np.linspace(ymin, ymax, nbinsy) - c = ClassificationProject(os.path.expanduser("~/p/scripts/keras/008-allhighlevel/all_highlevel_985")) + binindices_x = np.digitize(valsx, xedges) + binindices_y = np.digitize(valsy, yedges) - mean_signal = get_mean_event(c.x_test, c.y_test, 1) + # create profile histogram + hist = [] + for binindex_x in range(1, len(xedges)+1): + line = [] + for binindex_y in range(1, len(xedges)+1): + try: + prof_score = metric(scores[(binindices_x == binindex_x) & (binindices_y == binindex_y)]) + except ValueError: + prof_score = 0 + line.append(prof_score) + hist.append(line) + hist = np.array(hist) - print("Mean signal: ") - for branch_index, val in enumerate(mean_signal): - print("{:>20}: {:<10.3f}".format(c.branches[branch_index], val)) + plot_hist_2D(plotname, xedges, yedges, hist, **kwargs) - plot_NN_vs_var_1D("met.pdf", mean_signal, - scorefun=c.evaluate, - var_index=c.branches.index("met"), - var_range=(0, 1000, 10), - var_label="met [GeV]") - plot_NN_vs_var_1D("mt.pdf", mean_signal, - scorefun=c.evaluate, - var_index=c.branches.index("mt"), - var_range=(0, 500, 10), - var_label="mt [GeV]") +if __name__ == "__main__": - plot_NN_vs_var_2D("mt_vs_met.pdf", means=mean_signal, - scorefun=c.evaluate, - var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), - var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), - var1_label="met [GeV]", var2_label="mt [GeV]") + import sys + from .toolkit import ClassificationProject - # plot_NN_vs_var_2D_all("mt_vs_met_all.pdf", means=mean_signal, - # model=c.model, transform_function=c.scaler.transform, - # var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), - # var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), - # var1_label="met [GeV]", var2_label="mt [GeV]") + import logging + logging.basicConfig() + logging.getLogger("tfhelpers").setLevel(logging.DEBUG) - import keras.backend as K from .tfhelpers import get_single_neuron_function, get_max_activation_events import meme - meme.setOptions(overrideCache="/scratch-local/nhartmann/meme_cache") + # meme.setOptions(overrideCache="/scratch-local/nhartmann/meme_cache") - import logging - logging.basicConfig() - logging.getLogger("tfhelpers").setLevel(logging.DEBUG) + if len(sys.argv) < 2: + c = ClassificationProject(os.path.expanduser("~/p/scripts/keras/008-allhighlevel/all_highlevel_985")) + else: + c = ClassificationProject(sys.argv[1]) + + def test_mean_signal(): + + mean_signal = get_mean_event(c.x_test, c.y_test, 1) + + print("Mean signal: ") + for branch_index, val in enumerate(mean_signal): + print("{:>20}: {:<10.3f}".format(c.branches[branch_index], val)) + + plot_NN_vs_var_1D("met.pdf", mean_signal, + scorefun=c.evaluate, + var_index=c.branches.index("met"), + var_range=(0, 1000, 10), + var_label="met [GeV]") + + plot_NN_vs_var_1D("mt.pdf", mean_signal, + scorefun=c.evaluate, + var_index=c.branches.index("mt"), + var_range=(0, 500, 10), + var_label="mt [GeV]") - plot_NN_vs_var_2D("mt_vs_met_crosscheck.pdf", means=mean_signal, - scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, scaler=c.scaler), - var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), - var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), - var1_label="met [GeV]", var2_label="mt [GeV]") - - - # transformed events - c.load(reload=True) - ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.branches))] - - losses, events = get_max_activation_events(c.model, ranges, ntries=100000, layer=3, neuron=0, threshold=0.2) - - events = c.scaler.inverse_transform(events) - - plot_hist_2D_events( - "mt_vs_met_actmaxhist.pdf", - events[:,c.branches.index("met")], - events[:,c.branches.index("mt")], - 100, 0, 1000, - 100, 0, 500, - varx_label="met [GeV]", vary_label="mt [GeV]", - ) - - plot_cond_avg_actmax_2D( - "mt_vs_met_cond_actmax.pdf", - c.model, 3, 0, ranges, - c.branches.index("met"), - c.branches.index("mt"), - 30, 0, 1000, - 30, 0, 500, - scaler=c.scaler, - varx_label="met [GeV]", vary_label="mt [GeV]", - ) - - plot_hist_2D_events( - "mt_vs_output_actmax.pdf", - events[:,c.branches.index("mt")], - losses, - 100, 0, 500, - 100, 0, 1, - varx_label="mt [GeV]", vary_label="NN output", - log=True, - ) - - utrf_x_test = c.scaler.inverse_transform(c.x_test) - - plot_hist_2D_events( - "mt_vs_output_signal_test.pdf", - utrf_x_test[c.y_test==1][:,c.branches.index("mt")], - c.scores_test[c.y_test==1].reshape(-1), - 100, 0, 1000, - 100, 0, 1, - varx_label="mt [GeV]", vary_label="NN output", - log=True, - ) - - plot_hist_2D_events( - "apl_vs_output_actmax.pdf", - events[:,c.branches.index("LepAplanarity")], - losses, - 100, 0, 0.1, - 100, 0, 1, - varx_label="Aplanarity", vary_label="NN output", - ) + plot_NN_vs_var_2D("mt_vs_met.pdf", means=mean_signal, + scorefun=c.evaluate, + var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), + var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), + var1_label="met [GeV]", var2_label="mt [GeV]") + + + plot_NN_vs_var_2D_all("mt_vs_met_all.pdf", means=mean_signal, + model=c.model, transform_function=c.scaler.transform, + var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), + var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), + var1_label="met [GeV]", var2_label="mt [GeV]") + + plot_NN_vs_var_2D("mt_vs_met_crosscheck.pdf", means=mean_signal, + scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, scaler=c.scaler), + var1_index=c.branches.index("met"), var1_range=(0, 1000, 10), + var2_index=c.branches.index("mt"), var2_range=(0, 500, 10), + var1_label="met [GeV]", var2_label="mt [GeV]") + + + def test_max_act(): + + # transformed events + c.load(reload=True) + ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.branches))] + + losses, events = get_max_activation_events(c.model, ranges, ntries=100000, layer=3, neuron=0, threshold=0.2) + + events = c.scaler.inverse_transform(events) + + plot_hist_2D_events( + "mt_vs_met_actmaxhist.pdf", + events[:,c.branches.index("met")], + events[:,c.branches.index("mt")], + 100, 0, 1000, + 100, 0, 500, + varx_label="met [GeV]", vary_label="mt [GeV]", + ) + + plot_hist_2D_events( + "mt_vs_output_actmax.pdf", + events[:,c.branches.index("mt")], + losses, + 100, 0, 500, + 100, 0, 1, + varx_label="mt [GeV]", vary_label="NN output", + log=True, + ) + + + def test_cond_max_act(): + + c.load(reload=True) + ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.branches))] + + plot_cond_avg_actmax_2D( + "mt_vs_met_cond_actmax.pdf", + c.model, 3, 0, ranges, + c.branches.index("met"), + c.branches.index("mt"), + 30, 0, 1000, + 30, 0, 500, + scaler=c.scaler, + varx_label="met [GeV]", vary_label="mt [GeV]", + ) + + + def test_xtest_vs_output(): + + c.load(reload=True) + + utrf_x_test = c.scaler.inverse_transform(c.x_test) + + plot_hist_2D_events( + "mt_vs_output_signal_test.pdf", + utrf_x_test[c.y_test==1][:,c.branches.index("mt")], + c.scores_test[c.y_test==1].reshape(-1), + 100, 0, 1000, + 100, 0, 1, + varx_label="mt [GeV]", vary_label="NN output", + log=True, + ) + + # plot_hist_2D_events( + # "apl_vs_output_actmax.pdf", + # events[:,c.branches.index("LepAplanarity")], + # losses, + # 100, 0, 0.1, + # 100, 0, 1, + # varx_label="Aplanarity", vary_label="NN output", + # ) + + + def test_profile(): + + c.load(reload=True) + utrf_x_test = c.scaler.inverse_transform(c.x_test) + + plot_profile_2D( + "mt_vs_met_profilemean_sig.pdf", + utrf_x_test[c.y_test==1][:,c.branches.index("met")], + utrf_x_test[c.y_test==1][:,c.branches.index("mt")], + c.scores_test[c.y_test==1].reshape(-1), + 20, 0, 500, + 20, 0, 1000, + varx_label="met [GeV]", vary_label="mt [GeV]", + ) + + plot_profile_2D( + "mt_vs_met_profilemax_sig.pdf", + utrf_x_test[c.y_test==1][:,c.branches.index("met")], + utrf_x_test[c.y_test==1][:,c.branches.index("mt")], + c.scores_test[c.y_test==1].reshape(-1), + 20, 0, 500, + 20, 0, 1000, + metric=np.max, + varx_label="met [GeV]", vary_label="mt [GeV]", + ) + + for obj in dir(): + if obj.startswith("test_") and callable(locals()[obj]): + if (len(sys.argv) > 2) and (not sys.argv[2] == obj): + continue + print("Running {}".format(obj)) + locals()[obj]()