Compare revisions

480a7679 · 5ce28def · d23f0440 · b9e191be · 489b934d · d1d1cf3c
--- a/__init__.py
+++ b/__init__.py
-from .toolkit import ClassificationProject
-from .compare import overlay_ROC, overlay_loss
-from .add_friend import add_friend
+from .toolkit import *
+from .compare import *
+from .add_friend import *
--- a/browse.py
+++ b/browse.py
@@ -9,11 +9,11 @@ from KerasROOTClassification import *
 logging.basicConfig()
 logging.getLogger("KerasROOTClassification").setLevel(logging.INFO)

-c = ClassificationProject(sys.argv[1])
+c = load_from_dir(sys.argv[1])

 cs = []
 cs.append(c)

 if len(sys.argv) > 2:
    for project_name in sys.argv[2:]:
-        cs.append(ClassificationProject(project_name))
+        cs.append(load_from_dir(project_name))
--- a/compare.py
+++ b/compare.py
@@ -8,6 +8,7 @@ import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, auc

 from .toolkit import ClassificationProject
+from .plotting import save_show

 """
 A few functions to compare different setups
@@ -19,6 +20,9 @@ def overlay_ROC(filename, *projects, **kwargs):
    ylim = kwargs.pop("ylim", (0,1))
    plot_thresholds = kwargs.pop("plot_thresholds", False)
    threshold_log = kwargs.pop("threshold_log", True)
+    lumifactor = kwargs.pop("lumifactor", None)
+    tight_layout = kwargs.pop("tight_layout", False)
+    show_auc = kwargs.pop("show_auc", True)
    if kwargs:
        raise KeyError("Unknown kwargs: {}".format(kwargs))

@@ -32,11 +36,15 @@ def overlay_ROC(filename, *projects, **kwargs):
        if threshold_log:
            ax2.set_yscale("log")

+    if lumifactor is not None:
+        ax_abs_b = ax.twinx()
+        ax_abs_s = ax.twiny()
+
    prop_cycle = plt.rcParams['axes.prop_cycle']
    colors = prop_cycle.by_key()['color']

    for p, color in zip(projects, colors):
-        fpr, tpr, threshold = roc_curve(p.y_test, p.scores_test, sample_weight = p.w_test)
+        fpr, tpr, threshold = roc_curve(p.l_test, p.scores_test, sample_weight = p.w_test)
        fpr = 1.0 - fpr
        try:
            roc_auc = auc(tpr, fpr)
@@ -45,25 +53,42 @@ def overlay_ROC(filename, *projects, **kwargs):
            roc_auc = auc(tpr, fpr, reorder=True)

        ax.grid(color='gray', linestyle='--', linewidth=1)
-        ax.plot(tpr,  fpr, label=str(p.name+" (AUC = {:.3f})".format(roc_auc)), color=color)
+        if show_auc:
+            label = str(p.name+" (AUC = {:.3f})".format(roc_auc))
+        else:
+            label = p.name
+        ax.plot(tpr,  fpr, label=label, color=color)
        if plot_thresholds:
            ax2.plot(tpr, threshold, "--", color=color)
+        if lumifactor is not None:
+            sumw_b = p.w_test[p.l_test==0].sum()*lumifactor
+            sumw_s = p.w_test[p.l_test==1].sum()*lumifactor
+            ax_abs_b.plot(tpr, (1.-fpr)*sumw_b, alpha=0)
+            ax_abs_b.invert_yaxis()
+            ax_abs_s.plot(tpr*sumw_s, fpr, alpha=0)

    if xlim is not None:
        ax.set_xlim(*xlim)
    if ylim is not None:
        ax.set_ylim(*ylim)
+    if lumifactor is not None:
+        ax_abs_b.set_ylim((1-ax.get_ylim()[0])*sumw_b, (1-ax.get_ylim()[1])*sumw_b)
+        ax_abs_b.set_xlim(*ax.get_xlim())
+        ax_abs_s.set_xlim(ax.get_xlim()[0]*sumw_s, ax.get_xlim()[1]*sumw_s)
+        ax_abs_s.set_ylim(*ax.get_ylim())
+        ax_abs_b.set_ylabel("Number of background events")
+        ax_abs_s.set_xlabel("Number of signal events")
    # plt.xticks(np.arange(0,1,0.1))
    # plt.yticks(np.arange(0,1,0.1))
    ax.legend(loc='lower left', framealpha=1.0)
-    ax.set_title('Receiver operating characteristic')
+    if lumifactor is None:
+        ax.set_title('Receiver operating characteristic')
    ax.set_ylabel("Background rejection")
    ax.set_xlabel("Signal efficiency")
-    if plot_thresholds:
+    if plot_thresholds or tight_layout:
        # to fit right y-axis description
        fig.tight_layout()
-    fig.savefig(filename)
-    plt.close(fig)
+    return save_show(plt, fig, filename)

 def overlay_loss(filename, *projects, **kwargs):

@@ -78,22 +103,23 @@ def overlay_loss(filename, *projects, **kwargs):
    prop_cycle = plt.rcParams['axes.prop_cycle']
    colors = prop_cycle.by_key()['color']

+    fig, ax = plt.subplots()
+
    for p,color in zip(projects,colors):
        hist_dict = p.csv_hist
-        plt.plot(hist_dict['loss'], linestyle='--', label="Training Loss "+p.name, color=color)
-        plt.plot(hist_dict['val_loss'], label="Validation Loss "+p.name, color=color)
+        ax.plot(hist_dict['loss'], linestyle='--', label="Training Loss "+p.name, color=color)
+        ax.plot(hist_dict['val_loss'], label="Validation Loss "+p.name, color=color)

-    plt.ylabel('loss')
-    plt.xlabel('epoch')
+    ax.set_ylabel('loss')
+    ax.set_xlabel('epoch')
    if log:
-        plt.yscale("log")
+        ax.set_yscale("log")
    if xlim is not None:
-        plt.xlim(*xlim)
+        ax.set_xlim(*xlim)
    if ylim is not None:
-        plt.ylim(*ylim)
-    plt.legend(loc='upper right')
-    plt.savefig(filename)
-    plt.clf()
+        ax.set_ylim(*ylim)
+    ax.legend(loc='upper right')
+    return save_show(plt, fig, filename)




--- a/plotting.py
+++ b/plotting.py
@@ -20,8 +20,27 @@ logger.addHandler(logging.NullHandler())
 Some further plotting functions
 """

-def get_mean_event(x, y, class_label):
-    return [np.mean(x[y==class_label][:,var_index]) for var_index in range(x.shape[1])]
+def save_show(plt, fig, filename, **kwargs):
+    """
+    Save a figure and show it in case we are in ipython or jupyter notebook.
+
+    :param plt: pyplot-like object providing ``show()`` and ``close(fig)``
+    :param fig: figure to save; must provide ``savefig``
+    :param filename: passed to ``fig.savefig``
+    :param kwargs: forwarded to ``fig.savefig``
+    :return: ``fig`` if the name ``get_ipython`` is defined (the figure is
+             shown and left open), otherwise ``None`` (the figure is closed)
+    """
+    fig.savefig(filename, **kwargs)
+    try:
+        # Only the name lookup is guarded: a NameError raised inside
+        # plt.show() must not be mistaken for "not running in ipython".
+        get_ipython
+    except NameError:
+        plt.close(fig)
+        return None
+    plt.show()
+    return fig
+
+
+def get_mean_event(x, y, class_label, mask_value=None):
+    """
+    Return the mean of each variable over the events of one class.
+
+    :param x: 2D array, one row per event and one column per variable
+    :param y: 1D array of class labels, aligned with the rows of x
+    :param class_label: only events with ``y == class_label`` are averaged
+    :param mask_value: if not None, entries equal to this value are left out
+                       of the mean of the variable they belong to
+    :return: list with one mean per column of x
+    """
+    class_events = x[y==class_label]
+    means = []
+    for var_index in range(x.shape[1]):
+        values = class_events[:,var_index]
+        if mask_value is not None:
+            # Filter per variable only: an event that is masked in one
+            # variable must still contribute to the means of the others.
+            values = values[values != mask_value]
+        means.append(np.mean(values))
+    return means


 def plot_NN_vs_var_1D(plotname, means, scorefun, var_index, var_range, var_label=None):
@@ -41,8 +60,7 @@ def plot_NN_vs_var_1D(plotname, means, scorefun, var_index, var_range, var_label
    if var_label is not None:
        ax.set_xlabel(var_label)
    ax.set_ylabel("NN output")
-    fig.savefig(plotname)
-    plt.close(fig)
+    save_show(plt, fig, plotname)


 def plot_NN_vs_var_2D(plotname, means,
@@ -115,16 +133,17 @@ def plot_NN_vs_var_2D(plotname, means,
        ax.set_xlabel(varx_label)
    if vary_label is not None:
        ax.set_ylabel(vary_label)
-    fig.savefig(plotname)
-    plt.close(fig)
+    save_show(plt, fig, plotname)


 def plot_NN_vs_var_2D_all(plotname, model, means,
-                          var1_index, var1_range,
-                          var2_index, var2_range,
+                          varx_index,
+                          vary_index,
+                          nbinsx, xmin, xmax,
+                          nbinsy, ymin, ymax,
                          transform_function=None,
-                          var1_label=None,
-                          var2_label=None,
+                          varx_label=None,
+                          vary_label=None,
                          zrange=None, logz=False,
                          plot_last_layer=False,
                          log_default_ymin=1e-5,
@@ -132,15 +151,15 @@ def plot_NN_vs_var_2D_all(plotname, model, means,

    "Similar to plot_NN_vs_var_2D, but creates a grid of plots for all neurons."

-    var1_vals = np.arange(*var1_range)
-    var2_vals = np.arange(*var2_range)
+    varx_vals = np.linspace(xmin, xmax, nbinsx)
+    vary_vals = np.linspace(ymin, ymax, nbinsy)

    # create the events for which we want to fetch the activations
-    events = np.tile(means, len(var1_vals)*len(var2_vals)).reshape(len(var2_vals), len(var1_vals), -1)
-    for i, y in enumerate(var2_vals):
-        for j, x in enumerate(var1_vals):
-            events[i][j][var1_index] = x
-            events[i][j][var2_index] = y
+    events = np.tile(means, len(varx_vals)*len(vary_vals)).reshape(len(vary_vals), len(varx_vals), -1)
+    for i, y in enumerate(vary_vals):
+        for j, x in enumerate(varx_vals):
+            events[i][j][varx_index] = x
+            events[i][j][vary_index] = y

    # convert back into 1d array
    events = events.reshape(-1, len(means))
@@ -187,7 +206,7 @@ def plot_NN_vs_var_2D_all(plotname, model, means,
    for layer in range(layers):
        for neuron in range(len(acts[layer][0])):
            acts_neuron = acts[layer][:,neuron]
-            acts_neuron = acts_neuron.reshape(len(var2_vals), len(var1_vals))
+            acts_neuron = acts_neuron.reshape(len(vary_vals), len(varx_vals))
            ax = grid_array[neuron][layer]
            extra_opts = {}
            if not (plot_last_layer and layer == layers-1):
@@ -200,20 +219,127 @@ def plot_NN_vs_var_2D_all(plotname, model, means,
                    extra_opts["norm"] = norm(vmin=zrange[0], vmax=zrange[1])
                else:
                    extra_opts["norm"] = norm(vmin=global_min, vmax=global_max)
-            im = ax.pcolormesh(var1_vals, var2_vals, acts_neuron, cmap=cmap, linewidth=0, rasterized=True, **extra_opts)
+            im = ax.pcolormesh(varx_vals, vary_vals, acts_neuron, cmap=cmap, linewidth=0, rasterized=True, **extra_opts)
            ax.set_facecolor("black")
-            if var1_label is not None:
-                ax.set_xlabel(var1_label)
-            if var2_label is not None:
-                ax.set_ylabel(var2_label)
+            if varx_label is not None:
+                ax.set_xlabel(varx_label)
+            if vary_label is not None:
+                ax.set_ylabel(vary_label)
            ax.text(0., 0.5, "{}, {}".format(layer, neuron), transform=ax.transAxes, color="white")

    cb = fig.colorbar(im, cax=grid[0].cax, orientation="horizontal")
    cb.ax.xaxis.set_ticks_position('top')
    cb.ax.xaxis.set_label_position('top')

-    fig.savefig(plotname, bbox_inches='tight')
-    plt.close(fig)
+    save_show(plt, fig, plotname, bbox_inches='tight')
+
+
+def plot_profile_2D_all(plotname, model, events,
+                        valsx, valsy,
+                        nbinsx, xmin, xmax,
+                        nbinsy, ymin, ymax,
+                        transform_function=None,
+                        varx_label=None,
+                        vary_label=None,
+                        zrange=None, logz=False,
+                        plot_last_layer=False,
+                        log_default_ymin=1e-5,
+                        global_norm=True,
+                        cmap="inferno", **kwargs):
+    """
+    Similar to plot_profile_2D, but creates a grid of plots for all neurons.
+
+    The activations of all neurons are read for ``events`` and, for each
+    neuron, profiled against ``valsx``/``valsy`` with get_profile_2D.
+
+    :param plotname: output file name, passed to save_show
+    :param model: model passed to get_activations
+    :param events: events passed to get_activations (after applying
+                   ``transform_function`` if given)
+    :param valsx, valsy: x and y values passed to get_profile_2D
+    :param nbinsx, xmin, xmax, nbinsy, ymin, ymax: binning passed to get_profile_2D
+    :param transform_function: optional callable applied to ``events`` first
+    :param varx_label, vary_label: optional axis labels
+    :param zrange: optional (min, max) for the color scale; if None the
+                   minimum/maximum over all profiles is used
+    :param logz: use a LogNorm instead of a Normalize color scale
+    :param plot_last_layer: also plot the last entry of the activations;
+                            its panels do not get the shared color scale
+    :param log_default_ymin: lower end of the color scale that is used
+                             if ``logz`` is set and the global minimum is <= 0
+    :param global_norm: if False, the ``norm`` option of every panel is
+                        set to None
+    :param cmap: colormap passed to pcolormesh
+    :param kwargs: forwarded to get_profile_2D
+    """
+
+    # transform
+    if transform_function is not None:
+        events = transform_function(events)
+
+    logger.info("Reading activations for all neurons")
+    acts = get_activations(model, events, print_shape_only=True)
+    logger.info("Done")
+
+    if plot_last_layer:
+        n_neurons = [len(i.reshape(i.shape[0], -1)[0]) for i in acts]
+    else:
+        n_neurons = [len(i.reshape(i.shape[0], -1)[0]) for i in acts[:-1]]
+    layers = len(n_neurons)
+
+    nrows_ncols = (layers, max(n_neurons))
+    fig = plt.figure(1, figsize=nrows_ncols)
+    grid = ImageGrid(fig, 111, nrows_ncols=nrows_ncols[::-1], axes_pad=0,
+                     label_mode="1",
+                     aspect=False,
+                     cbar_location="top",
+                     cbar_mode="single",
+                     cbar_pad=.2,
+                     cbar_size="5%",)
+    grid_array = np.array(grid)
+    grid_array = grid_array.reshape(*nrows_ncols[::-1])
+
+    if logz:
+        norm = matplotlib.colors.LogNorm
+    else:
+        norm = matplotlib.colors.Normalize
+
+    global_min = None
+    global_max = None
+
+    logger.info("Creating profile histograms")
+    reg_plots = []
+    for layer in range(layers):
+        neurons_acts = acts[layer]
+        neurons_acts = neurons_acts.reshape(neurons_acts.shape[0], -1)
+        for neuron in range(len(neurons_acts[0])):
+            acts_neuron = neurons_acts[:,neuron]
+            ax = grid_array[neuron][layer]
+            # use the requested colormap (was hard-coded to "inferno")
+            plot_opts = dict(cmap=cmap, linewidth=0, rasterized=True)
+            if not (plot_last_layer and layer == layers-1):
+                # for hidden layers, plot the same z-scale
+                if zrange is not None:
+                    plot_opts["norm"] = norm(vmin=zrange[0], vmax=zrange[1])
+                else:
+                    # limits are filled in below, once the global range is known
+                    plot_opts["norm"] = norm()
+            hist, xedges, yedges = get_profile_2D(
+                valsx, valsy, acts_neuron,
+                nbinsx, xmin, xmax,
+                nbinsy, ymin, ymax,
+                **kwargs
+            )
+            if global_min is None or hist.min() < global_min:
+                global_min = hist.min()
+            if global_max is None or hist.max() > global_max:
+                global_max = hist.max()
+            X, Y = np.meshgrid(xedges, yedges)
+            reg_plots.append((layer, neuron, ax, (X, Y, hist), plot_opts))
+    logger.info("Done")
+
+    logger.info("global_min: {}".format(global_min))
+    logger.info("global_max: {}".format(global_max))
+
+    if global_min <= 0 and logz:
+        global_min = log_default_ymin
+        logger.info("Changing global_min to {}".format(log_default_ymin))
+
+    # separate names for the loop variables, so the function's own kwargs
+    # are not rebound while plotting
+    for layer, neuron, ax, mesh_args, plot_opts in reg_plots:
+        # panels of the last layer carry no shared norm (see above)
+        if zrange is None and "norm" in plot_opts:
+            plot_opts["norm"].vmin = global_min
+            plot_opts["norm"].vmax = global_max
+        if not global_norm:
+            plot_opts["norm"] = None
+        im = ax.pcolormesh(*mesh_args, **plot_opts)
+        ax.set_facecolor("black")
+        if varx_label is not None:
+            ax.set_xlabel(varx_label)
+        if vary_label is not None:
+            ax.set_ylabel(vary_label)
+        ax.text(0., 0.5, "{}, {}".format(layer, neuron), transform=ax.transAxes, color="white")
+
+    cb = fig.colorbar(im, cax=grid[0].cax, orientation="horizontal")
+    cb.ax.xaxis.set_ticks_position('top')
+    cb.ax.xaxis.set_label_position('top')
+
+    logger.info("Rendering")
+    save_show(plt, fig, plotname, bbox_inches='tight')
+    logger.info("Done")


 def plot_hist_2D(plotname, xedges, yedges, hist, varx_label=None, vary_label=None, log=False, zlabel="# of events"):
@@ -231,9 +357,7 @@ def plot_hist_2D(plotname, xedges, yedges, hist, varx_label=None, vary_label=Non
    cbar.set_label(zlabel)
    ax.set_ylabel(vary_label)
    ax.set_xlabel(varx_label)
-    fig.savefig(plotname)
-
-    plt.close(fig)
+    save_show(plt, fig, plotname)


 def plot_hist_2D_events(plotname, valsx, valsy, nbinsx, xmin, xmax, nbinsy, ymin, ymax,
@@ -253,12 +377,16 @@ def plot_hist_2D_events(plotname, valsx, valsy, nbinsx, xmin, xmax, nbinsy, ymin
 def plot_cond_avg_actmax_2D(plotname, model, layer, neuron, ranges,
                            varx_index, vary_index,
                            nbinsx, xmin, xmax, nbinsy, ymin, ymax,
-                            scaler=None,
+                            transform=None, inverse_transform=None,
                            ntries=20,
                            step=1,
                            maxit=1,
                            **kwargs):

+    transform_given = [fn is not None for fn in [transform, inverse_transform]]
+    if any(transform_given) and not all(transform_given):
+        raise ValueError("Need to pass both transform and inverse_transform if data should be transformed")
+
    xedges = np.linspace(xmin, xmax, nbinsx)
    yedges = np.linspace(ymin, ymax, nbinsy)

@@ -269,12 +397,12 @@ def plot_cond_avg_actmax_2D(plotname, model, layer, neuron, ranges,
    for ix, x in enumerate(xedges):
        for iy, y in enumerate(yedges):
            random_event = create_random_event(ranges)
-            if scaler is not None:
-                random_event = scaler.inverse_transform(random_event)
+            if inverse_transform is not None:
+                random_event = inverse_transform(random_event)
            for index, val in [(varx_index, x), (vary_index, y)]:
                random_event[0][index] = val
-            if scaler is not None:
-                random_event = scaler.transform(random_event)
+            if transform is not None:
+                random_event = transform(random_event)
            act = np.mean([max_activation_wrt_input(gradient_function, random_event, maxit=maxit, step=step, const_indices=[varx_index, vary_index])[0][0] for i in range(ntries)])
            hist[ix][iy] = act

@@ -283,15 +411,10 @@ def plot_cond_avg_actmax_2D(plotname, model, layer, neuron, ranges,
    plot_hist_2D(plotname, xedges, yedges, hist, zlabel="Neuron output", **kwargs)


-def plot_profile_2D(plotname, valsx, valsy, scores,
-                    nbinsx, xmin, xmax,
-                    nbinsy, ymin, ymax,
-                    metric=np.mean,
-                    weights=None,
-                    **kwargs):
-
-    kwargs["zlabel"] = kwargs.get("zlabel", "Profile")
-
+def get_profile_2D(valsx, valsy, scores,
+                   nbinsx, xmin, xmax,
+                   nbinsy, ymin, ymax,
+                   metric=np.mean, weights=None):
    xedges = np.linspace(xmin, xmax, nbinsx)
    yedges = np.linspace(ymin, ymax, nbinsy)

@@ -317,6 +440,25 @@ def plot_profile_2D(plotname, valsx, valsy, scores,
    hist = np.array(hist)
    hist = hist.T # had a list of columns - needs to be list of rows

+    return hist, xedges, yedges
+
+
+def plot_profile_2D(plotname, valsx, valsy, scores,
+                    nbinsx, xmin, xmax,
+                    nbinsy, ymin, ymax,
+                    metric=np.mean,
+                    weights=None,
+                    **kwargs):
+    """
+    Create a 2D profile with get_profile_2D and plot it with plot_hist_2D.
+
+    The value/binning arguments as well as ``metric`` and ``weights`` are
+    passed on to get_profile_2D; all remaining keyword arguments are passed
+    on to plot_hist_2D. ``zlabel`` defaults to "Profile" if not given.
+    """
+
+    kwargs["zlabel"] = kwargs.get("zlabel", "Profile")
+
+    hist, xedges, yedges = get_profile_2D(
+        valsx, valsy, scores,
+        nbinsx, xmin, xmax,
+        nbinsy, ymin, ymax,
+        metric=metric, weights=weights
+    )
+
     plot_hist_2D(plotname, xedges, yedges, hist, **kwargs)


@@ -342,6 +484,8 @@ if __name__ == "__main__":

    def test_mean_signal():

+        c._load_data() # untransformed
+
        mean_signal = get_mean_event(c.x_test, c.y_test, 1)

        print("Mean signal: ")
@@ -370,13 +514,19 @@ if __name__ == "__main__":


        plot_NN_vs_var_2D_all("mt_vs_met_all.pdf", means=mean_signal,
-                              model=c.model, transform_function=c.scaler.transform,
-                              var1_index=c.fields.index("met"), var1_range=(0, 1000, 10),
-                              var2_index=c.fields.index("mt"), var2_range=(0, 500, 10),
-                              var1_label="met [GeV]", var2_label="mt [GeV]")
+                              model=c.model, transform_function=c.transform,
+                              varx_index=c.fields.index("met"),
+                              vary_index=c.fields.index("mt"),
+                              nbinsx=100, xmin=0, xmax=1000,
+                              nbinsy=100, ymin=0, ymax=500,
+                              varx_label="met [GeV]", vary_label="mt [GeV]")
+
+        input_transform = c.transform
+        if hasattr(c, "get_input_list"):
+            input_transform = lambda x : c.get_input_list(c.transform(x))

        plot_NN_vs_var_2D("mt_vs_met_crosscheck.pdf", means=mean_signal,
-                          scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, scaler=c.scaler),
+                          scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, input_transform=input_transform),
                          varx_index=c.fields.index("met"),
                          vary_index=c.fields.index("mt"),
                          nbinsx=100, xmin=0, xmax=1000,
@@ -392,7 +542,7 @@ if __name__ == "__main__":

        losses, events = get_max_activation_events(c.model, ranges, ntries=100000, layer=3, neuron=0, threshold=0.2)

-        events = c.scaler.inverse_transform(events)
+        events = c.inverse_transform(events)

        plot_hist_2D_events(
            "mt_vs_met_actmaxhist.pdf",
@@ -426,7 +576,7 @@ if __name__ == "__main__":
            c.fields.index("mt"),
            30, 0, 1000,
            30, 0, 500,
-            scaler=c.scaler,
+            transform=c.transform, inverse_transform=c.inverse_transform,
            varx_label="met [GeV]", vary_label="mt [GeV]",
        )

@@ -435,7 +585,7 @@ if __name__ == "__main__":

        c.load(reload=True)

-        utrf_x_test = c.scaler.inverse_transform(c.x_test)
+        utrf_x_test = c.inverse_transform(c.x_test)

        plot_hist_2D_events(
            "mt_vs_output_signal_test.pdf",
@@ -483,7 +633,7 @@ if __name__ == "__main__":
    def test_profile():

        c.load(reload=True)
-        utrf_x_test = c.scaler.inverse_transform(c.x_test)
+        utrf_x_test = c.inverse_transform(c.x_test)

        plot_profile_2D(
            "mt_vs_met_profilemean_sig.pdf",

--- a/scripts/eval_model.py
+++ b/scripts/eval_model.py
+#!/usr/bin/env python
+
+# Evaluate the model of a classification project with a given set of weights
+# on the test sample and write the ROC curve to <plot-prefix>_ROC.pdf
+
+import os
+import argparse
+import logging
+
+import keras
+import h5py
+from sklearn.metrics import roc_curve, auc
+import matplotlib.pyplot as plt
+import numpy as np
+
+from KerasROOTClassification import ClassificationProject
+
+logging.basicConfig()
+# the fallback below reports through this logger (it was undefined before)
+logger = logging.getLogger(__name__)
+
+parser = argparse.ArgumentParser(description='Evaluate a model from a classification project using the given '
+                                             'weights and plot the ROC curve and train/test overlayed scores')
+parser.add_argument("project_dir")
+parser.add_argument("weights")
+parser.add_argument("-p", "--plot-prefix", default="eval_nn")
+args = parser.parse_args()
+
+c = ClassificationProject(args.project_dir)
+
+c.model.load_weights(args.weights)
+
+print("Predicting for test sample ...")
+scores_test = c.evaluate(c.x_test)
+print("Done")
+
+fpr, tpr, threshold = roc_curve(c.y_test, scores_test, sample_weight = c.w_test)
+# plot background rejection instead of the false positive rate
+fpr = 1.0 - fpr
+try:
+    # first attempt without reordering; only retry with reorder=True on failure
+    roc_auc = auc(tpr, fpr)
+except ValueError:
+    logger.warning("Got a value error from auc - trying to rerun with reorder=True")
+    roc_auc = auc(tpr, fpr, reorder=True)
+
+plt.grid(color='gray', linestyle='--', linewidth=1)
+plt.plot(tpr,  fpr, label=str(c.name + " (AUC = {})".format(roc_auc)))
+plt.plot([0,1],[1,0], linestyle='--', color='black', label='Luck')
+plt.ylabel("Background rejection")
+plt.xlabel("Signal efficiency")
+plt.title('Receiver operating characteristic')
+plt.xlim(0,1)
+plt.ylim(0,1)
+plt.xticks(np.arange(0,1,0.1))
+plt.yticks(np.arange(0,1,0.1))
+plt.legend(loc='lower left', framealpha=1.0)
+plt.savefig(args.plot_prefix+"_ROC.pdf")
+plt.clf()
+
--- a/scripts/generate_actmax.py
+++ b/scripts/generate_actmax.py
+#!/usr/bin/env python
+
+import sys, argparse, re, random
+
+parser = argparse.ArgumentParser(description="generate events that maximise the activation for a given neuron")
+parser.add_argument("input_project")
+parser.add_argument("output_file")
+parser.add_argument("-n", "--nevents", default=100000, type=int)
+parser.add_argument("-j", "--mask-jets", action="store_true",
+                    help="mask variables called jet*Pt/Eta/Phi and generate a random uniform distribution of the number of jets (only nescessary for non-recurrent NN)")
+args = parser.parse_args()
+
+import logging
+logging.basicConfig()
+logging.getLogger().setLevel(logging.INFO)
+
+import h5py
+import numpy as np
+
+from KerasROOTClassification.utils import (
+    weighted_quantile,
+    get_max_activation_events,
+    create_random_event,
+    get_ranges
+)
+from KerasROOTClassification import load_from_dir
+import meme
+
+meme.setOptions(deactivated=True)
+
+input_project = args.input_project
+output_file = args.output_file
+
+c = load_from_dir(input_project)
+c._load_data()
+
+ranges, mask_probs = get_ranges(c.transform(c.x_train), [0.01, 0.99], c.w_train_tot, mask_value=c.mask_value, max_evts=10000)
+
+def mask_uniform(x, mask_value, recurrent_field_idx):
+    """
+    Mask recurrent fields with a random (uniform) number of objects. Works in place.
+
+    For each index array in recurrent_field_idx and each event in x, a
+    number of objects to keep is drawn uniformly between 0 and the number
+    of objects (``shape[1]`` of the index array). The entries of all
+    further objects are set to mask_value.
+    """
+    for rec_idx in recurrent_field_idx:
+        for evt in x:
+            n_keep = int(random.random()*(rec_idx.shape[1]+1))
+            # one row of indices per object after dropping the leading axis
+            per_object_idx = rec_idx.reshape(*rec_idx.shape[1:])
+            for line_idx in per_object_idx[n_keep:]:
+                evt[line_idx] = mask_value
+
+def get_input_flat(x):
+    "Return the first element of x, reshaped to one row per event with len(c.fields) columns."
+    first_input = x[0]
+    return first_input.reshape(-1, len(c.fields))
+
+
+if args.mask_jets:
+    # Collect, per jet number, the indices of the fields called
+    # jet*Pt/Eta/Phi. The jets are ordered numerically (ordering the numbers
+    # as strings would put e.g. jet 10 before jet 2).
+    jet_fields = {}
+    for field_name in c.fields:
+        if field_name.startswith("jet") and field_name.endswith(("Pt", "Eta", "Phi")):
+            jet_number = int(re.findall("[0-9]+", field_name)[0])
+            jet_fields.setdefault(jet_number, []).append(c.fields.index(field_name))
+    # same layout as before: a list with one index array that has a leading
+    # axis of length 1, followed by one row of field indices per jet
+    jet_fields = [np.array([[indices for _, indices in sorted(jet_fields.items())]])]
+
+
+def input_transform(x):
+    """
+    Randomly mask a copy of x and return it in the form passed to the model.
+
+    If the project provides mask_uniform, it is used and the result of
+    c.get_input_list is returned. Otherwise, if --mask-jets was given, the
+    jet fields are masked and the array itself is returned. Returns None if
+    neither applies.
+    """
+    events = np.array(x)
+    if hasattr(c, "mask_uniform"):
+        c.mask_uniform(events)
+        return c.get_input_list(events)
+    if args.mask_jets:
+        mask_uniform(events, c.mask_value, jet_fields)
+        return events
+    return None
+
+
+# Optional input (inverse) transforms that are passed on to
+# get_max_activation_events
+opt_kwargs = dict()
+if hasattr(c, "mask_uniform"):
+    opt_kwargs["input_transform"] = input_transform
+    opt_kwargs["input_inverse_transform"] = c.get_input_flat
+# NOTE(review): if the project provides mask_uniform AND --mask-jets is given,
+# the inverse transform is overwritten here while input_transform still takes
+# its c.mask_uniform branch - confirm that this combination is intended
+if args.mask_jets:
+    opt_kwargs["input_transform"] = input_transform
+    opt_kwargs["input_inverse_transform"] = get_input_flat


+# maximise the activation of neuron 0 in the last layer of the model
+evts = get_max_activation_events(
+    c.model, ranges,
+    ntries=args.nevents,
+    layer=len(c.model.layers)-1,
+    neuron=0,
+    maxit=10,
+    seed=45,
+    threshold=0,
+    **opt_kwargs
+)

+# only the second element of the result is stored, as dataset "actmax"
+with h5py.File(output_file, "w") as f:
+    f.create_dataset("actmax", data=evts[1])
--- a/scripts/plot_NN_2D.py
+++ b/scripts/plot_NN_2D.py
@@ -2,20 +2,6 @@

 import sys
 import argparse
-import logging
-logging.basicConfig()
-
-import numpy as np
-
-from KerasROOTClassification import ClassificationProject
-from KerasROOTClassification.plotting import (
-    get_mean_event,
-    plot_NN_vs_var_2D,
-    plot_profile_2D,
-    plot_hist_2D_events,
-    plot_cond_avg_actmax_2D
-)
-from KerasROOTClassification.utils import get_single_neuron_function, get_max_activation_events

 parser = argparse.ArgumentParser(description='Create various 2D plots for a single neuron')
 parser.add_argument("project_dir")
@@ -27,6 +13,7 @@ parser.add_argument("-m", "--mode",
                    default="mean_sig")
 parser.add_argument("-l", "--layer", type=int, help="Layer index (takes last layer by default)")
 parser.add_argument("-n", "--neuron", type=int, default=0, help="Neuron index (takes first neuron by default)")
+parser.add_argument("-a", "--all-neurons", action="store_true", help="Create a summary plot for all neurons in all hidden layers")
 parser.add_argument("--log", action="store_true", help="Plot in color in log scale")
 parser.add_argument("--contour", action="store_true", help="Interpolate with contours")
 parser.add_argument("-b", "--nbins", default=20, type=int, help="Number of bins in x and y direction")
@@ -42,10 +29,38 @@ parser.add_argument("-s", "--step-size", help="step size for activation maximisa

 args = parser.parse_args()

+import logging
+logging.basicConfig()
+
+import numpy as np
+
+import ROOT
+ROOT.gROOT.SetBatch()
+ROOT.PyConfig.IgnoreCommandLineOptions = True
+
+from KerasROOTClassification import load_from_dir
+from KerasROOTClassification.plotting import (
+    get_mean_event,
+    plot_NN_vs_var_2D,
+    plot_profile_2D,
+    plot_hist_2D_events,
+    plot_cond_avg_actmax_2D,
+    plot_NN_vs_var_2D_all,
+)
+from KerasROOTClassification.utils import (
+    get_single_neuron_function,
+    get_max_activation_events,
+    weighted_quantile,
+    get_ranges
+)
+
+if args.all_neurons and (not args.mode.startswith("mean")):
+    parser.error("--all-neurons currently only supported for mean_sig and mean_bkg")
+
 if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

-c = ClassificationProject(args.project_dir)
+c = load_from_dir(args.project_dir)

 plot_vs_activation = (args.vary == "activation")

@@ -53,7 +68,7 @@ layer = args.layer
 neuron = args.neuron

 if layer is None:
-    layer = c.layers
+    layer = len(c.model.layers)-1

 varx_index = c.fields.index(args.varx)
 if not plot_vs_activation:
@@ -64,8 +79,25 @@ else:
 varx_label = args.varx
 vary_label = args.vary

-percentilesx = np.percentile(c.x_test[:,varx_index], [1,99])
-percentilesy = np.percentile(c.x_test[:,vary_index], [1,99])
+total_weights = c.w_test*np.array(c.class_weight)[c.y_test.astype(int)]
+
+try:
+    mask_value = c.mask_value
+except AttributeError:
+    mask_value = None
+
+# varx_test = c.x_test[:,varx_index]
+# vary_test = c.x_test[:,vary_index]
+
+# x_not_masked = np.where(varx_test != mask_value)[0]
+# y_not_masked = np.where(vary_test != mask_value)[0]
+
+# percentilesx = weighted_quantile(varx_test[x_not_masked], [0.01, 0.99], sample_weight=total_weights[x_not_masked])
+# percentilesy = weighted_quantile(vary_test[y_not_masked], [0.01, 0.99], sample_weight=total_weights[y_not_masked])
+
+percentilesx = get_ranges(c.x_test, [0.01, 0.99], total_weights, mask_value=mask_value, filter_index=varx_index, max_evts=10000)[0][0]
+percentilesy = get_ranges(c.x_test, [0.01, 0.99], total_weights, mask_value=mask_value, filter_index=vary_index, max_evts=10000)[0][0]
+

 if args.xrange is not None:
    if len(args.xrange) < 3:
@@ -86,28 +118,62 @@ else:
 if args.mode.startswith("mean"):

    if args.mode == "mean_sig":
-        means = get_mean_event(c.x_test, c.y_test, 1)
+        means = get_mean_event(c.x_test, c.y_test, 1, mask_value=mask_value)
    elif args.mode == "mean_bkg":
-        means = get_mean_event(c.x_test, c.y_test, 0)
+        means = get_mean_event(c.x_test, c.y_test, 0, mask_value=mask_value)

-    plot_NN_vs_var_2D(
-        args.output_filename,
-        means=means,
-        varx_index=varx_index,
-        vary_index=vary_index,
-        scorefun=get_single_neuron_function(c.model, layer, neuron, scaler=c.scaler),
-        xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
-        ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
-        varx_label=varx_label, vary_label=vary_label,
-        logscale=args.log, only_pixels=(not args.contour)
-    )
+    print(means)
+
+    if hasattr(c, "get_input_list"):
+        input_transform = lambda x : c.get_input_list(c.transform(x))
+    else:
+        input_transform = c.transform
+
+    if not args.all_neurons:
+        plot_NN_vs_var_2D(
+            args.output_filename,
+            means=means,
+            varx_index=varx_index,
+            vary_index=vary_index,
+            scorefun=get_single_neuron_function(
+                c.model, layer, neuron,
+                input_transform=input_transform
+            ),
+            xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
+            ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
+            varx_label=varx_label, vary_label=vary_label,
+            logscale=args.log, only_pixels=(not args.contour)
+        )
+    else:
+        if hasattr(c, "get_input_list"):
+            transform_function = lambda inp : c.get_input_list(c.scaler.transform(inp))
+        else:
+            transform_function = c.scaler.transform
+        plot_NN_vs_var_2D_all(
+            args.output_filename,
+            means=means,
+            model=c.model,
+            transform_function=transform_function,
+            varx_index=varx_index,
+            vary_index=vary_index,
+            xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
+            ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
+            logz=args.log,
+            plot_last_layer=False,
+        )

 elif args.mode.startswith("profile"):

+    def my_average(x, weights):
+        "Weighted average of x - NaN if the sum of weights is not positive."
+        no_positive_weight = weights.sum() <= 0
+        return np.nan if no_positive_weight else np.average(x, weights=weights)
+
    metric_dict = {
        "mean" : np.mean,
        "max" : np.max,
-        "average" : np.average,
+        "average" : my_average,
    }

    if args.mode == "profile_sig":
@@ -150,10 +216,20 @@ elif args.mode.startswith("hist"):
        weights = c.w_test[c.y_test==class_index]
    else:
        # ranges in which to sample the random events
-        x_test_scaled = c.scaler.transform(c.x_test)
-        ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.fields))]
-        losses, events = get_max_activation_events(c.model, ranges, ntries=args.ntries_actmax, step=args.step_size, layer=layer, neuron=neuron, threshold=args.threshold)
-        events = c.scaler.inverse_transform(events)
+        x_test_scaled = c.transform(c.x_test)
+        ranges = get_ranges(x_test_scaled, [0.01, 0.99], total_weights, mask_value=mask_value)
+        kwargs = {}
+        if hasattr(c, "get_input_list"):
+            kwargs["input_transform"] = c.get_input_list
+            kwargs["input_inverse_transform"] = c.get_input_flat
+        losses, events = get_max_activation_events(c.model, ranges,
+                                                   ntries=args.ntries_actmax,
+                                                   step=args.step_size,
+                                                   layer=layer,
+                                                   neuron=neuron,
+                                                   threshold=args.threshold,
+                                                   **kwargs)
+        events = c.inverse_transform(events)
        valsx = events[:,varx_index]
        if not plot_vs_activation:
            valsy = events[:,vary_index]
@@ -173,10 +249,10 @@ elif args.mode.startswith("hist"):

 elif args.mode.startswith("cond_actmax"):

-    x_test_scaled = c.scaler.transform(c.x_test)
+    x_test_scaled = c.transform(c.x_test)

    # ranges in which to sample the random events
-    ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.fields))]
+    ranges = get_ranges(x_test_scaled, [0.01, 0.99], total_weights, mask_value=mask_value)

    plot_cond_avg_actmax_2D(
        args.output_filename,

--- a/scripts/write_parametrized.py
+++ b/scripts/write_parametrized.py
+#!/usr/bin/env python
+"""
+Write new TTrees with signal parameters as branches. For the
+backgrounds the parameters are generated following the total
+distribution for all signals. The discrete values for the whole ntuple
+of signal parameters are counted, such that correlations between
+signal parameters are taken into account.
+"""
+
+import argparse, re, os
+
+import ROOT
+
+from root_numpy import list_trees
+from root_pandas import read_root
+import numpy as np
+
+if __name__ == "__main__":
+
+    input_filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"
+    output_filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys_parametrized.root"
+
+    # names of the parameter branches, in the order of the groups in param_match
+    param_names = ["mg", "mc", "mn"]
+
+    # signal tree names carry the parameter values (one regex group per parameter)
+    param_match = "GG_oneStep_(.*?)_(.*?)_(.*?)_NoSys"
+
+    # all signal trees are appended to this single output tree
+    output_signal_treename = "GG_oneStep_NoSys"
+
+    bkg_trees = [
+        "diboson_Sherpa221_NoSys",
+        "singletop_NoSys",
+        "ttbar_NoSys",
+        "ttv_NoSys",
+        "wjets_Sherpa221_NoSys",
+        "zjets_Sherpa221_NoSys",
+    ]
+
+    # read in the number of events for each combination of parameters
+    f = ROOT.TFile.Open(input_filename)
+    count_dict = {}
+    for key in f.GetListOfKeys():
+        tree_name = key.GetName()
+        match = re.match(param_match, tree_name)
+        if match is not None:
+            tree = f.Get(tree_name)
+            params = tuple(float(i) for i in match.groups())
+            # TODO: might be better to use sum of weights
+            count_dict[params] = count_dict.get(params, 0) + tree.GetEntries()
+    f.Close()
+
+    # calculate cumulative sum of counts to sample signal parameters for background from
+    # (keys/values are turned into lists first: on python 3 they are views,
+    # which numpy cannot convert into a numeric array)
+    numbers = np.array(list(count_dict.keys()), dtype=float)
+    counts = np.array(list(count_dict.values()), dtype=float)
+    probs = counts/counts.sum()
+    prob_bins = np.cumsum(probs)
+
+    # read and write the rest in chunks
+    if os.path.exists(output_filename):
+        os.remove(output_filename)
+    for tree_name in list_trees(input_filename):
+        match_signal = re.match(param_match, tree_name)
+        is_signal = match_signal is not None
+        if not is_signal and tree_name not in bkg_trees:
+            # neither a signal tree nor one of the requested backgrounds
+            continue
+        print("Writing {}".format(tree_name))
+        out_tree_name = output_signal_treename if is_signal else tree_name
+        nwritten = 0
+        for df in read_root(input_filename, tree_name, chunksize=100000):
+            print("Writing event {}".format(nwritten))
+            if is_signal:
+                # signal: the parameters are fixed by the tree name
+                for param_name, param_value in zip(param_names, match_signal.groups()):
+                    df[param_name] = float(param_value)
+                df["training_weight"] = df["eventWeight"]
+            else:
+                # background: draw parameter combinations following the signal counts
+                rnd = np.random.random(len(df))
+                # the last cumulative edge can end up slightly below 1 due to
+                # rounding - clip, such that the index never runs past the last combination
+                rnd_idx = np.minimum(np.digitize(rnd, prob_bins), len(numbers) - 1)
+                param_values = numbers[rnd_idx]
+                for param_idx, param_name in enumerate(param_names):
+                    df[param_name] = param_values[:,param_idx]
+                df["training_weight"] = df["eventWeight"]*df["genWeight"]
+            df.to_root(output_filename, mode="a", key=out_tree_name)
+            nwritten += len(df)
--- a/test/test_toolkit.py
+++ b/test/test_toolkit.py
+import pytest
+import numpy as np
+import root_numpy
+import pandas as pd
+from sklearn.datasets import make_classification
+from keras.layers import GRU
+
+from KerasROOTClassification import ClassificationProject, ClassificationProjectRNN
+
+
+def create_dataset(path):
+    """
+    Write a toy signal/background dataset to ROOT files below `path`.
+
+    :param path: directory (combined with file names via `/`) that receives "sig.root" and "bkg.root"
+
+    Returns the list of branch names, the path of the signal file and
+    the path of the background file (in this order).
+    """
+
+    # local generator with fixed seed, such that the loss thresholds in the
+    # tests are checked against the same data in every run
+    rng = np.random.RandomState(1)
+
+    # create example dataset with (low-weighted) noise added
+    X, y = make_classification(n_samples=10000, random_state=1)
+    n_samples, n_features = X.shape
+    n_noise_bkg = n_samples // 2
+    X2 = rng.normal(size=(n_samples, n_features))
+    y2 = np.concatenate([np.zeros(n_noise_bkg), np.ones(n_samples - n_noise_bkg)])
+    X = np.concatenate([X, X2])
+    y = np.concatenate([y, y2])
+    w = np.concatenate([np.ones(n_samples), 0.01*np.ones(n_samples)])
+
+    # shift and scale randomly (to check if transformation is working)
+    shift = rng.rand(n_features)*100
+    scale = rng.rand(n_features)*1000
+    X *= scale
+    X += shift
+
+    # write to root files
+    branches = ["var_{}".format(i) for i in range(n_features)]
+    df = pd.DataFrame(X, columns=branches)
+    df["class"] = y
+    df["weight"] = w
+    tree_path_bkg = str(path / "bkg.root")
+    tree_path_sig = str(path / "sig.root")
+    # to_records keeps the DataFrame index as an "index" field in the written records
+    root_numpy.array2root(df[df["class"]==0].to_records(), tree_path_bkg)
+    root_numpy.array2root(df[df["class"]==1].to_records(), tree_path_sig)
+    return branches, tree_path_sig, tree_path_bkg
+
+
+def test_ClassificationProject(tmp_path):
+    "Train a ClassificationProject on the generated dataset and require a validation loss below 0.18"
+    branches, tree_path_sig, tree_path_bkg = create_dataset(tmp_path)
+    project_opts = dict(
+        bkg_trees=[(tree_path_bkg, "tree")],
+        signal_trees=[(tree_path_sig, "tree")],
+        branches=branches,
+        weight_expr="weight",
+        identifiers=["index"],
+        optimizer="Adam",
+        earlystopping_opts=dict(patience=5),
+        dropout=0.5,
+        layers=3,
+        nodes=128,
+    )
+    c = ClassificationProject(str(tmp_path / "project"), **project_opts)
+
+    c.train(epochs=200)
+    # the plotting methods are only called to check that they run
+    c.plot_all_inputs()
+    c.plot_loss()
+    best_val_loss = min(c.history.history["val_loss"])
+    assert best_val_loss < 0.18
+
+
+def test_ClassificationProjectRNN(tmp_path):
+    "Train a ClassificationProjectRNN with two recurrent field groups: expect two GRU layers and a validation loss below 0.18"
+    branches, tree_path_sig, tree_path_bkg = create_dataset(tmp_path)
+    # two groups of recurrent fields, each made of two lists with three variables
+    recurrent_field_names = [
+        [
+            ["var_1", "var_2", "var_3"],
+            ["var_4", "var_5", "var_6"]
+        ],
+        [
+            ["var_10", "var_11", "var_12"],
+            ["var_13", "var_14", "var_15"]
+        ],
+    ]
+    project_opts = dict(
+        bkg_trees=[(tree_path_bkg, "tree")],
+        signal_trees=[(tree_path_sig, "tree")],
+        branches=branches,
+        recurrent_field_names=recurrent_field_names,
+        weight_expr="weight",
+        identifiers=["index"],
+        optimizer="Adam",
+        earlystopping_opts=dict(patience=5),
+        dropout=0.5,
+        layers=3,
+        nodes=128,
+    )
+    c = ClassificationProjectRNN(str(tmp_path / "project"), **project_opts)
+    # one GRU layer is expected per group of recurrent fields
+    gru_layers = [layer for layer in c.model.layers if isinstance(layer, GRU)]
+    assert len(gru_layers) == 2
+    c.train(epochs=200)
+    c.plot_all_inputs()
+    c.plot_loss()
+    best_val_loss = min(c.history.history["val_loss"])
+    assert best_val_loss < 0.18
--- a/toolkit.py
+++ b/toolkit.py
 #!/usr/bin/env python

+__all__ = ["load_from_dir", "ClassificationProject", "ClassificationProjectDataFrame", "ClassificationProjectRNN"]
+
 from sys import version_info

 if version_info[0] > 2:
@@ -10,13 +12,14 @@ else:

 import os
 import json
-import yaml
 import pickle
 import importlib
 import csv
 import math
 import glob
 import shutil
+import gc
+import random

 import logging
 logger = logging.getLogger("KerasROOTClassification")
@@ -29,31 +32,25 @@ import h5py
 from sklearn.preprocessing import StandardScaler, RobustScaler
 from sklearn.externals import joblib
 from sklearn.metrics import roc_curve, auc
-from keras.models import Sequential
-from keras.layers import Dense, Dropout
-from keras.models import model_from_json
-from keras.callbacks import History, EarlyStopping, CSVLogger, ModelCheckpoint
+from sklearn.utils.extmath import stable_cumsum
+from sklearn.model_selection import KFold
+from keras.models import Sequential, Model, model_from_json
+from keras.layers import Dense, Dropout, Input, Masking, GRU, LSTM, concatenate, SimpleRNN
+from keras.callbacks import History, EarlyStopping, CSVLogger, ModelCheckpoint, TensorBoard
 from keras.optimizers import SGD
+from keras.activations import relu
+import keras.initializers
 import keras.optimizers
+from keras.utils.vis_utils import model_to_dot
+from keras import backend as K
+import tensorflow as tf
 import matplotlib.pyplot as plt

-from .utils import WeightedRobustScaler, weighted_quantile
-
-# configure number of cores
-# this doesn't seem to work, but at least with these settings keras only uses 4 processes
-import tensorflow as tf
-from keras import backend as K
-num_cores = 1
-config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,
-                        inter_op_parallelism_threads=num_cores,
-                        allow_soft_placement=True,
-                        device_count = {'CPU': num_cores})
-session = tf.Session(config=config)
-K.set_session(session)
+from .utils import WeightedRobustScaler, weighted_quantile, poisson_asimov_significance
+from .plotting import save_show

 import ROOT

-
 def byteify(input):
    "From stackoverflow https://stackoverflow.com/a/13105359"
    if isinstance(input, dict):
@@ -69,8 +66,43 @@ def byteify(input):
 if version_info[0] > 2:
    byteify = lambda input : input

+
+def set_session_threads(n_cpu=None):
+    "Create a keras session that uses n_cpu threads (if not given: the value of OMP_NUM_THREADS)"
+
+    if n_cpu is None:
+        omp_threads = os.environ.get('OMP_NUM_THREADS')
+        if not omp_threads:
+            return  # no number given and none in the environment: no session is created
+        n_cpu = int(omp_threads)
+
+    # not sure if this is the best configuration ...
+    session_opts = dict(intra_op_parallelism_threads=n_cpu,
+                        inter_op_parallelism_threads=1,
+                        allow_soft_placement=True,
+                        #log_device_placement=True,
+                        device_count={'CPU': n_cpu})
+    config = tf.ConfigProto(**session_opts)
+    K.set_session(tf.Session(config=config))
+
+
+def load_from_dir(path):
+    "Load the project stored in directory `path`, using the project type recorded in its info.json"
+    info_path = os.path.join(path, "info.json")
+    try:
+        with open(info_path) as info_file:
+            project_type = json.load(info_file)["project_type"]
+        if project_type == "ClassificationProjectRNN":
+            return ClassificationProjectRNN(path)
+    except (KeyError, IOError):
+        pass  # raised anywhere above (also by the RNN constructor): use the default type
+    return ClassificationProject(path)
+
+
 class ClassificationProject(object):

+    verbose = 1 # verbosity of the fit method
+
    """Simple framework to load data from ROOT TTrees and train Keras
    neural networks for classification according to some global settings.

@@ -90,6 +122,8 @@ class ClassificationProject(object):

    :param branches: list of branch names or expressions to be used as input values for training

+    :param regression_branches: list of branch names to be used as regression targets
+
    :param rename_branches: dictionary that maps branch expressions to names for better readability

    :param weight_expr: expression to weight the events in the loss function
@@ -110,16 +144,26 @@ class ClassificationProject(object):

    :param nodes: list number of nodes in each layer. If only a single number is given, use this number for every layer

-    :param dropout: dropout fraction after each hidden layer. Set to None for no Dropout
+    :param dropout: dropout fraction after each hidden layer. You can also pass a list for dropout fractions for each layer. Set to None for no Dropout.
+
+    :param dropout_input: dropout fraction for the input layer. Set to None for no Dropout.
+
+    :param use_bias: use bias constant for each neuron? (default: True). You can also pass a list of booleans for each layer.

    :param batch_size: size of the training batches

    :param validation_split: split off this fraction of training events for loss evaluation

+    :param kfold_splits: if given, split into this number of subsets to perform KFold cross validation
+
+    :param kfold_index: index of the subset to leave out for kfold validation
+
    :param activation_function: activation function in the hidden layers

    :param activation_function_output: activation function in the output layer

+    :param leaky_relu_alpha: set this to a non-zero value to use the LeakyReLU variant with a slope in the negative part
+
    :param out_dir: base directory in which the project directories should be stored

    :param scaler_type: sklearn scaler class name to transform the data before training (options: "StandardScaler", "RobustScaler")
@@ -128,6 +172,10 @@ class ClassificationProject(object):

    :param step_bkg: step size when selecting background training events (e.g. 2 means take every second event)

+    :param stop_train: stop after this number of events for reading in training events
+
+    :param stop_test: stop after this number of events for reading in test events
+
    :param optimizer: name of optimizer class in keras.optimizers

    :param optimizer_opts: dictionary of options for the optimizer
@@ -143,16 +191,36 @@ class ClassificationProject(object):
                                 you change the format of the saved model weights it has to be of
                                 the form "weights*.h5"

+    :param use_tensorboard: if True, use the tensorboard callback to write logs for tensorboard
+
+    :param tensorboard_opts: options for the TensorBoard callback
+
    :param balance_dataset: if True, balance the dataset instead of
                            applying class weights. Only a fraction of the overrepresented
                            class will be used in each epoch, but different subsets of the
                            overrepresented class will be used in each epoch.

-    :param random_seed: use this seed value when initialising the model and produce consistent results. Note:
-                        random data is also used for shuffling the training data, so results may vary still. To
-                        produce consistent results, set the numpy random seed before training.
+    :param random_seed: use this seed value when initialising the model and produce consistent results.
+
+    :param shuffle_seed: use this seed for shuffling the training data
+                         the first time. This seed (increased by one) is used again before
+                         training when keras shuffling is used.
+
+    :param loss: loss function name (or list of names in case of regression targets)
+
+    :param loss_weights: (optional) list of weights to weight the individual losses (for multiple targets)

-    :param loss: loss function name
+    :param mask_value: value that is used for non-existent entries (e.g. 4th jet pt in events with 3 jets)
+
+    :param apply_class_weight: apply a weight that scales the events such that sumw(signal) = sumw(background)
+
+    :param normalize_weights: normalize the weights to mean 1
+
+    :param ignore_neg_weights: ignore events with negative weights in training - not recommended! (default: False)
+
+    :param kernel_initializer: weight initializer for the dense layers - if None (default) the keras defaults are used
+
+    :param shuffle: shuffle training data after (and before first) epoch

    """

@@ -188,6 +256,7 @@ class ClassificationProject(object):

    def _init_from_args(self, name,
                        signal_trees, bkg_trees, branches, weight_expr,
+                        regression_branches=None,
                        rename_branches=None,
                        project_dir=None,
                        data_dir=None,
@@ -196,22 +265,40 @@ class ClassificationProject(object):
                        layers=3,
                        nodes=64,
                        dropout=None,
+                        dropout_input=None,
+                        use_bias=True,
                        batch_size=128,
                        validation_split=0.33,
+                        kfold_splits=None,
+                        kfold_index=0,
                        activation_function='relu',
+                        leaky_relu_alpha=None,
                        activation_function_output='sigmoid',
                        scaler_type="WeightedRobustScaler",
                        step_signal=2,
                        step_bkg=2,
+                        stop_train=None,
+                        stop_test=None,
                        optimizer="SGD",
                        optimizer_opts=None,
                        use_earlystopping=True,
                        earlystopping_opts=None,
                        use_modelcheckpoint=True,
                        modelcheckpoint_opts=None,
+                        use_tensorboard=False,
+                        tensorboard_opts=None,
                        random_seed=1234,
+                        shuffle_seed=42,
                        balance_dataset=False,
-                        loss='binary_crossentropy'):
+                        loss='binary_crossentropy',
+                        loss_weights=None,
+                        mask_value=None,
+                        apply_class_weight=True,
+                        normalize_weights=True,
+                        ignore_neg_weights=False,
+                        kernel_initializer=None,
+                        shuffle=True,
+    ):

        self.name = name
        self.signal_trees = signal_trees
@@ -220,6 +307,9 @@ class ClassificationProject(object):
        if rename_branches is None:
            rename_branches = {}
        self.rename_branches = rename_branches
+        if regression_branches is None:
+            regression_branches = []
+        self.regression_branches = regression_branches
        self.weight_expr = weight_expr
        self.selection = selection

@@ -243,16 +333,34 @@ class ClassificationProject(object):
            logger.warning("Number of layers not equal to the given nodes "
                           "per layer - adjusted to " + str(self.layers))
        self.dropout = dropout
+        if not isinstance(self.dropout, list):
+            self.dropout = [self.dropout for i in range(self.layers)]
+        self.dropout_input = dropout_input
+        self.use_bias = use_bias
+        if not isinstance(self.use_bias, list):
+            self.use_bias = [self.use_bias for i in range(self.layers)]
+        if len(self.dropout) != self.layers:
+            raise ValueError("List of dropout fractions has to be of equal size as the number of layers!")
+        if len(self.use_bias) != self.layers:
+            raise ValueError("List biases has to be of equal size as the number of layers!")
        self.batch_size = batch_size
        self.validation_split = validation_split
+        self.kfold_splits = kfold_splits
+        self.kfold_index = kfold_index
        self.activation_function = activation_function
+        self.leaky_relu_alpha = leaky_relu_alpha
+        if self.activation_function == "relu" and self.leaky_relu_alpha:
+            self.activation_function = lambda x : relu(x, alpha=self.leaky_relu_alpha)
        self.activation_function_output = activation_function_output
        self.scaler_type = scaler_type
        self.step_signal = step_signal
        self.step_bkg = step_bkg
+        self.stop_train = stop_train
+        self.stop_test = stop_test
        self.optimizer = optimizer
        self.use_earlystopping = use_earlystopping
        self.use_modelcheckpoint = use_modelcheckpoint
+        self.use_tensorboard = use_tensorboard
        if optimizer_opts is None:
            optimizer_opts = dict()
        self.optimizer_opts = optimizer_opts
@@ -266,9 +374,25 @@ class ClassificationProject(object):
                filepath="weights.h5"
            )
        self.modelcheckpoint_opts = modelcheckpoint_opts
+        self.tensorboard_opts = dict(
+            log_dir=os.path.join(self.project_dir, "tensorboard"),
+        )
+        if tensorboard_opts is not None:
+            self.tensorboard_opts.update(**tensorboard_opts)
        self.random_seed = random_seed
+        self.shuffle_seed = shuffle_seed
        self.balance_dataset = balance_dataset
        self.loss = loss
+        self.loss_weights = loss_weights
+        if self.regression_branches and (not isinstance(self.loss, list)):
+            self.loss = [self.loss]+["mean_squared_error"]*len(self.regression_branches)
+
+        self.mask_value = mask_value
+        self.apply_class_weight = apply_class_weight
+        self.normalize_weights = normalize_weights
+        self.ignore_neg_weights = ignore_neg_weights
+        self.kernel_initializer = kernel_initializer
+        self.shuffle = shuffle

        self.s_train = None
        self.b_train = None
@@ -284,29 +408,31 @@ class ClassificationProject(object):
        self._scores_train = None
        self._scores_test = None

-        # class weighted validation data
-        self._w_validation = None
+        # class weighted training data (divided by mean)
+        self._w_train_tot = None

        self._s_eventlist_train = None
        self._b_eventlist_train = None

        self._scaler = None
+        self._scaler_target = None
        self._class_weight = None
        self._balanced_class_weight = None
        self._model = None
        self._history = None
        self._callbacks_list = []
+        self._train_val_idx = None

        # track the number of epochs this model has been trained
        self.total_epochs = 0

        self.data_loaded = False
-        self.data_transformed = False

        # track if we are currently training
        self.is_training = False

        self._fields = None
+        self._target_fields = None


    @property
@@ -319,6 +445,16 @@ class ClassificationProject(object):
        return self._fields


+    @property
+    def target_fields(self):
+        "Names of the regression targets after applying rename_branches (built on first access)"
+        if self._target_fields is None:
+            renamed = self.rename_branches
+            self._target_fields = [
+                renamed.get(expr, expr) for expr in self.regression_branches]
+        return self._target_fields
+
+
    def rename_fields(self, ar):
        "Rename fields of structured array"
        fields = list(ar.dtype.names)
@@ -330,12 +466,14 @@ class ClassificationProject(object):

    def _load_data(self):

+        self._w_train_tot = None
+
        try:

            # if those don't exist, we need to load them from ROOT trees first
            self._load_from_hdf5(*self.dataset_names_tree)

-        except KeyError:
+        except (KeyError, IOError):

            logger.info("Couldn't load all datasets - reading from ROOT trees")

@@ -347,21 +485,25 @@ class ClassificationProject(object):
            for filename, treename in self.bkg_trees:
                bkg_chain.AddFile(filename, -1, treename)
            self.s_train = tree2array(signal_chain,
-                                      branches=self.branches+[self.weight_expr]+self.identifiers,
+                                      branches=set(self.branches+self.regression_branches+[self.weight_expr]+self.identifiers),
                                      selection=self.selection,
-                                      start=0, step=self.step_signal)
+                                      start=0, step=self.step_signal, stop=self.stop_train)
            self.b_train = tree2array(bkg_chain,
-                                      branches=self.branches+[self.weight_expr]+self.identifiers,
+                                      branches=set(self.branches+self.regression_branches+[self.weight_expr]+self.identifiers),
                                      selection=self.selection,
-                                      start=0, step=self.step_bkg)
+                                      start=0, step=self.step_bkg, stop=self.stop_train)
            self.s_test = tree2array(signal_chain,
-                                     branches=self.branches+[self.weight_expr],
+                                     branches=set(self.branches+self.regression_branches+[self.weight_expr]),
                                     selection=self.selection,
-                                     start=1, step=self.step_signal)
+                                     start=1, step=self.step_signal, stop=self.stop_test)
            self.b_test = tree2array(bkg_chain,
-                                     branches=self.branches+[self.weight_expr],
+                                     branches=set(self.branches+self.regression_branches+[self.weight_expr]),
                                     selection=self.selection,
-                                     start=1, step=self.step_bkg)
+                                     start=1, step=self.step_bkg, stop=self.stop_test)
+
+            if self.ignore_neg_weights:
+                self.s_train = self.s_train[self.s_train[self.weight_expr]>0]
+                self.b_train = self.b_train[self.b_train[self.weight_expr]>0]

            self.rename_fields(self.s_train)
            self.rename_fields(self.b_train)
@@ -372,27 +514,38 @@ class ClassificationProject(object):
            self.b_eventlist_train = self.b_train[self.identifiers].astype(dtype=[(branchName, "u8") for branchName in self.identifiers])
            self._dump_training_list()

-            # now we don't need the identifiers anymore
-            self.s_train = self.s_train[self.fields+[self.weight_expr]]
-            self.b_train = self.b_train[self.fields+[self.weight_expr]]
-
            # create x (input), y (target) and w (weights) arrays
            # the first block will be signals, the second block backgrounds
            self.x_train = rec2array(self.s_train[self.fields])
            self.x_train = np.concatenate((self.x_train, rec2array(self.b_train[self.fields])))
-            self.x_test = rec2array(self.s_test[self.fields])
-            self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields])))
            self.w_train = self.s_train[self.weight_expr]
            self.w_train = np.concatenate((self.w_train, self.b_train[self.weight_expr]))
+
+            def fill_target(x, s, b):
+                # class label: 1 for the first len(s) entries of x, 0 for the rest; with regression
+                # targets the label is column 0 and the target values (from s, then b) fill the others
+                if not self.target_fields:
+                    y = np.zeros(len(x), dtype=bool)
+                    y[:len(s)] = 1
+                else:
+                    y = np.zeros((len(x), 1+len(self.target_fields)), dtype=float)
+                    y[:len(s),0] = 1
+                    y[:len(s),1:] = rec2array(s[self.target_fields])
+                    y[len(s):,1:] = rec2array(b[self.target_fields])
+                return y
+
+            self.y_train = fill_target(self.x_train, self.s_train, self.b_train)
+            self.b_train = None
+            self.s_train = None
+
+            self.x_test = rec2array(self.s_test[self.fields])
+            self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields])))
            self.w_test = self.s_test[self.weight_expr]
            self.w_test = np.concatenate((self.w_test, self.b_test[self.weight_expr]))

-            self.y_train = np.empty(len(self.x_train))
-            self.y_train[:len(self.s_train)] = 1
-            self.y_train[len(self.s_train):] = 0
-            self.y_test = np.empty(len(self.x_test))
-            self.y_test[:len(self.s_test)] = 1
-            self.y_test[len(self.s_test):] = 0
+            self.y_test = fill_target(self.x_test, self.s_test, self.b_test)
+            self.b_test = None
+            self.s_test = None

            self._dump_to_hdf5(*self.dataset_names_tree)

@@ -457,7 +610,7 @@ class ClassificationProject(object):
                    os.symlink(srcpath, filename)
                    logger.info("Created symlink from {} to {}".format(srcpath, filename))
            logger.info("Trying to load {} from {}".format(dataset_name, filename))
-            with h5py.File(filename) as hf:
+            with h5py.File(filename, "r") as hf:
                setattr(self, dataset_name,  hf[dataset_name][:])
        logger.info("Data loaded")

@@ -474,6 +627,8 @@ class ClassificationProject(object):
            if not os.path.dirname(mc.filepath) == self.project_dir:
                mc.filepath = os.path.join(self.project_dir, mc.filepath)
                logger.debug("Prepending project dir to ModelCheckpoint filepath: {}".format(mc.filepath))
+        if self.use_tensorboard:
+            self._callbacks_list.append(TensorBoard(**self.tensorboard_opts))
        self._callbacks_list.append(CSVLogger(os.path.join(self.project_dir, "training.log"), append=True))
        return self._callbacks_list

@@ -495,15 +650,97 @@ class ClassificationProject(object):
                    self._scaler = RobustScaler()
                elif self.scaler_type == "WeightedRobustScaler":
                    self._scaler = WeightedRobustScaler()
-                    scaler_fit_kwargs["weights"] = self.w_train*np.array(self.class_weight)[self.y_train.astype(int)]
+                    scaler_fit_kwargs["weights"] = self.w_train_tot
+                    scaler_fit_kwargs["mask_value"] = self.mask_value
                else:
                    raise ValueError("Scaler type {} unknown".format(self.scaler_type))
                logger.info("Fitting {} to training data".format(self.scaler_type))
+                orig_copy_setting = self.scaler.copy
+                self.scaler.copy = False
                self._scaler.fit(self.x_train, **scaler_fit_kwargs)
+                self.scaler.copy = orig_copy_setting
                joblib.dump(self._scaler, filename)
        return self._scaler


+    @property
+    def scaler_target(self):
+        "same as scaler, but for scaling regression targets"
+        # create the scaler (and fit to training data) if not existent
+        if self._scaler_target is None:
+            filename = os.path.join(self.project_dir, "scaler_target.pkl")
+            try:
+                self._scaler_target = joblib.load(filename)
+                logger.info("Loaded existing scaler from {}".format(filename))
+            except IOError:
+                logger.info("Creating new {} for scaling the targets".format(self.scaler_type))
+                scaler_fit_kwargs = dict()
+                if self.scaler_type == "StandardScaler":
+                    self._scaler_target = StandardScaler()
+                elif self.scaler_type == "RobustScaler":
+                    self._scaler_target = RobustScaler()
+                elif self.scaler_type == "WeightedRobustScaler":
+                    self._scaler_target = WeightedRobustScaler()
+                    scaler_fit_kwargs["weights"] = self.w_train_tot
+                else:
+                    raise ValueError("Scaler type {} unknown".format(self.scaler_type))
+                logger.info("Fitting {} to training data".format(self.scaler_type))
+                orig_copy_setting = self.scaler.copy
+                self.scaler.copy = False
+                self._scaler_target.fit(self.y_train, **scaler_fit_kwargs)
+                # I don't want to scale the classification target (column 0) here
+                self._scaler_target.center_[0] = 0.
+                self._scaler_target.scale_[0] = 1.
+                self.scaler.copy = orig_copy_setting
+                joblib.dump(self._scaler_target, filename)
+        return self._scaler_target
+
+
+    def _batch_transform(self, x, fn, batch_size):
+        "Transform array in batches, temporarily setting mask_values to nan"
+        transformed = np.empty(x.shape, dtype=x.dtype)
+        for start in range(0, len(x), batch_size):
+            stop = start+batch_size
+            x_batch = np.array(x[start:stop]) # copy
+            x_batch[x_batch == self.mask_value] = np.nan
+            x_batch = fn(x_batch)
+            x_batch[np.isnan(x_batch)] = self.mask_value
+            transformed[start:stop] = x_batch
+        return transformed
+
+
+    def transform(self, x, batch_size=10000):
+        if self.mask_value is not None:
+            return self._batch_transform(x, self.scaler.transform, batch_size)
+        else:
+            return self.scaler.transform(x)
+
+
+    def inverse_transform(self, x, batch_size=10000):
+        if self.mask_value is not None:
+            return self._batch_transform(x, self.scaler.inverse_transform, batch_size)
+        else:
+            return self.scaler.inverse_transform(x)
+
+
+    def transform_target(self, y, batch_size=10000):
+        if not self.target_fields:
+            return y
+        if self.mask_value is not None:
+            return self._batch_transform(y, self.scaler_target.transform, batch_size)
+        else:
+            return self.scaler_target.transform(y)
+
+
+    def inverse_transform_target(self, y, batch_size=10000):
+        if not self.target_fields:
+            return y
+        if self.mask_value is not None:
+            return self._batch_transform(y, self.scaler_target.inverse_transform, batch_size)
+        else:
+            return self.scaler_target.inverse_transform(y)
+
+
    @property
    def history(self):
        params_file = os.path.join(self.project_dir, "history_params.json")
@@ -535,19 +772,6 @@ class ClassificationProject(object):
            json.dump(self.history.history, of)


-    def _transform_data(self):
-        if not self.data_transformed:
-            # todo: what to do about the outliers? Where do they come from?
-            logger.debug("training data before transformation: {}".format(self.x_train))
-            logger.debug("minimum values: {}".format([np.min(self.x_train[:,i]) for i in range(self.x_train.shape[1])]))
-            logger.debug("maximum values: {}".format([np.max(self.x_train[:,i]) for i in range(self.x_train.shape[1])]))
-            self.x_train = self.scaler.transform(self.x_train)
-            logger.debug("training data after transformation: {}".format(self.x_train))
-            self.x_test = self.scaler.transform(self.x_test)
-            self.data_transformed = True
-            logger.info("Training and test data transformed")
-
-
    def _read_info(self, key, default):
        filename = os.path.join(self.project_dir, "info.json")
        if not os.path.exists(filename):
@@ -560,6 +784,9 @@ class ClassificationProject(object):

    def _write_info(self, key, value):
        filename = os.path.join(self.project_dir, "info.json")
+        if not os.path.exists(filename):
+            with open(filename, "w") as of:
+                json.dump({}, of)
        with open(filename) as f:
            info = json.load(f)
        info[key] = value
@@ -586,52 +813,94 @@ class ClassificationProject(object):

        if self._model is None:

-            self._model = Sequential()
-
-            # first hidden layer
-            self._model.add(Dense(self.nodes[0], input_dim=len(self.fields), activation=self.activation_function))
-            # the other hidden layers
-            for node_count, layer_number in zip(self.nodes[1:], range(self.layers-1)):
-                self._model.add(Dense(node_count, activation=self.activation_function))
-                if self.dropout is not None:
-                    self._model.add(Dropout(rate=self.dropout))
-            # last layer is one neuron (binary classification)
-            self._model.add(Dense(1, activation=self.activation_function_output))
-            logger.info("Using {}(**{}) as Optimizer".format(self.optimizer, self.optimizer_opts))
-            Optimizer = getattr(keras.optimizers, self.optimizer)
-            optimizer = Optimizer(**self.optimizer_opts)
-            logger.info("Compile model")
-            rn_state = np.random.get_state()
-            np.random.seed(self.random_seed)
-            self._model.compile(optimizer=optimizer,
-                                loss=self.loss,
-                                metrics=['accuracy'])
-            np.random.set_state(rn_state)
-            if os.path.exists(os.path.join(self.project_dir, "weights.h5")):
-                if self.is_training:
-                    continue_training = self.query_yn("Found previously trained weights - continue training (choosing N will restart)? (Y/N) ")
-                else:
-                    continue_training = True
-                if continue_training:
-                    self.model.load_weights(os.path.join(self.project_dir, "weights.h5"))
-                    logger.info("Found and loaded previously trained weights")
-                else:
-                    logger.info("Starting completely new model")
+
+            # input
+            input_layer = Input((len(self.fields),))
+
+            # optional dropout on inputs
+            if self.dropout_input is None:
+                hidden_layer = input_layer
+            else:
+                hidden_layer = Dropout(rate=self.dropout_input)(input_layer)
+
+            # densely connected hidden layers
+            for node_count, dropout_fraction, use_bias in zip(
+                    self.nodes,
+                    self.dropout,
+                    self.use_bias,
+            ):
+                extra_opts = dict()
+                if self.kernel_initializer is not None:
+                    extra_opts["kernel_initializer"] = getattr(keras.initializers, self.kernel_initializer)()
+                hidden_layer = Dense(node_count, activation=self.activation_function, use_bias=use_bias, **extra_opts)(hidden_layer)
+                if (dropout_fraction is not None) and (dropout_fraction > 0):
+                    hidden_layer = Dropout(rate=dropout_fraction)(hidden_layer)
+
+            # optional regression targets
+            extra_targets = []
+            for target_field in self.target_fields:
+                extra_target = Dense(1, activation="linear", name="target_{}".format(target_field))(hidden_layer)
+                extra_targets.append(extra_target)
+
+            if not self.target_fields:
+                # one output node for binary classification
+                output_layer = Dense(1, activation=self.activation_function_output)(hidden_layer)
+                outputs = [output_layer]
            else:
-                logger.info("No weights found, starting completely new model")
+                # add another hidden layer on top of the regression targets and previous hidden layers
+                merge = concatenate([hidden_layer]+extra_targets)
+                hidden_layer2 = Dense(64, activation=self.activation_function)(merge)
+                output_class = Dense(1, activation=self.activation_function_output)(hidden_layer2)
+                outputs = [output_class]+extra_targets

-            # dump to json for documentation
-            with open(os.path.join(self.project_dir, "model.json"), "w") as of:
-                of.write(self._model.to_json())
+            self._model = Model(inputs=[input_layer], outputs=outputs)
+            self._compile_or_load_model()

        return self._model


+    def _compile_or_load_model(self):
+        logger.info("Using {}(**{}) as Optimizer".format(self.optimizer, self.optimizer_opts))
+        Optimizer = getattr(keras.optimizers, self.optimizer)
+        optimizer = Optimizer(**self.optimizer_opts)
+        logger.info("Compile model")
+        rn_state = np.random.get_state()
+        np.random.seed(self.random_seed)
+        self._model.compile(optimizer=optimizer,
+                            loss=self.loss,
+                            loss_weights=self.loss_weights,
+                            weighted_metrics=['accuracy']
+        )
+        np.random.set_state(rn_state)
+
+        if os.path.exists(os.path.join(self.project_dir, "weights.h5")):
+            if self.is_training:
+                continue_training = self.query_yn("Found previously trained weights - "
+                                                  "continue training (choosing N will restart)? (Y/N) ")
+            else:
+                continue_training = True
+            if continue_training:
+                self.model.load_weights(os.path.join(self.project_dir, "weights.h5"))
+                logger.info("Found and loaded previously trained weights")
+            else:
+                logger.info("Starting completely new model")
+        else:
+            logger.info("No weights found, starting completely new model")
+
+        # dump to json for documentation
+        with open(os.path.join(self.project_dir, "model.json"), "w") as of:
+            of.write(self._model.to_json())
+
+        # plot model
+        with open(os.path.join(self.project_dir, "model.svg"), "wb") as of:
+            of.write(model_to_dot(self._model, show_shapes=True).create("dot", format="svg"))
+
+
    @property
    def class_weight(self):
        if self._class_weight is None:
-            sumw_bkg = np.sum(self.w_train[self.y_train == 0])
-            sumw_sig = np.sum(self.w_train[self.y_train == 1])
+            sumw_bkg = np.sum(self.w_train[self.l_train == 0])
+            sumw_sig = np.sum(self.w_train[self.l_train == 1])
            self._class_weight = [(sumw_sig+sumw_bkg)/(2*sumw_bkg), (sumw_sig+sumw_bkg)/(2*sumw_sig)]
            logger.debug("Calculated class_weight: {}".format(self._class_weight))
        return self._class_weight
@@ -646,11 +915,11 @@ class ClassificationProject(object):
        event with class weights
        """
        if self._balanced_class_weight is None:
-            sumw_bkg = np.sum(self.w_train[self.y_train == 0])
-            sumw_sig = np.sum(self.w_train[self.y_train == 1])
+            sumw_bkg = np.sum(self.w_train[self.l_train == 0])
+            sumw_sig = np.sum(self.w_train[self.l_train == 1])
            # use sumw *per event* in this case
-            sumw_bkg /= len(self.w_train[self.y_train == 0])
-            sumw_sig /= len(self.w_train[self.y_train == 1])
+            sumw_bkg /= len(self.w_train[self.l_train == 0])
+            sumw_sig /= len(self.w_train[self.l_train == 1])
            self._balanced_class_weight = [(sumw_sig+sumw_bkg)/(2*sumw_bkg), (sumw_sig+sumw_bkg)/(2*sumw_sig)]
            logger.debug("Calculated balanced_class_weight: {}".format(self._balanced_class_weight))
        return self._balanced_class_weight
@@ -661,73 +930,167 @@ class ClassificationProject(object):

        if reload:
            self.data_loaded = False
-            self.data_transformed = False

        if not self.data_loaded:
            self._load_data()

-        if not self.data_transformed:
-            self._transform_data()

+    @property
+    def l_train(self):
+        "labels (in case y contains regression targets)"
+        if not self.target_fields:
+            return self.y_train
+        else:
+            return self.y_train[:,0]

-    def shuffle_training_data(self):
-        rn_state = np.random.get_state()
-        np.random.shuffle(self.x_train)
-        np.random.set_state(rn_state)
-        np.random.shuffle(self.y_train)
-        np.random.set_state(rn_state)
-        np.random.shuffle(self.w_train)
-        if self._scores_train is not None:
-            logger.info("Shuffling scores, since they are also there")
-            np.random.set_state(rn_state)
-            np.random.shuffle(self._scores_train)
+
+    @property
+    def l_test(self):
+        "labels (in case y contains regression targets)"
+        if not self.target_fields:
+            return self.y_test
+        else:
+            return self.y_test[:,0]


    @property
-    def w_validation(self):
-        "class weighted validation data weights"
-        split_index = int((1-self.validation_split)*len(self.x_train))
-        if self._w_validation is None:
-            self._w_validation = np.array(self.w_train[split_index:])
-            self._w_validation[self.y_train[split_index:]==0] *= self.class_weight[0]
-            self._w_validation[self.y_train[split_index:]==1] *= self.class_weight[1]
-        return self._w_validation
+    def w_train_tot(self):
+        "(sample weight * class weight), divided by mean"
+        if not self.balance_dataset:
+            class_weight = self.class_weight
+        else:
+            class_weight = self.balanced_class_weight
+        if not self.data_loaded:
+            raise ValueError("Data not loaded! can't calculate total weight")
+        if self._w_train_tot is None:
+            if self.apply_class_weight:
+                self._w_train_tot = self.w_train*np.array(class_weight)[self.l_train.astype(int)]
+            else:
+                self._w_train_tot = np.array(self.w_train)
+            if self.normalize_weights:
+                self._w_train_tot /= np.mean(self._w_train_tot)
+        return self._w_train_tot


    @property
-    def class_weighted_validation_data(self):
-        "class weighted validation data. Attention: Shuffle training data before using this!"
-        split_index = int((1-self.validation_split)*len(self.x_train))
-        return self.x_train[split_index:], self.y_train[split_index:], self.w_validation
+    def validation_data(self):
+        "(Transformed) validation data for loss evaluation"
+        idx = self.train_val_idx[1]
+        x_val, y_val, w_val = self.x_train[idx], self.y_train[idx], self.w_train_tot[idx]
+        x_val_input = self.get_input_list(self.transform(x_val))
+        y_val_output = self.get_output_list(self.transform_target(y_val))
+        w_val_list = self.get_weight_list(w_val)
+        return x_val_input, y_val_output, w_val_list


    @property
    def training_data(self):
-        "training data with validation data split off. Attention: Shuffle training data before using this!"
-        split_index = int((1-self.validation_split)*len(self.x_train))
-        return self.x_train[:split_index], self.y_train[:split_index], self.w_train[:split_index]
+        "(Transformed) Training data with validation data split off"
+        idx = self.train_val_idx[0]
+        x_train, y_train, w_train =  self.x_train[idx], self.y_train[idx], self.w_train_tot[idx]
+        x_train_input = self.get_input_list(self.transform(x_train))
+        y_train_output = self.get_output_list(self.transform_target(y_train))
+        w_train_list = self.get_weight_list(w_train)
+        return x_train_input, y_train_output, w_train_list


-    def yield_batch(self, class_label):
+    @property
+    def train_val_idx(self):
+        if self._train_val_idx is None:
+            if self.kfold_splits is not None:
+                kfold = KFold(self.kfold_splits, shuffle=self.shuffle, random_state=self.shuffle_seed)
+                for i, train_val_idx in enumerate(kfold.split(self.x_train)):
+                    if i == self.kfold_index:
+                        self._train_val_idx = train_val_idx
+                        break
+                else:
+                    raise IndexError("Index {} out of range for kfold (requested {} splits)".format(self.kfold_index, self.kfold_splits))
+            else:
+                split_index = int((1-self.validation_split)*len(self.x_train))
+                np.random.seed(self.shuffle_seed)
+                if self.shuffle:
+                    shuffled_idx = np.random.permutation(len(self.x_train))
+                else:
+                    shuffled_idx = np.arange(len(self.x_train))
+                self._train_val_idx = (shuffled_idx[:split_index], shuffled_idx[split_index:])
+        return self._train_val_idx
+
+
+    @property
+    def steps_per_epoch(self):
+        return int(float(len(self.train_val_idx[0]))/float(self.batch_size))
+
+
+    def get_input_list(self, x):
+        "For the standard Dense models with single input, this does nothing"
+        return x
+
+
+    def get_output_list(self, y):
+        "Split target vector column wise in case of regression targets"
+        if not self.target_fields:
+            return y
+        else:
+            return np.hsplit(y, len(self.target_fields)+1)
+
+
+    def get_weight_list(self, w):
+        "Repeat weight n times for regression targets"
+        if not self.target_fields:
+            return w
+        else:
+            return [w]*(len(self.target_fields)+1)
+
+
+    def yield_batch(self):
+        "Batch generator - optionally shuffle the indices after each epoch"
+        x_train, y_train, w_train = self.x_train, self.y_train, self.w_train_tot
+        train_idx = list(self.train_val_idx[0])
+        np.random.seed(self.shuffle_seed+1)
+        logger.info("Generating training batches from {} signal and {} background events"
+                    .format(len(np.where(self.l_train[train_idx]==1)[0]),
+                            len(np.where(self.l_train[train_idx]==0)[0])))
+        while True:
+            if self.shuffle:
+                shuffled_idx = np.random.permutation(train_idx)
+            else:
+                shuffled_idx = train_idx
+            for start in range(0, len(shuffled_idx), int(self.batch_size)):
+                x_batch = x_train[shuffled_idx[start:start+int(self.batch_size)]]
+                y_batch = y_train[shuffled_idx[start:start+int(self.batch_size)]]
+                w_batch = w_train[shuffled_idx[start:start+int(self.batch_size)]]
+                x_input = self.get_input_list(self.transform(x_batch))
+                y_output = self.get_output_list(self.transform_target(y_batch))
+                w_list = self.get_weight_list(w_batch)
+                yield (x_input, y_output, w_list)
+
+
+    def yield_single_class_batch(self, class_label):
+        """
+        Generate batches of half batch size, containing only entries for the given class label.
+        The weights are multiplied by balanced_class_weight.
+        """
+        x_train, y_train, w_train = self.training_data
+        l_train = y_train[:,0] if self.target_fields else y_train
+        class_idx = np.where(l_train==class_label)[0]
        while True:
-            x_train, y_train, w_train = self.training_data
-            # shuffle the entries for this class label
-            rn_state = np.random.get_state()
-            x_train[y_train==class_label] = np.random.permutation(x_train[y_train==class_label])
-            np.random.set_state(rn_state)
-            w_train[y_train==class_label] = np.random.permutation(w_train[y_train==class_label])
+            # shuffle the indices for this class label
+            if self.shuffle:
+                shuffled_idx = np.random.permutation(class_idx)
+            else:
+                shuffled_idx = class_idx
            # yield them batch wise
-            for start in range(0, len(x_train[y_train==class_label]), int(self.batch_size/2)):
-                yield (x_train[y_train==class_label][start:start+int(self.batch_size/2)],
-                       y_train[y_train==class_label][start:start+int(self.batch_size/2)],
-                       w_train[y_train==class_label][start:start+int(self.batch_size/2)]*self.balanced_class_weight[class_label])
-            # restart
+            for start in range(0, len(shuffled_idx), int(self.batch_size/2)):
+                yield (x_train[shuffled_idx[start:start+int(self.batch_size/2)]],
+                       y_train[shuffled_idx[start:start+int(self.batch_size/2)]],
+                       w_train[shuffled_idx[start:start+int(self.batch_size/2)]])


    def yield_balanced_batch(self):
        "generate batches with equal amounts of both classes"
        logcounter = 0
-        for batch_0, batch_1 in izip(self.yield_batch(0), self.yield_batch(1)):
+        for batch_0, batch_1 in izip(self.yield_single_class_batch(0),
+                                     self.yield_single_class_batch(1)):
            if logcounter == 10:
                logger.debug("\rSumw sig*balanced_class_weight[1]: {}".format(np.sum(batch_1[2])))
                logger.debug("\rSumw bkg*balanced_class_weight[0]: {}".format(np.sum(batch_0[2])))
@@ -738,49 +1101,49 @@ class ClassificationProject(object):
                   np.concatenate((batch_0[2], batch_1[2])))


-    def train(self, epochs=10):
+    def train(self, epochs=10, skip_checkpoint=False):

        self.load()

-        for branch_index, branch in enumerate(self.fields):
-            self.plot_input(branch_index)
-
        self.total_epochs = self._read_info("epochs", 0)

+        set_session_threads()
+
        logger.info("Train model")
        if not self.balance_dataset:
            try:
-                self.shuffle_training_data()
                self.is_training = True
-                self.model.fit(self.x_train,
-                               # the reshape might be unnescessary here
-                               self.y_train.reshape(-1, 1),
-                               epochs=epochs,
-                               validation_split = self.validation_split,
-                               class_weight=self.class_weight,
-                               sample_weight=self.w_train,
-                               shuffle=True,
-                               batch_size=self.batch_size,
-                               callbacks=self.callbacks_list)
+                self.model.fit_generator(self.yield_batch(),
+                                         steps_per_epoch=self.steps_per_epoch,
+                                         epochs=epochs,
+                                         validation_data=self.validation_data,
+                                         callbacks=self.callbacks_list,
+                                         verbose=self.verbose)
                self.is_training = False
            except KeyboardInterrupt:
                logger.info("Interrupt training - continue with rest")
        else:
            try:
-                self.shuffle_training_data() # needed here too, in order to get correct validation data
                self.is_training = True
-                labels, label_counts = np.unique(self.y_train, return_counts=True)
+                labels, label_counts = np.unique(self.l_train, return_counts=True)
                logger.info("Training on balanced batches")
                # note: the batches have balanced_class_weight already applied
                self.model.fit_generator(self.yield_balanced_batch(),
                                         steps_per_epoch=int(min(label_counts)/self.batch_size),
                                         epochs=epochs,
-                                         validation_data=self.class_weighted_validation_data,
-                                         callbacks=self.callbacks_list)
+                                         validation_data=self.validation_data,
+                                         callbacks=self.callbacks_list,
+                                         verbose=self.verbose)
                self.is_training = False
            except KeyboardInterrupt:
                logger.info("Interrupt training - continue with rest")

+        if not skip_checkpoint:
+            self.checkpoint_model()
+
+
+    def checkpoint_model(self):
+
        logger.info("Save history")
        self._dump_history()

@@ -795,34 +1158,80 @@ class ClassificationProject(object):
            logger.info("Reloading weights file since we are using model checkpoint!")
            self.model.load_weights(os.path.join(self.project_dir, "weights.h5"))

-        self.total_epochs += epochs
+        self.total_epochs += self.history.epoch[-1]+1
        self._write_info("epochs", self.total_epochs)

-        logger.info("Reloading (and re-transforming) unshuffled training data")
-        self.load(reload=True)

-        logger.info("Create/Update scores for ROC curve")
-        self.scores_test = self.model.predict(self.x_test)
-        self.scores_train = self.model.predict(self.x_train)
+    def evaluate_train_test(self, do_train=True, do_test=True, batch_size=10000, mode=None):
+        "Calculate scores for training and test sample"
+
+        if mode is not None:
+            self._write_info("scores_mode", mode)
+
+        def eval_score(data_name):
+            logger.info("Create/Update scores for {} sample".format(data_name))
+            n_events = len(getattr(self, "x_"+data_name))
+            setattr(self, "scores_"+data_name, np.empty(n_events))
+            for start in range(0, n_events, batch_size):
+                stop = start+batch_size
+                outputs =  self.predict(
+                    self.get_input_list(self.transform(getattr(self, "x_"+data_name)[start:stop])),
+                    mode=mode
+                )
+                if not self.target_fields:
+                    scores_batch = outputs.reshape(-1)
+                else:
+                    scores_batch = outputs[0].reshape(-1)
+                getattr(self, "scores_"+data_name)[start:stop] = scores_batch
+            self._dump_to_hdf5("scores_"+data_name)

-        self._dump_to_hdf5("scores_train", "scores_test")
+        if do_test:
+            eval_score("test")
+        if do_train:
+            eval_score("train")

-        logger.info("Creating all validation plots")
-        self.plot_all()

+    def predict(self, x, mode=None):
+        """
+        Calculate the scores for a (transformed) array of input values.
+        If the array is not transformed, use `evaluate` instead
+        """
+        if mode is None:
+            # normal output - after activation function output layer
+            return self.model.predict(x)
+        elif mode == "skip_activation":
+            # output before applying activation function
+            # (after weighted sum + bias of last hidden layer)
+            if isinstance(self.model.input, list):
+                feed_dict={tuple(self.model.input) : x}
+            else:
+                feed_dict={self.model.input : x}
+            return K.get_session().run(
+                self.model.output.op.inputs[0],
+                feed_dict=feed_dict
+            )
+        else:
+            raise ValueError("Unknown mode {}".format(mode))


-    def evaluate(self, x_eval):
+    def evaluate(self, x_eval, mode=None):
+        """
+        Calculate the scores for an array of input values.
+        All necessary transformations are applied.
+        """
        logger.debug("Evaluate score for {}".format(x_eval))
-        x_eval = self.scaler.transform(x_eval)
+        x_eval = self.transform(x_eval)
        logger.debug("Evaluate for transformed array: {}".format(x_eval))
-        return self.model.predict(x_eval)
+        return self.predict(x_eval, mode=mode)


    def write_friend_tree(self, score_name,
                          source_filename, source_treename,
                          target_filename, target_treename,
-                          batch_size=100000):
+                          batch_size=100000,
+                          score_mode=None,
+                          fixed_params=None):
+        "TODO: doesn't work for regression targets"
        f = ROOT.TFile.Open(source_filename)
        tree = f.Get(source_treename)
        entries = tree.GetEntries()
@@ -833,10 +1242,13 @@ class ClassificationProject(object):
            logger.info("Evaluating score for entry {}/{}".format(start, entries))
            logger.debug("Loading next batch")
            x_from_tree = tree2array(tree,
-                                     branches=self.fields+self.identifiers,
+                                     branches=self.branches+self.identifiers,
                                     start=start, stop=start+batch_size)
-            x_eval = rec2array(x_from_tree[self.fields])
-
+            # for parametrized classifiers
+            if fixed_params is not None:
+                for param_name, value in fixed_params.items():
+                    x_from_tree[param_name] = value
+            x_eval = rec2array(x_from_tree[self.branches])
            if len(self.identifiers) > 0:
                # create list of booleans that indicate which events where used for training
                df_identifiers = pd.DataFrame(x_from_tree[self.identifiers])
@@ -848,7 +1260,7 @@ class ClassificationProject(object):
                is_train = np.zeros(len(x_eval))

            # join scores and is_train array
-            scores = self.evaluate(x_eval).reshape(-1)
+            scores = self.evaluate(x_eval, mode=score_mode).reshape(-1)
            friend_df = pd.DataFrame(np.array(scores, dtype=[(score_name, np.float64)]))
            friend_df[score_name+"_is_train"] = is_train
            friend_tree = friend_df.to_records()[[score_name, score_name+"_is_train"]]
@@ -890,101 +1302,214 @@ class ClassificationProject(object):
        return centers, hist, errors


-    def plot_input(self, var_index):
-        "plot a single input variable"
+    def plot_input(self, var_index, ax=None, from_training_batches=False, max_n_batches=None):
+        """
+        plot a single input variable as a histogram (signal vs background)
+
+        :param var_index: index of the variable in ``self.fields``
+        :param ax: axes to draw on; if None a new figure is created and passed
+                   to ``save_show`` as ``plots/var_<var_index>.pdf`` inside the
+                   project directory
+        :param from_training_batches: use data from training batch generator
+        :param max_n_batches: if training batch generator is used, just use
+                              this number of batches (otherwise steps_per_epoch is used)
+        :return: result of ``save_show`` if a new figure was created, otherwise None
+        """
         branch = self.fields[var_index]
-        fig, ax = plt.subplots()
-        bkg = self.x_train[:,var_index][self.y_train == 0]
-        sig = self.x_train[:,var_index][self.y_train == 1]
-        bkg_weights = self.w_train[self.y_train == 0]
-        sig_weights = self.w_train[self.y_train == 1]
+        if ax is None:
+            fig, ax = plt.subplots()
+        else:
+            fig = None
+
+        if not from_training_batches:
+            bkg = self.x_train[:,var_index][self.l_train == 0]
+            sig = self.x_train[:,var_index][self.l_train == 1]
+            bkg_weights = self.w_train_tot[self.l_train == 0]
+            sig_weights = self.w_train_tot[self.l_train == 1]
+        else:
+            bkg = None
+            sig = None
+            bkg_weights = None
+            sig_weights = None
+            if max_n_batches is not None:
+                n_batches = max_n_batches
+            else:
+                n_batches = self.steps_per_epoch
+            for i_batch, (x, y, w) in enumerate(self.yield_batch()):
+                # i_batch is zero-based: stop once exactly n_batches batches were collected
+                if i_batch >= n_batches:
+                    break
+                if self.target_fields:
+                    y = y[0]
+                try:
+                    x = self.get_input_flat(x)
+                # NOTE(review): a missing method on self would raise AttributeError,
+                # not NameError - confirm which exception is expected here
+                except NameError:
+                    pass
+                bkg_batch = x[:,var_index][y==0]
+                sig_batch = x[:,var_index][y==1]
+                bkg_weights_batch = w[y==0]
+                sig_weights_batch = w[y==1]
+                if bkg is None:
+                    bkg = bkg_batch
+                    sig = sig_batch
+                    bkg_weights = bkg_weights_batch
+                    sig_weights = sig_weights_batch
+                else:
+                    bkg = np.concatenate([bkg, bkg_batch])
+                    sig = np.concatenate([sig, sig_batch])
+                    bkg_weights = np.concatenate([bkg_weights, bkg_weights_batch])
+                    sig_weights = np.concatenate([sig_weights, sig_weights_batch])
+
+        if hasattr(self, "mask_value"):
+            bkg_not_masked = np.where(bkg != self.mask_value)[0]
+            sig_not_masked = np.where(sig != self.mask_value)[0]
+            bkg = bkg[bkg_not_masked]
+            sig = sig[sig_not_masked]
+            bkg_weights = bkg_weights[bkg_not_masked]
+            sig_weights = sig_weights[sig_not_masked]
+
+        if self.balance_dataset:
+            if len(sig) < len(bkg):
+                logger.warning("Plotting only up to {} bkg events, since we use balance_dataset".format(len(sig)))
+                bkg = bkg[0:len(sig)]
+                bkg_weights = bkg_weights[0:len(sig)]
+            else:
+                logger.warning("Plotting only up to {} sig events, since we use balance_dataset".format(len(bkg)))
+                sig = sig[0:len(bkg)]
+                sig_weights = sig_weights[0:len(bkg)]

         logger.debug("Plotting bkg (min={}, max={}) from {}".format(np.min(bkg), np.max(bkg), bkg))
         logger.debug("Plotting sig (min={}, max={}) from {}".format(np.min(sig), np.max(sig), sig))

         # calculate percentiles to get a heuristic for the range to be plotted
-        # (should in principle also be done with weights, but for now do it unweighted)
-        # range_sig = np.percentile(sig, [1, 99])
-        # range_bkg = np.percentile(sig, [1, 99])
-        # plot_range = (min(range_sig[0], range_bkg[0]), max(range_sig[1], range_sig[1]))
-        plot_range = weighted_quantile(self.x_train[:,var_index], [0.1, 0.99], sample_weight=self.w_train*np.array(self.class_weight)[self.y_train.astype(int)])
-
+        x_total = np.concatenate([bkg, sig])
+        w_total = np.concatenate([bkg_weights, sig_weights])
+        plot_range = weighted_quantile(
+            x_total,
+            [0.01, 0.99],
+            sample_weight=w_total,
+        )
         logger.debug("Calculated range based on percentiles: {}".format(plot_range))

+        bins = 50
+
+        # check if we have a distribution of integer numbers (e.g. njet or something categorical)
+        # in that case we want to have a bin for each number
+        if (x_total == x_total.astype(int)).all():
+            plot_range = (math.floor(plot_range[0])-0.5, math.ceil(plot_range[1])+0.5)
+            bins = int(plot_range[1]-plot_range[0])
+
         try:
-            centers_sig, hist_sig, _ = self.get_bin_centered_hist(sig, scale_factor=self.class_weight[1], bins=50, range=plot_range, weights=sig_weights)
-            centers_bkg, hist_bkg, _ = self.get_bin_centered_hist(bkg, scale_factor=self.class_weight[0], bins=50, range=plot_range, weights=bkg_weights)
+            centers_sig, hist_sig, _ = self.get_bin_centered_hist(sig, bins=bins, range=plot_range, weights=sig_weights)
+            centers_bkg, hist_bkg, _ = self.get_bin_centered_hist(bkg, bins=bins, range=plot_range, weights=bkg_weights)
         except ValueError:
             # weird, probably not always working workaround for a numpy bug
             plot_range = (float("{:.3f}".format(plot_range[0])), float("{:.3f}".format(plot_range[1])))
             logger.warn("Got a value error during plotting, maybe this is due to a numpy bug - changing range to {}".format(plot_range))
-            centers_sig, hist_sig, _ = self.get_bin_centered_hist(sig, scale_factor=self.class_weight[1], bins=50, range=plot_range, weights=sig_weights)
-            centers_bkg, hist_bkg, _ = self.get_bin_centered_hist(bkg, scale_factor=self.class_weight[0], bins=50, range=plot_range, weights=bkg_weights)
+            centers_sig, hist_sig, _ = self.get_bin_centered_hist(sig, bins=bins, range=plot_range, weights=sig_weights)
+            centers_bkg, hist_bkg, _ = self.get_bin_centered_hist(bkg, bins=bins, range=plot_range, weights=bkg_weights)

-        width = centers_sig[1]-centers_sig[0]
+        if bins > 1:
+            width = centers_sig[1]-centers_sig[0]
+        else:
+            width = 1.
         ax.bar(centers_bkg, hist_bkg, color="b", alpha=0.5, width=width)
         ax.bar(centers_sig, hist_sig, color="r", alpha=0.5, width=width)

-        ax.set_xlabel(branch+" (transformed)")
-        plot_dir = os.path.join(self.project_dir, "plots")
-        if not os.path.exists(plot_dir):
-            os.mkdir(plot_dir)
-        fig.savefig(os.path.join(plot_dir, "var_{}.pdf".format(var_index)))
-        plt.close(fig)
+        label = branch
+        ax.set_xlabel(label)
+        if fig is not None:
+            plot_dir = os.path.join(self.project_dir, "plots")
+            if not os.path.exists(plot_dir):
+                os.mkdir(plot_dir)
+            return save_show(plt, fig, os.path.join(plot_dir, "var_{}.pdf".format(var_index)))
+

+    def plot_all_inputs(self, **kwargs):
+        """
+        Plot all input variables into one figure, arranged on a square grid.
+
+        :param kwargs: passed on to ``plot_input``
+        :return: result of ``save_show``
+        """
+        nrows = int(math.ceil(math.sqrt(len(self.fields))))
+        # squeeze=False: always get a 2D array of axes, also for a single variable
+        # (otherwise plt.subplots returns a bare Axes object without reshape)
+        fig, axes = plt.subplots(nrows=nrows, ncols=nrows,
+                                 figsize=(3*nrows, 3*nrows),
+                                 squeeze=False,
+                                 gridspec_kw=dict(wspace=0.4, hspace=0.4))
+        for i, ax in zip(range(len(self.fields)), axes.reshape(-1)):
+            self.plot_input(i, ax=ax, **kwargs)
+        return save_show(plt, fig, os.path.join(self.project_dir, "all_inputs.pdf"))

-    def plot_weights(self):
+
+    def plot_weights(self, bins=100, range=None):
+        """
+        Plot histograms of the total training event weights with a log y-axis,
+        one figure for background (l_train == 0) and one for signal (l_train == 1).
+
+        :param bins: passed to ``ax.hist``
+        :param range: passed to ``ax.hist``
+        """
         fig, ax = plt.subplots()
-        bkg = self.w_train[self.y_train == 0]
-        sig = self.w_train[self.y_train == 1]
-        ax.hist(bkg, bins=100, color="b", alpha=0.5)
-        fig.savefig(os.path.join(self.project_dir, "eventweights_bkg.pdf"))
-        plt.close(fig)
+        bkg = self.w_train_tot[self.l_train == 0]
+        sig = self.w_train_tot[self.l_train == 1]
+        ax.hist(bkg, bins=bins, range=range, color="b", alpha=0.5)
+        ax.set_yscale("log")
+        save_show(plt, fig, os.path.join(self.project_dir, "eventweights_bkg.pdf"))
         fig, ax = plt.subplots()
-        ax.hist(sig, bins=100, color="r", alpha=0.5)
-        fig.savefig(os.path.join(self.project_dir, "eventweights_sig.pdf"))
-        plt.close(fig)
+        ax.hist(sig, bins=bins, range=range, color="r", alpha=0.5)
+        ax.set_yscale("log")
+        save_show(plt, fig, os.path.join(self.project_dir, "eventweights_sig.pdf"))


-    def plot_ROC(self):
+    def plot_ROC(self, xlim=(0,1), ylim=(0,1)):
+        """
+        Plot background rejection (1 - false positive rate) against signal
+        efficiency (true positive rate) for the training and the test sample.
+
+        :param xlim: limits of the x-axis (signal efficiency)
+        :param ylim: limits of the y-axis (background rejection)
+        :return: result of ``save_show``
+        """

         logger.info("Plot ROC curve")
-        fpr, tpr, threshold = roc_curve(self.y_test, self.scores_test, sample_weight = self.w_test)
-        fpr = 1.0 - fpr
-        try:
-            roc_auc = auc(tpr, fpr, reorder=True)
-        except ValueError:
-            logger.warning("Got a value error from auc - trying to rerun with reorder=True")
-            roc_auc = auc(tpr, fpr, reorder=True)
-
-        plt.grid(color='gray', linestyle='--', linewidth=1)
-        plt.plot(tpr,  fpr, label=str(self.name + " (AUC = {})".format(roc_auc)))
-        plt.plot([0,1],[1,0], linestyle='--', color='black', label='Luck')
-        plt.ylabel("Background rejection")
-        plt.xlabel("Signal efficiency")
-        plt.title('Receiver operating characteristic')
-        plt.xlim(0,1)
-        plt.ylim(0,1)
-        plt.xticks(np.arange(0,1,0.1))
-        plt.yticks(np.arange(0,1,0.1))
-        plt.legend(loc='lower left', framealpha=1.0)
-        plt.savefig(os.path.join(self.project_dir, "ROC.pdf"))
-        plt.clf()

+        fig, ax = plt.subplots()
+        ax.grid(color='gray', linestyle='--', linewidth=1)

-    def plot_score(self, log=True, plot_opts=dict(bins=50, range=(0, 1)), ylim=None, xlim=None):
-        centers_sig_train, hist_sig_train, _ = self.get_bin_centered_hist(self.scores_train[self.y_train==1].reshape(-1), density=True, weights=self.w_train[self.y_train==1], **plot_opts)
-        centers_bkg_train, hist_bkg_train, _ = self.get_bin_centered_hist(self.scores_train[self.y_train==0].reshape(-1), density=True, weights=self.w_train[self.y_train==0], **plot_opts)
-        centers_sig_test, hist_sig_test, rel_errors_sig_test = self.get_bin_centered_hist(self.scores_test[self.y_test==1].reshape(-1), density=True, weights=self.w_test[self.y_test==1], **plot_opts)
-        centers_bkg_test, hist_bkg_test, rel_errors_bkg_test = self.get_bin_centered_hist(self.scores_test[self.y_test==0].reshape(-1), density=True, weights=self.w_test[self.y_test==0], **plot_opts)
-        errors_sig_test = hist_sig_test*rel_errors_sig_test
-        errors_bkg_test = hist_bkg_test*rel_errors_bkg_test
+        for y, scores, weight, label in [
+                (self.l_train, self.scores_train, self.w_train, "train"),
+                (self.l_test, self.scores_test, self.w_test, "test")
+        ]:
+            fpr, tpr, threshold = roc_curve(y, scores, sample_weight = weight)
+            fpr = 1.0 - fpr # background rejection
+            try:
+                roc_auc = auc(tpr, fpr)
+            except ValueError:
+                logger.warning("Got a value error from auc - trying to rerun with reorder=True")
+                roc_auc = auc(tpr, fpr, reorder=True)
+            ax.plot(tpr,  fpr, label=str(self.name + " {} (AUC = {:.3f})".format(label, roc_auc)))
+
+        ax.plot([0,1],[1,0], linestyle='--', color='black', label='Luck')
+        ax.set_ylabel("Background rejection")
+        ax.set_xlabel("Signal efficiency")
+        ax.set_title('Receiver operating characteristic')
+        ax.set_xlim(*xlim)
+        ax.set_ylim(*ylim)
+        # plt.xticks(np.arange(0,1,0.1))
+        # plt.yticks(np.arange(0,1,0.1))
+        ax.legend(loc='lower left', framealpha=1.0)
+        return save_show(plt, fig, os.path.join(self.project_dir, "ROC.pdf"))
+
+
+    def plot_score(self, log=True, plot_opts=dict(bins=50, range=(0,1)),
+                   ylim=None, xlim=None, density=True,
+                   lumifactor=None, apply_class_weight=True,
+                   invert_activation=False):
+        if invert_activation:
+            trf = self.get_inverse_act_fn()
+        else:
+            trf = lambda y : y
        fig, ax = plt.subplots()
-        width = centers_sig_train[1]-centers_sig_train[0]
-        ax.bar(centers_bkg_train, hist_bkg_train, color="b", alpha=0.5, width=width, label="background train")
-        ax.bar(centers_sig_train, hist_sig_train, color="r", alpha=0.5, width=width, label="signal train")
-        ax.errorbar(centers_bkg_test, hist_bkg_test, fmt="bo", yerr=errors_bkg_test, label="background test")
-        ax.errorbar(centers_sig_test, hist_sig_test, fmt="ro", yerr=errors_sig_test, label="signal test")
+        for scores, weights, y, class_label, fn, opts in [
+                (self.scores_train, self.w_train, self.l_train, 1, ax.bar, dict(color="r", label="signal train")),
+                (self.scores_train, self.w_train, self.l_train, 0, ax.bar, dict(color="b", label="background train")),
+                (self.scores_test, self.w_test, self.l_test, 1, ax.errorbar, dict(fmt="ro", label="signal test")),
+                (self.scores_test, self.w_test, self.l_test, 0, ax.errorbar, dict(fmt="bo", label="background test")),
+        ]:
+            weights = weights[y==class_label]
+            if apply_class_weight is True and (lumifactor is not None):
+                logger.warning("not applying class weight, since lumifactor given")
+            if apply_class_weight and (lumifactor is None):
+                weights = weights*self.class_weight[class_label]
+            if lumifactor is not None:
+                weights = weights*lumifactor
+            centers, hist, rel_errors = self.get_bin_centered_hist(
+                trf(scores[y==class_label].reshape(-1)),
+                weights=weights,
+                **plot_opts
+            )
+            width = centers[1]-centers[0]
+            if density:
+                hist = hist/width
+            if fn == ax.errorbar:
+                errors = rel_errors*hist
+                opts.update(yerr=errors)
+            else:
+                opts.update(width=width, alpha=0.5)
+            fn(centers, hist, **opts)
        if log:
            ax.set_yscale("log")
        if ylim is not None:
@@ -992,24 +1517,39 @@ class ClassificationProject(object):
        if xlim is not None:
            ax.set_xlim(*xlim)
        ax.set_xlabel("NN output")
-        fig.legend(loc='upper center', framealpha=0.5)
-        fig.savefig(os.path.join(self.project_dir, "scores.pdf"))
-        plt.close(fig)
+        if density:
+            ax.set_ylabel("dN / d(NN output)")
+        else:
+            ax.set_ylabel("Events / {:.2f}".format(width))
+        if apply_class_weight:
+            ax.set_title("Class weights applied")
+        ax.legend(loc='upper center', framealpha=0.5)
+        return save_show(plt, fig, os.path.join(self.project_dir, "scores.pdf"))
+

+    def plot_significance_hist(self, lumifactor=1., significance_function=None, plot_opts=dict(bins=50, range=(0, 1)), invert_activation=False):
+
+        """
+        Plot significances based on a histogram of scores
+        """

-    def plot_significance(self, lumifactor=1., significanceFunction=None, plot_opts=dict(bins=50, range=(0, 1))):
        logger.info("Plot significances")

-        centers_sig_train, hist_sig_train, rel_errors_sig_train = self.get_bin_centered_hist(self.scores_train[self.y_train==1].reshape(-1), weights=self.w_train[self.y_train==1], **plot_opts)
-        centers_bkg_train, hist_bkg_train, rel_errors_bkg_train = self.get_bin_centered_hist(self.scores_train[self.y_train==0].reshape(-1), weights=self.w_train[self.y_train==0], **plot_opts)
-        centers_sig_test, hist_sig_test, rel_errors_sig_test = self.get_bin_centered_hist(self.scores_test[self.y_test==1].reshape(-1), weights=self.w_test[self.y_test==1], **plot_opts)
-        centers_bkg_test, hist_bkg_test, rel_errors_bkg_test = self.get_bin_centered_hist(self.scores_test[self.y_test==0].reshape(-1), weights=self.w_test[self.y_test==0], **plot_opts)
+        if invert_activation:
+            trf = self.get_inverse_act_fn()
+        else:
+            trf = lambda y : y
+
+        centers_sig_train, hist_sig_train, rel_errors_sig_train = self.get_bin_centered_hist(trf(self.scores_train[self.l_train==1].reshape(-1)), weights=self.w_train[self.l_train==1], **plot_opts)
+        centers_bkg_train, hist_bkg_train, rel_errors_bkg_train = self.get_bin_centered_hist(trf(self.scores_train[self.l_train==0].reshape(-1)), weights=self.w_train[self.l_train==0], **plot_opts)
+        centers_sig_test, hist_sig_test, rel_errors_sig_test = self.get_bin_centered_hist(trf(self.scores_test[self.l_test==1].reshape(-1)), weights=self.w_test[self.l_test==1], **plot_opts)
+        centers_bkg_test, hist_bkg_test, rel_errors_bkg_test = self.get_bin_centered_hist(trf(self.scores_test[self.l_test==0].reshape(-1)), weights=self.w_test[self.l_test==0], **plot_opts)

        significances_train = []
        significances_test = []
-        for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances, w, y in [
-                (hist_sig_train, hist_bkg_train, rel_errors_sig_train, rel_errors_bkg_train, significances_train, self.w_train, self.y_train),
-                (hist_sig_test, hist_bkg_test, rel_errors_sig_test, rel_errors_bkg_test, significances_test, self.w_test, self.y_test),
+        for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances in [
+                (hist_sig_train, hist_bkg_train, rel_errors_sig_train, rel_errors_bkg_train, significances_train),
+                (hist_sig_test, hist_bkg_test, rel_errors_sig_test, rel_errors_bkg_test, significances_test),
        ]:
            # factor to rescale due to using only a fraction of events (training and test samples)
            # normfactor_sig = (np.sum(self.w_train[self.y_train==1])+np.sum(self.w_test[self.y_test==1]))/np.sum(w[y==1])
@@ -1025,13 +1565,13 @@ class ClassificationProject(object):
                s = sum(hist_sig[i:])
                b = sum(hist_bkg[i:])
                db = math.sqrt(sum((rel_errors_bkg[i:]*hist_bkg[i:])**2))
-                if significanceFunction is None:
+                if significance_function is None:
                    try:
-                        z = s/math.sqrt(b+db**2)
+                        z = poisson_asimov_significance(s, 0, b, db)
                    except (ZeroDivisionError, ValueError) as e:
                        z = 0
                else:
-                    z = significanceFunction(s, b, db)
+                    z = significance_function(s, b, db)
                if z == float('inf'):
                    z = 0
                logger.debug("s, b, db, z = {}, {}, {}, {}".format(s, b, db, z))
@@ -1044,8 +1584,96 @@ class ClassificationProject(object):
        ax.set_xlabel("Cut on NN score")
        ax.set_ylabel("Significance")
        ax.legend(loc='lower center', framealpha=0.5)
-        fig.savefig(os.path.join(self.project_dir, "significances.pdf"))
-        plt.close(fig)
+        return save_show(plt, fig, os.path.join(self.project_dir, "significances_hist.pdf"))
+
+
+    @staticmethod
+    def calc_s_ds_b_db(scores, y, w):
+
+        """
+        Calculate the sum of weights of signal (s), background (b) and the
+        sqrt of the squared sum of weights for all possible threshold
+        of the output score.
+        Following the implementation from sklearn.metrics.ranking._binary_clf_curve
+
+        :param scores: array of output scores
+        :param y: array of labels (non-zero is counted as signal)
+        :param w: array of event weights
+        :return: tuple (s, ds, b, db, thresholds), cumulated from the highest
+                 score downwards, one entry per distinct score value
+        """
+
+        # sort by descending score (mergesort is stable)
+        desc_score_indices = np.argsort(scores, kind="mergesort")[::-1]
+        scores_sorted = scores[desc_score_indices]
+        y_sorted = y[desc_score_indices]
+        w_sorted = w[desc_score_indices]
+        # positions where the score changes, plus the very last entry, so that
+        # the final threshold includes all events
+        distinct_value_indices = np.where(np.diff(scores_sorted))[0]
+        threshold_idxs = np.r_[distinct_value_indices, y_sorted.size - 1]
+        s_sumw = stable_cumsum(y_sorted * w_sorted)[threshold_idxs]
+        s_sumw2 = stable_cumsum(y_sorted * (w_sorted**2))[threshold_idxs]
+        b_sumw = stable_cumsum(np.logical_not(y_sorted) * w_sorted)[threshold_idxs]
+        b_sumw2 = stable_cumsum(np.logical_not(y_sorted) * (w_sorted**2))[threshold_idxs]
+
+        return s_sumw, np.sqrt(s_sumw2), b_sumw, np.sqrt(b_sumw2), scores_sorted[threshold_idxs]
+
+
+    def get_inverse_act_fn(self):
+        """
+        Return the inverse of the output activation function.
+
+        Only "sigmoid" is supported (inverse: log(y/(1-y))), for anything
+        else a NotImplementedError is raised.
+        """
+        output_activation = self.activation_function_output
+        if output_activation != "sigmoid":
+            message = ("Inverse function of {} not supported yet - "
+                       "currently only sigmoid")
+            raise NotImplementedError(message.format(output_activation))
+
+        def inverse_sigmoid(y):
+            return np.log(y/(1-y))
+
+        return inverse_sigmoid
+
+
+    def plot_significance(self, significance_function=None, maxsteps=None, lumifactor=1., vectorized=False, invert_activation=False):
+        """
+        Plot the significance when cutting on all possible thresholds and plot against signal efficiency.
+
+        :param significance_function: called as ``f(s, ds, b, db)``; if None,
+                                      ``poisson_asimov_significance`` is used (vectorized)
+        :param maxsteps: if given, only evaluate about this number of thresholds
+        :param lumifactor: factor applied to all sums of weights and their errors
+        :param vectorized: call ``significance_function`` once with arrays
+                           instead of once per threshold
+        :param invert_activation: transform the scores with the inverse of the
+                                  output activation function first
+        :return: result of ``save_show``
+        """
+
+        if significance_function is None:
+            vectorized = True
+            significance_function = poisson_asimov_significance
+
+        if invert_activation:
+            trf = self.get_inverse_act_fn()
+        else:
+            trf = lambda y : y
+
+        fig, ax = plt.subplots()
+        ax2 = ax.twinx()
+        prop_cycle = plt.rcParams['axes.prop_cycle']
+        colors = prop_cycle.by_key()['color']
+        for (scores, y, w, label), col in zip(
+                [(self.scores_train, self.l_train, self.w_train, "train"),
+                 (self.scores_test, self.l_test, self.w_test, "test")],
+                colors
+        ):
+            scores = trf(scores)
+            s_sumws, s_errs, b_sumws, b_errs, thresholds = self.calc_s_ds_b_db(scores, y, w)
+            if maxsteps is not None:
+                # floor division: the slice step below has to be an integer
+                stepsize = int(len(s_sumws))//int(maxsteps)
+            else:
+                stepsize = 1
+            if stepsize == 0:
+                stepsize = 1
+            s_sumws = s_sumws[::stepsize]*lumifactor*self.step_signal
+            s_errs = s_errs[::stepsize]*lumifactor*self.step_signal
+            b_sumws = b_sumws[::stepsize]*lumifactor*self.step_bkg
+            b_errs = b_errs[::stepsize]*lumifactor*self.step_bkg
+            # drop thresholds without any background
+            nonzero_b = np.where(b_sumws!=0)[0]
+            s_sumws = s_sumws[nonzero_b]
+            s_errs = s_errs[nonzero_b]
+            b_sumws = b_sumws[nonzero_b]
+            b_errs = b_errs[nonzero_b]
+            thresholds = thresholds[nonzero_b]
+            if not vectorized:
+                zs = []
+                for s, ds, b, db in zip(s_sumws, s_errs, b_sumws, b_errs):
+                    zs.append(significance_function(s, ds, b, db))
+            else:
+                zs = significance_function(s_sumws, s_errs, b_sumws, b_errs)
+            # the last entry is the cumulated sum over all remaining thresholds
+            ax.plot(s_sumws/s_sumws[-1], zs, label=label, color=col)
+            ax2.plot(s_sumws/s_sumws[-1], thresholds, "--", color=col)
+        ax.set_xlabel("Signal efficiency")
+        ax.set_ylabel("Significance")
+        ax.set_xlim(0, 1)
+        ax2.set_ylabel("Threshold")
+        ax.legend()
+        return save_show(plt, fig, os.path.join(self.project_dir, "significances.pdf"))


    @property
@@ -1075,22 +1703,22 @@ class ClassificationProject(object):
            hist_dict = self.csv_hist

        logger.info("Plot losses")
-        plt.plot(hist_dict['loss'])
-        plt.plot(hist_dict['val_loss'])
-        plt.ylabel('loss')
-        plt.xlabel('epoch')
-        plt.legend(['training data','validation data'], loc='upper left')
+        fig, ax = plt.subplots()
+        ax.plot(hist_dict['loss'])
+        ax.plot(hist_dict['val_loss'])
+        ax.set_ylabel('loss')
+        ax.set_xlabel('epoch')
+        ax.legend(['training data','validation data'], loc='upper left')
        if log:
-            plt.yscale("log")
+            ax.set_yscale("log")
        if xlim is not None:
-            plt.xlim(*xlim)
+            ax.set_xlim(*xlim)
        if ylim is not None:
-            plt.ylim(*ylim)
-        plt.savefig(os.path.join(self.project_dir, "losses.pdf"))
-        plt.clf()
+            ax.set_ylim(*ylim)
+        return save_show(plt, fig, os.path.join(self.project_dir, "losses.pdf"))


-    def plot_accuracy(self, all_trainings=False, log=False):
+    def plot_accuracy(self, all_trainings=False, log=False, acc_suffix="weighted_acc"):
        """
        Plot the value of the accuracy metric for each epoch

@@ -1102,14 +1730,14 @@ class ClassificationProject(object):
        else:
            hist_dict = self.history.history

-        if (not 'acc' in hist_dict) or (not 'val_acc' in hist_dict):
+        if (not acc_suffix in hist_dict) or (not 'val_'+acc_suffix in hist_dict):
            logger.warning("No previous history found for plotting, try global history")
            hist_dict = self.csv_hist

        logger.info("Plot accuracy")

-        plt.plot(hist_dict['acc'])
-        plt.plot(hist_dict['val_acc'])
+        plt.plot(hist_dict[acc_suffix])
+        plt.plot(hist_dict['val_'+acc_suffix])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
@@ -1122,11 +1750,30 @@ class ClassificationProject(object):

     def plot_all(self):
+        # runs the ROC, loss, score and weights plots
+        # (the accuracy and significance plots are commented out below)
         self.plot_ROC()
-        self.plot_accuracy()
+        # self.plot_accuracy()
         self.plot_loss()
         self.plot_score()
         self.plot_weights()
-        self.plot_significance()
+        # self.plot_significance()
+
+
+    def to_DataFrame(self):
+        """
+        Return the training and test data (in this order) as one pandas DataFrame.
+
+        Columns: one per entry in ``self.fields``, "weight", "labels"
+        (categorical, "background"/"signal"), the identifiers (if the
+        eventlists can be read) and the boolean flag "is_train".
+        """
+        df = pd.DataFrame(np.concatenate([self.x_train, self.x_test]), columns=self.fields)
+        df["weight"] = np.concatenate([self.w_train, self.w_test])
+        df["labels"] = pd.Categorical.from_codes(
+            np.concatenate([self.l_train, self.l_test]),
+            categories=["background", "signal"]
+        )
+        for identifier in self.identifiers:
+            try:
+                # test events get -1 for each identifier
+                df[identifier] = np.concatenate([self.s_eventlist_train[identifier],
+                                                 self.b_eventlist_train[identifier],
+                                                 -1*np.ones(len(self.x_test), dtype="i8")])
+            except IOError:
+                logger.warning("Can't find eventlist - DataFrame won't contain identifiers")
+        # builtin bool instead of the np.bool alias (removed in recent numpy versions)
+        df["is_train"] = np.concatenate([np.ones(len(self.x_train), dtype=bool),
+                                         np.zeros(len(self.x_test), dtype=bool)])
+        return df


 def create_getter(dataset_name):
@@ -1147,6 +1794,319 @@ for dataset_name in ClassificationProject.dataset_names:
                                                          create_setter(dataset_name)))


+class ClassificationProjectDataFrame(ClassificationProject):
+
+    """
+    A little hack to initialize a ClassificationProject from a pandas DataFrame instead of ROOT TTrees
+
+    The train/test arrays (x, y, w) are derived lazily from the DataFrame and
+    cached; they can be overwritten via the corresponding setters.
+    """
+
+    def __init__(self, name, *args, **kwargs):
+        if len(args) < 1 and len(kwargs) < 1:
+            # if no further arguments given, interpret as directory name
+            self._init_from_dir(name)
+        else:
+            # otherwise initialise new project
+            self._init_from_args(name, *args, **kwargs)
+            with open(os.path.join(self.project_dir, "options.pickle"), "wb") as of:
+                # don't put the dataframe into options.pickle!
+                # It is either the first positional argument ...
+                if len(args) > 1:
+                    args = args[1:]
+                else:
+                    args = []
+                # ... or passed via the "df" keyword
+                kwargs = {key: value for key, value in kwargs.items() if key != "df"}
+                pickle.dump(dict(args=args, kwargs=kwargs), of)
+
+
+    def _init_from_args(self,
+                        name,
+                        df,
+                        input_columns,
+                        weight_column="weights",
+                        label_column="labels",
+                        signal_label="signal",
+                        background_label="background",
+                        split_mode="split_column",
+                        split_column="is_train",
+                        **kwargs):
+        """
+        :param name: passed on to the base class
+        :param df: DataFrame containing inputs, weights, labels and the split column
+        :param input_columns: names of the columns used as input variables
+        :param weight_column: name of the column containing the event weights
+        :param label_column: name of the column containing the class labels
+        :param signal_label: value in ``label_column`` that marks signal
+        :param background_label: stored as attribute
+        :param split_mode: only "split_column" is supported
+        :param split_column: name of the boolean column that is True for training events
+        :param kwargs: passed on to the base class
+        """
+
+        self.df = df
+        self.input_columns = input_columns
+        self.weight_column = weight_column
+        self.label_column = label_column
+        self.signal_label = signal_label
+        self.background_label = background_label
+        if split_mode != "split_column":
+            raise NotImplementedError("'split_column' is the only currently supported split mode")
+        self.split_mode = split_mode
+        self.split_column = split_column
+        super(ClassificationProjectDataFrame, self)._init_from_args(
+            name,
+            signal_trees=[], bkg_trees=[], branches=[], weight_expr="1",
+            **kwargs
+        )
+        # caches for the lazily evaluated properties below
+        self._x_train = None
+        self._x_test = None
+        self._y_train = None
+        self._y_test = None
+        self._w_train = None
+        self._w_test = None
+
+    @property
+    def x_train(self):
+        # input columns of the rows where the split column is True
+        if self._x_train is None:
+            self._x_train = self.df[self.df[self.split_column]][self.input_columns].values
+        return self._x_train
+
+    @x_train.setter
+    def x_train(self, value):
+        self._x_train = value
+
+    @property
+    def x_test(self):
+        # input columns of the rows where the split column is False
+        if self._x_test is None:
+            self._x_test = self.df[~self.df[self.split_column]][self.input_columns].values
+        return self._x_test
+
+    @x_test.setter
+    def x_test(self, value):
+        self._x_test = value
+
+    @property
+    def y_train(self):
+        # boolean array: True where the label equals signal_label
+        if self._y_train is None:
+            self._y_train = (self.df[self.df[self.split_column]][self.label_column] == self.signal_label).values
+        return self._y_train
+
+    @y_train.setter
+    def y_train(self, value):
+        self._y_train = value
+
+    @property
+    def y_test(self):
+        # boolean array: True where the label equals signal_label
+        if self._y_test is None:
+            self._y_test = (self.df[~self.df[self.split_column]][self.label_column] == self.signal_label).values
+        return self._y_test
+
+    @y_test.setter
+    def y_test(self, value):
+        self._y_test = value
+
+    @property
+    def w_train(self):
+        if self._w_train is None:
+            self._w_train = self.df[self.df[self.split_column]][self.weight_column].values
+        return self._w_train
+
+    @w_train.setter
+    def w_train(self, value):
+        self._w_train = value
+
+    @property
+    def w_test(self):
+        if self._w_test is None:
+            self._w_test = self.df[~self.df[self.split_column]][self.weight_column].values
+        return self._w_test
+
+    @w_test.setter
+    def w_test(self, value):
+        self._w_test = value
+
+    @property
+    def fields(self):
+        return self.input_columns
+
+
+    def load(self, reload=False):
+        """
+        Mark the data as loaded - the arrays are taken from the DataFrame on access.
+
+        :param reload: reset all cached arrays, such that they are
+                       recreated from the DataFrame on next access
+        """
+
+        if reload:
+            self.data_loaded = False
+            self._x_train = None
+            self._x_test = None
+            self._y_train = None
+            self._y_test = None
+            self._w_train = None
+            self._w_test = None
+            self._w_train_tot = None
+
+        self.data_loaded = True
+
+class ClassificationProjectRNN(ClassificationProject):
+
+    """
+    A little wrapper to use recurrent units for things like jet collections
+    """
+
+    def _init_from_args(self, name,
+                        recurrent_field_names=None,
+                        rnn_layer_nodes=32,
+                        mask_value=-999,
+                        recurrent_unit_type="GRU",
+                        **kwargs):
+        """
+        :param name: passed on to the base class
+        :param recurrent_field_names: list of field name tables, one table per
+                                      recurrent input (see example below)
+        :param rnn_layer_nodes: stored as attribute
+        :param mask_value: stored as attribute
+        :param recurrent_unit_type: stored as attribute
+        :param kwargs: passed on to the base class
+
+        recurrent_field_names example:
+        [["jet1Pt", "jet1Eta", "jet1Phi"],
+         ["jet2Pt", "jet2Eta", "jet2Phi"],
+         ["jet3Pt", "jet3Eta", "jet3Phi"]],
+        [["lep1Pt", "lep1Eta", "lep1Phi", "lep1flav"],
+         ["lep2Pt", "lep2Eta", "lep2Phi", "lep2flav"]],
+        """
+        super(ClassificationProjectRNN, self)._init_from_args(name, **kwargs)
+
+        self._write_info("project_type", "ClassificationProjectRNN")
+
+        self.recurrent_field_names = recurrent_field_names
+        if self.recurrent_field_names is None:
+            self.recurrent_field_names = []
+        self.rnn_layer_nodes = rnn_layer_nodes
+        self.mask_value = mask_value
+        self.recurrent_unit_type = recurrent_unit_type
+
+        # convert the field names to arrays of indices into self.fields
+        self.recurrent_field_idx = []
+        for field_name_list in self.recurrent_field_names:
+            field_names = np.array([field_name_list])
+            # builtin object instead of the np.object alias (removed in recent numpy versions)
+            if field_names.dtype == object:
+                raise ValueError(
+                    "Invalid entry for recurrent fields: {} - "
+                    "please ensure that the length for all elements in the list is equal"
+                    .format(field_names)
+                )
+            field_idx = (
+                np.array([self.fields.index(field_name)
+                          for field_name in field_names.reshape(-1)])
+                .reshape(field_names.shape)
+            )
+            self.recurrent_field_idx.append(field_idx)
+
+        # all fields that are not part of any recurrent input are "flat" fields
+        recurrent_indices = set()
+        for field_idx in self.recurrent_field_idx:
+            recurrent_indices.update(field_idx.reshape(-1).tolist())
+        self.flat_fields = []
+        for field in self.fields:
+            if self.fields.index(field) in recurrent_indices:
+                continue
+            self.flat_fields.append(field)
+
+        if self.scaler_type != "WeightedRobustScaler":
+            raise NotImplementedError(
+                "Invalid scaler '{}' - only WeightedRobustScaler is currently supported for RNN"
+                .format(self.scaler_type)
+            )
+
+
+    @property
+    def model(self):
+        if self._model is None:
+            # following the setup from the tutorial:
+            # https://github.com/YaleATLAS/CERNDeepLearningTutorial
+            rnn_inputs = []
+            rnn_channels = []
+            for field_idx in self.recurrent_field_idx:
+                chan_inp = Input(field_idx.shape[1:])
+                channel = Masking(mask_value=self.mask_value)(chan_inp)
+                if self.recurrent_unit_type == "GRU":
+                    channel = GRU(self.rnn_layer_nodes)(channel)
+                elif self.recurrent_unit_type == "SimpleRNN":
+                    channel = SimpleRNN(self.rnn_layer_nodes)(channel)
+                elif self.recurrent_unit_type == "LSTM":
+                    channel = LSTM(self.rnn_layer_nodes)(channel)
+                else:
+                    raise NotImplementedError("{} not implemented".format(self.recurrent_unit_type))
+                logger.info("Added {} unit".format(self.recurrent_unit_type))
+                # TODO: configure dropout for recurrent layers
+                #channel = Dropout(0.3)(channel)
+                rnn_inputs.append(chan_inp)
+                rnn_channels.append(channel)
+            flat_input = Input((len(self.flat_fields),))
+            if self.dropout_input is not None:
+                flat_channel = Dropout(rate=self.dropout_input)(flat_input)
+            else:
+                flat_channel = flat_input
+            combined = concatenate(rnn_channels+[flat_channel])
+            for node_count, dropout_fraction in zip(self.nodes, self.dropout):
+                extra_opts = dict()
+                if self.kernel_initializer is not None:
+                    extra_opts["kernel_initializer"] = getattr(keras.initializers, self.kernel_initializer)()
+                combined = Dense(node_count, activation=self.activation_function, **extra_opts)(combined)
+                if (dropout_fraction is not None) and (dropout_fraction > 0):
+                    combined = Dropout(rate=dropout_fraction)(combined)
+            combined = Dense(1, activation=self.activation_function_output)(combined)
+            outputs = [combined]
+
+            # optional regression targets
+            for target_field in self.target_fields:
+                extra_target = Dense(1, activation="linear", name="target_{}".format(target_field))(combined)
+                outputs.append(extra_target)
+
+            self._model = Model(inputs=rnn_inputs+[flat_input], outputs=outputs)
+            self._compile_or_load_model()
+        return self._model
+
+
+    def clean_mask(self, x):
+        """
+        Mask recurrent fields such that once a masked value occurs,
+        all values corresponding to the same and following objects are
+        masked as well. Works in place.
+        """
+        for recurrent_field_idx in self.recurrent_field_idx:
+            for evt in x:
+                masked = False
+                for line_idx in recurrent_field_idx.reshape(*recurrent_field_idx.shape[1:]):
+                    if (evt[line_idx] == self.mask_value).any():
+                        masked=True
+                    if masked:
+                        evt[line_idx] = self.mask_value
+
+
+    def mask_uniform(self, x):
+        """
+        Mask recurrent fields with a random (uniform) number of objects. Works in place.
+        """
+        for recurrent_field_idx in self.recurrent_field_idx:
+            for evt in x:
+                masked = False
+                nobj = int(random.random()*(recurrent_field_idx.shape[1]+1))
+                for obj_number, line_idx in enumerate(recurrent_field_idx.reshape(*recurrent_field_idx.shape[1:])):
+                    if obj_number == nobj:
+                        masked=True
+                    if masked:
+                        evt[line_idx] = self.mask_value
+
+
+    def get_input_list(self, x):
+        """
+        Returns a list of 3-dimensional inputs for each
+        recurrent layer and a 2-dimensional one for the normal flat inputs.
+        """
+        x_input = []
+        for field_idx in self.recurrent_field_idx:
+            x_recurrent = x[:,field_idx.reshape(-1)].reshape(-1, *field_idx.shape[1:])
+            x_input.append(x_recurrent)
+        x_flat = x[:,[self.fields.index(field_name) for field_name in self.flat_fields]]
+        x_input.append(x_flat)
+        return x_input
+
+
+    def get_input_flat(self, x):
+        "Transform the multiple inputs back to flat ntuple"
+        nevent = x[0].shape[0]
+        x_flat = np.empty((nevent, len(self.fields)), dtype=np.float)
+        # recurrent fields
+        for rec_ar, idx in zip(x, self.recurrent_field_idx):
+            idx = idx.reshape(-1)
+            for source_idx, target_idx in enumerate(idx):
+                x_flat[:,target_idx] = rec_ar.reshape(nevent, -1)[:,source_idx]
+        # flat fields
+        for source_idx, field_name in enumerate(self.flat_fields):
+            target_idx = self.fields.index(field_name)
+            x_flat[:,target_idx] = x[-1][:,source_idx]
+        return x_flat
+
+
+    def evaluate(self, x_eval, mode=None):
+        logger.debug("Evaluate score for {}".format(x_eval))
+        x_eval = self.transform(x_eval)
+        logger.debug("Evaluate for transformed array: {}".format(x_eval))
+        return self.predict(self.get_input_list(x_eval), mode=mode)
+
+
 if __name__ == "__main__":

    logging.basicConfig()
@@ -1165,8 +2125,8 @@ if __name__ == "__main__":
                              optimizer="Adam",
                              #optimizer="SGD",
                              #optimizer_opts=dict(lr=100., decay=1e-6, momentum=0.9),
-                                earlystopping_opts=dict(monitor='val_loss',
-                                                        min_delta=0, patience=2, verbose=0, mode='auto'),
+                              earlystopping_opts=dict(monitor='val_loss',
+                                                      min_delta=0, patience=2, verbose=0, mode='auto'),
                              selection="1",
                              branches = ["met", "mt"],
                              weight_expr = "eventWeight*genWeight",

--- a/utils.py
+++ b/utils.py
@@ -13,46 +13,96 @@ from meme import cache
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())

-def get_single_neuron_function(model, layer, neuron, scaler=None):
+def get_single_neuron_function(model, layer, neuron, input_transform=None):
+    """
+    Return a function that evaluates the output of a single neuron.
+
+    :param model: keras model; its input may be a single tensor or a list of tensors
+    :param layer: index into `model.layers`
+    :param neuron: index of the neuron in the output of that layer
+    :param input_transform: optional callable applied to the input before evaluation
+    :returns: function `eval_single_neuron(x)` returning the neuron output for `x`
+    """

-    f = K.function([model.input]+[K.learning_phase()], [model.layers[layer].output[:,neuron]])
+    # models with a single input expose a tensor, multi-input models a list
+    inp = model.input
+    if not isinstance(inp, list):
+        inp = [inp]
+
+    f = K.function(inp+[K.learning_phase()], [model.layers[layer].output[:,neuron]])

     def eval_single_neuron(x):
-        if scaler is not None:
-            x_eval = scaler.transform(x)
+        if input_transform is not None:
+            x_eval = input_transform(x)
         else:
             x_eval = x
-        return f([x_eval])[0]
+        # the backend function expects a list of inputs
+        if not isinstance(x_eval, list):
+            x_eval = [x_eval]
+        return f(x_eval)[0]

     return eval_single_neuron


-def create_random_event(ranges):
+def create_random_event(ranges, mask_probs=None, mask_value=None):
+    """
+    Create one random event with each variable drawn uniformly from its range.
+
+    :param ranges: sequence of (lower, upper) pairs, one per variable
+    :param mask_probs: optional sequence with one probability per variable
+        to replace the drawn value by `mask_value`
+    :param mask_value: value to use for masked entries (required if `mask_probs` is given)
+    :returns: array of shape (1, len(ranges))
+    :raises ValueError: if `mask_probs` is given without `mask_value`
+    """
     random_event = np.array([p[0]+(p[1]-p[0])*np.random.rand() for p in ranges])
     random_event = random_event.reshape(-1, len(random_event))
+    # if given, mask values with a certain probability
+    if mask_probs is not None:
+        if mask_value is None:
+            raise ValueError("Need to provide mask_value if random events should be masked")
+        for var_index, mask_prob in enumerate(mask_probs):
+            # the column slice is a view, so the assignment modifies random_event itself
+            random_event[:,var_index][np.random.rand(len(random_event)) < mask_prob] = mask_value
     return random_event


-def max_activation_wrt_input(gradient_function, random_event, threshold=None, maxthreshold=None, maxit=100, step=1, const_indices=[]):
-    for i in range(maxit):
-        loss_value, grads_value = gradient_function([random_event])
-        for const_index in const_indices:
-            grads_value[0][const_index] = 0
-        if threshold is not None:
-            if loss_value > threshold and (maxthreshold is None or loss_value < maxthreshold):
-                # found an event within the thresholds
-                return loss_value, random_event
-            elif (maxthreshold is not None and loss_value > maxthreshold):
-                random_event -= grads_value*step
-            else:
-                random_event += grads_value*step
+def get_ranges(x, quantiles, weights, mask_value=None, filter_index=None, max_evts=None):
+    """
+    Get ranges for plotting or random event generation based on quantiles
+
+    :param x: 2-dimensional array (events, variables)
+    :param quantiles: quantiles passed on to `weighted_quantile`
+    :param weights: one weight per event (row of `x`)
+    :param mask_value: entries equal to this value are excluded from the
+        quantile calculation and counted for the mask probability
+    :param filter_index: if given, only this variable index is processed
+    :param max_evts: if given, only a random subset of at most this many events is used
+    :returns: tuple (ranges, mask_probs) with one entry per processed variable -
+        the weighted quantiles of the unmasked values and the fraction of masked values
+    """
+    ranges = []
+    mask_probs = []
+    if max_evts is not None:
+        rnd_idx = np.random.permutation(np.arange(len(x)))
+        rnd_idx = rnd_idx[:max_evts]
+        # the weights have to follow the same random subset as the values,
+        # otherwise values and weights of different events get paired below
+        weights = weights[rnd_idx]
+    for var_index in range(x.shape[1]):
+        if (filter_index is not None) and (var_index != filter_index):
+            continue
+        x_var = x[:,var_index]
+        if max_evts is not None:
+            x_var = x_var[rnd_idx]
+        not_masked = np.where(x_var != mask_value)[0]
+        masked = np.where(x_var == mask_value)[0]
+        ranges.append(weighted_quantile(x_var[not_masked], quantiles, sample_weight=weights[not_masked]))
+        mask_probs.append(float(len(masked))/float(len(x_var)))
+    return ranges, mask_probs
+
+
+def max_activation_wrt_input(gradient_function, random_event, threshold=None, maxthreshold=None, maxit=100, step=1, const_indices=[],
+                             input_transform=None, input_inverse_transform=None):
+    """
+    Follow the gradient of a loss with respect to the input(s), starting from `random_event`.
+
+    :param gradient_function: callable taking the list of inputs and returning
+        the loss followed by one gradient array per input
+    :param random_event: starting point - an array or a list of arrays
+    :param threshold: if given, stop as soon as the loss exceeds this value
+    :param maxthreshold: optional upper bound - if the loss is above it, the
+        gradient is followed in the opposite direction
+    :param maxit: maximum number of iterations
+    :param step: factor applied to the gradient in each update
+    :param const_indices: indices whose gradient is set to 0 (inputs kept constant)
+    :param input_transform: optional callable applied to `random_event` before iterating
+    :param input_inverse_transform: optional callable applied to the resulting event
+    :returns: tuple (loss_value, random_event), or None if a threshold was
+        requested but not reached within `maxit` iterations
+    """
+    # NOTE(review): `const_indices=[]` is a mutable default - it is only read here
+    if input_transform is not None:
+        random_event = input_transform(random_event)
+    if not isinstance(random_event, list):
+        random_event = [random_event]
+
+    def iterate(random_event):
+        for i in range(maxit):
+            grads_out = gradient_function(random_event)
+            loss_value = grads_out[0][0]
+            grads_values = grads_out[1:]
+            # follow gradient for all inputs
+            # NOTE(review): the inner loop reuses the name `i` of the outer loop
+            # (the outer loop is unaffected since it iterates over a range)
+            for i, (grads_value, input_event) in enumerate(zip(grads_values, random_event)):
+                for const_index in const_indices:
+                    grads_value[0][const_index] = 0
+                if threshold is not None:
+                    if loss_value > threshold and (maxthreshold is None or loss_value < maxthreshold):
+                        # found an event within the thresholds
+                        return loss_value, random_event
+                    elif (maxthreshold is not None and loss_value > maxthreshold):
+                        random_event[i] -= grads_value*step
+                    else:
+                        random_event[i] += grads_value*step
+                else:
+                    random_event[i] += grads_value*step
         else:
-            random_event += grads_value*step
-    else:
-        if threshold is not None:
-            # no event found
-            return None
-    # if no threshold requested, always return last status
+            # for/else: reached when all iterations ran without returning
+            if threshold is not None:
+                # no event found for the given threshold
+                return None, None
+        # otherwise return last status
+        return loss_value, random_event
+
+    loss_value, random_event = iterate(random_event)
+    if input_inverse_transform is not None and random_event is not None:
+        random_event = input_inverse_transform(random_event)
+    elif random_event is None:
+        return None
     return loss_value, random_event


@@ -60,12 +110,16 @@ def get_grad_function(model, layer, neuron):

    loss = model.layers[layer].output[:,neuron]

-    grads = K.gradients(loss, model.input)[0]
+    grads = K.gradients(loss, model.input)

    # trick from https://blog.keras.io/how-convolutional-neural-networks-see-the-world.html
-    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
+    norm_grads = [grad/(K.sqrt(K.mean(K.square(grad))) + 1e-5) for grad in grads]
+
+    inp = model.input
+    if not isinstance(inp, list):
+        inp = [inp]

-    return K.function([model.input], [loss, grads])
+    return K.function(inp, [loss]+norm_grads)


 @cache(useJSON=True,
@@ -73,8 +127,9 @@ def get_grad_function(model, layer, neuron):
           lambda model: [hash(i.tostring()) for i in model.get_weights()],
           lambda ranges: [hash(i.tostring()) for i in ranges],
       ],
+       ignoreKwargs=["input_transform", "input_inverse_transform"],
 )
-def get_max_activation_events(model, ranges, ntries, layer, neuron, seed=42, **kwargs):
+def get_max_activation_events(model, ranges, ntries, layer, neuron, seed=42, mask_probs=None, mask_value=None, **kwargs):

    gradient_function = get_grad_function(model, layer, neuron)

@@ -84,9 +139,18 @@ def get_max_activation_events(model, ranges, ntries, layer, neuron, seed=42, **k
    for i in range(ntries):
        if not (i%100):
            logger.info(i)
-        res = max_activation_wrt_input(gradient_function, create_random_event(ranges), **kwargs)
+        res = max_activation_wrt_input(
+            gradient_function,
+            create_random_event(
+                ranges,
+                mask_probs=mask_probs,
+                mask_value=mask_value
+            ),
+            **kwargs
+        )
        if res is not None:
            loss, event = res
+            loss = np.array([loss])
        else:
            continue
        if events is None:
@@ -133,14 +197,53 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False

 class WeightedRobustScaler(RobustScaler):
+    """
+    RobustScaler whose center and scale can be determined from weighted
+    quantiles, ignoring NaN entries and (optionally) entries equal to a mask value.
+    """

-    def fit(self, X, y=None, weights=None):
-        RobustScaler.fit(self, X, y)
-        if weights is None:
-            return self
+    def fit(self, X, y=None, weights=None, mask_value=None):
+        """
+        Determine center (weighted median) and scale (weighted interquartile
+        range) for each column of X.
+
+        :param X: 2-dimensional array (events, variables)
+        :param y: passed on to `RobustScaler.fit` in the plain case
+        :param weights: optional event weights (default: all events weighted equally)
+        :param mask_value: optional value - entries equal to it are ignored
+        :returns: self
+        """
+        # Use the plain RobustScaler only if there is nothing special to take
+        # care of (no NaN, no masked values, no weights). The mask check used
+        # to be inverted here, which sent masked data down the plain path.
+        if not np.isnan(X).any() and mask_value is None and weights is None:
+            # these checks don't work for nan values
+            return super(WeightedRobustScaler, self).fit(X, y)
         else:
-            wqs = np.array([weighted_quantile(X[:,i], [0.25, 0.5, 0.75], sample_weight=weights) for i in range(X.shape[1])])
+            if weights is None:
+                # one unit weight per event (there is no `self.X` attribute)
+                weights = np.ones(len(X))
+            wqs = []
+            for i in range(X.shape[1]):
+                # only consider entries that are neither NaN nor masked
+                mask = ~np.isnan(X[:,i])
+                if mask_value is not None:
+                    mask &= (X[:,i] != mask_value)
+                wqs.append(
+                    weighted_quantile(
+                        X[:,i][mask],
+                        [0.25, 0.5, 0.75],
+                        sample_weight=weights[mask]
+                    )
+                )
+            wqs = np.array(wqs)
             self.center_ = wqs[:,1]
             self.scale_ = wqs[:,2]-wqs[:,0]
             self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
             return self

+
+    def transform(self, X):
+        """
+        Center and scale X. If X contains NaN values the calculation is done
+        directly and in place on X, otherwise `RobustScaler.transform` is used.
+        """
+        if np.isnan(X).any():
+            # we'd like to ignore nan values, so lets calculate without further checks
+            X -= self.center_
+            X /= self.scale_
+            return X
+        else:
+            return super(WeightedRobustScaler, self).transform(X)
+
+
+    def inverse_transform(self, X):
+        """
+        Undo `transform`. If X contains NaN values the calculation is done
+        directly and in place on X, otherwise `RobustScaler.inverse_transform` is used.
+        """
+        if np.isnan(X).any():
+            X *= self.scale_
+            X += self.center_
+            return X
+        else:
+            return super(WeightedRobustScaler, self).inverse_transform(X)
+
+
+def poisson_asimov_significance(s, ds, b, db):
+    """
+    Asimov significance for a poisson counting experiment with an uncertain background,
+    see `<http://www.pp.rhul.ac.uk/~cowan/stat/medsig/medsigNote.pdf>`_
+
+    :param s: expected signal
+    :param ds: uncertainty on the signal (added in quadrature to `db`)
+    :param b: expected background
+    :param db: uncertainty on the background
+    """
+    db = np.sqrt(db**2+ds**2)
+    var_b = db**2
+    n_tot = s+b
+    first_term = n_tot*np.log((n_tot*(b+var_b))/(b**2+n_tot*var_b))
+    second_term = (b**2)/var_b*np.log(1+(var_b*s)/(b*(b+var_b)))
+    return np.sqrt(2*(first_term-second_term))
+
No results found