diff --git a/__init__.py b/__init__.py
index e938f9e61047d1a58d7e0b1ef1c36243b77e682a..c2807c606b193f18802503a3c93270bb5636cbf3 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,3 +1,3 @@
-from .toolkit import ClassificationProject
-from .compare import overlay_ROC, overlay_loss
-from .add_friend import add_friend
+from .toolkit import *
+from .compare import *
+from .add_friend import *
diff --git a/plotting.py b/plotting.py
index db24def073d5f9aaa7a1eb55f86e3e508ec3bb11..ae25dfc096feffa92f3470e134155b7439638f6c 100644
--- a/plotting.py
+++ b/plotting.py
@@ -120,11 +120,13 @@ def plot_NN_vs_var_2D(plotname, means,
 
 
 def plot_NN_vs_var_2D_all(plotname, model, means,
-                          var1_index, var1_range,
-                          var2_index, var2_range,
+                          varx_index,
+                          vary_index,
+                          nbinsx, xmin, xmax,
+                          nbinsy, ymin, ymax,
                           transform_function=None,
-                          var1_label=None,
-                          var2_label=None,
+                          varx_label=None,
+                          vary_label=None,
                           zrange=None, logz=False,
                           plot_last_layer=False,
                           log_default_ymin=1e-5,
@@ -132,15 +134,15 @@ def plot_NN_vs_var_2D_all(plotname, model, means,
 
     "Similar to plot_NN_vs_var_2D, but creates a grid of plots for all neurons."
 
-    var1_vals = np.arange(*var1_range)
-    var2_vals = np.arange(*var2_range)
+    varx_vals = np.linspace(xmin, xmax, nbinsx)
+    vary_vals = np.linspace(ymin, ymax, nbinsy)
 
     # create the events for which we want to fetch the activations
-    events = np.tile(means, len(var1_vals)*len(var2_vals)).reshape(len(var2_vals), len(var1_vals), -1)
-    for i, y in enumerate(var2_vals):
-        for j, x in enumerate(var1_vals):
-            events[i][j][var1_index] = x
-            events[i][j][var2_index] = y
+    events = np.tile(means, len(varx_vals)*len(vary_vals)).reshape(len(vary_vals), len(varx_vals), -1)
+    for i, y in enumerate(vary_vals):
+        for j, x in enumerate(varx_vals):
+            events[i][j][varx_index] = x
+            events[i][j][vary_index] = y
 
     # convert back into 1d array
     events = events.reshape(-1, len(means))
@@ -187,7 +189,7 @@ def plot_NN_vs_var_2D_all(plotname, model, means,
     for layer in range(layers):
         for neuron in range(len(acts[layer][0])):
             acts_neuron = acts[layer][:,neuron]
-            acts_neuron = acts_neuron.reshape(len(var2_vals), len(var1_vals))
+            acts_neuron = acts_neuron.reshape(len(vary_vals), len(varx_vals))
             ax = grid_array[neuron][layer]
             extra_opts = {}
             if not (plot_last_layer and layer == layers-1):
@@ -200,12 +202,12 @@ def plot_NN_vs_var_2D_all(plotname, model, means,
                     extra_opts["norm"] = norm(vmin=zrange[0], vmax=zrange[1])
                 else:
                     extra_opts["norm"] = norm(vmin=global_min, vmax=global_max)
-            im = ax.pcolormesh(var1_vals, var2_vals, acts_neuron, cmap=cmap, linewidth=0, rasterized=True, **extra_opts)
+            im = ax.pcolormesh(varx_vals, vary_vals, acts_neuron, cmap=cmap, linewidth=0, rasterized=True, **extra_opts)
             ax.set_facecolor("black")
-            if var1_label is not None:
-                ax.set_xlabel(var1_label)
-            if var2_label is not None:
-                ax.set_ylabel(var2_label)
+            if varx_label is not None:
+                ax.set_xlabel(varx_label)
+            if vary_label is not None:
+                ax.set_ylabel(vary_label)
             ax.text(0., 0.5, "{}, {}".format(layer, neuron), transform=ax.transAxes, color="white")
 
     cb = fig.colorbar(im, cax=grid[0].cax, orientation="horizontal")
@@ -342,6 +344,8 @@ if __name__ == "__main__":
 
     def test_mean_signal():
 
+        c._load_data() # untransformed
+
         mean_signal = get_mean_event(c.x_test, c.y_test, 1)
 
         print("Mean signal: ")
@@ -371,9 +375,11 @@ if __name__ == "__main__":
 
         plot_NN_vs_var_2D_all("mt_vs_met_all.pdf", means=mean_signal,
                               model=c.model, transform_function=c.scaler.transform,
-                              var1_index=c.fields.index("met"), var1_range=(0, 1000, 10),
-                              var2_index=c.fields.index("mt"), var2_range=(0, 500, 10),
-                              var1_label="met [GeV]", var2_label="mt [GeV]")
+                              varx_index=c.fields.index("met"),
+                              vary_index=c.fields.index("mt"),
+                              nbinsx=100, xmin=0, xmax=1000,
+                              nbinsy=100, ymin=0, ymax=500,
+                              varx_label="met [GeV]", vary_label="mt [GeV]")
 
         plot_NN_vs_var_2D("mt_vs_met_crosscheck.pdf", means=mean_signal,
                           scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, scaler=c.scaler),
diff --git a/scripts/eval_model.py b/scripts/eval_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..63c4bbde062d0532c7abc632d86e83e8c3969e8e
--- /dev/null
+++ b/scripts/eval_model.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import os
+import argparse
+
+import keras
+import h5py
+from sklearn.metrics import roc_curve, auc
+import matplotlib.pyplot as plt
+import numpy as np
+
+from KerasROOTClassification import ClassificationProject
+
+parser = argparse.ArgumentParser(description='Evaluate a model from a classification project using the given '
+                                             'weights and plot the ROC curve and train/test overlayed scores')
+parser.add_argument("project_dir")
+parser.add_argument("weights")
+parser.add_argument("-p", "--plot-prefix", default="eval_nn")
+args = parser.parse_args()
+
+c = ClassificationProject(args.project_dir)
+
+c.model.load_weights(args.weights)
+
+print("Predicting for test sample ...")
+scores_test = c.evaluate(c.x_test)
+print("Done")
+
+fpr, tpr, threshold = roc_curve(c.y_test, scores_test, sample_weight = c.w_test)
+fpr = 1.0 - fpr
+try:
+    roc_auc = auc(tpr, fpr)
+except ValueError:
+    print("Got a value error from auc - trying to rerun with reorder=True")
+    roc_auc = auc(tpr, fpr, reorder=True)
+
+plt.grid(color='gray', linestyle='--', linewidth=1)
+plt.plot(tpr,  fpr, label=str(c.name + " (AUC = {})".format(roc_auc)))
+plt.plot([0,1],[1,0], linestyle='--', color='black', label='Luck')
+plt.ylabel("Background rejection")
+plt.xlabel("Signal efficiency")
+plt.title('Receiver operating characteristic')
+plt.xlim(0,1)
+plt.ylim(0,1)
+plt.xticks(np.arange(0,1,0.1))
+plt.yticks(np.arange(0,1,0.1))
+plt.legend(loc='lower left', framealpha=1.0)
+plt.savefig(args.plot_prefix+"_ROC.pdf")
+plt.clf()
+
diff --git a/scripts/plot_NN_2D.py b/scripts/plot_NN_2D.py
index ba54b1eb6551d2bb2baefb985d9cbfc7b43557b3..108c35d53d6e18cf59956b6a11cefde384b51b00 100755
--- a/scripts/plot_NN_2D.py
+++ b/scripts/plot_NN_2D.py
@@ -7,15 +7,20 @@ logging.basicConfig()
 
 import numpy as np
 
+import ROOT
+ROOT.gROOT.SetBatch()
+ROOT.PyConfig.IgnoreCommandLineOptions = True
+
 from KerasROOTClassification import ClassificationProject
 from KerasROOTClassification.plotting import (
     get_mean_event,
     plot_NN_vs_var_2D,
     plot_profile_2D,
     plot_hist_2D_events,
-    plot_cond_avg_actmax_2D
+    plot_cond_avg_actmax_2D,
+    plot_NN_vs_var_2D_all,
 )
-from KerasROOTClassification.utils import get_single_neuron_function, get_max_activation_events
+from KerasROOTClassification.utils import get_single_neuron_function, get_max_activation_events, weighted_quantile
 
 parser = argparse.ArgumentParser(description='Create various 2D plots for a single neuron')
 parser.add_argument("project_dir")
@@ -27,6 +32,7 @@ parser.add_argument("-m", "--mode",
                     default="mean_sig")
 parser.add_argument("-l", "--layer", type=int, help="Layer index (takes last layer by default)")
 parser.add_argument("-n", "--neuron", type=int, default=0, help="Neuron index (takes first neuron by default)")
+parser.add_argument("-a", "--all-neurons", action="store_true", help="Create a summary plot for all neurons in all hidden layers")
 parser.add_argument("--log", action="store_true", help="Plot in color in log scale")
 parser.add_argument("--contour", action="store_true", help="Interpolate with contours")
 parser.add_argument("-b", "--nbins", default=20, type=int, help="Number of bins in x and y direction")
@@ -42,6 +48,9 @@ parser.add_argument("-s", "--step-size", help="step size for activation maximisa
 
 args = parser.parse_args()
 
+if args.all_neurons and (not args.mode.startswith("mean")):
+    parser.error("--all-neurons currently only supported for mean_sig and mean_bkg")
+
 if args.verbose:
     logging.getLogger().setLevel(logging.DEBUG)
 
@@ -64,8 +73,13 @@ else:
 varx_label = args.varx
 vary_label = args.vary
 
-percentilesx = np.percentile(c.x_test[:,varx_index], [1,99])
-percentilesy = np.percentile(c.x_test[:,vary_index], [1,99])
+# percentilesx = np.percentile(c.x_test[:,varx_index], [1,99])
+# percentilesy = np.percentile(c.x_test[:,vary_index], [1,99])
+
+total_weights = c.w_test*np.array(c.class_weight)[c.y_test.astype(int)]
+
+percentilesx = weighted_quantile(c.x_test[:,varx_index], [0.01, 0.99], sample_weight=total_weights)
+percentilesy = weighted_quantile(c.x_test[:,vary_index], [0.01, 0.99], sample_weight=total_weights)
 
 if args.xrange is not None:
     if len(args.xrange) < 3:
@@ -90,17 +104,31 @@ if args.mode.startswith("mean"):
     elif args.mode == "mean_bkg":
         means = get_mean_event(c.x_test, c.y_test, 0)
 
-    plot_NN_vs_var_2D(
-        args.output_filename,
-        means=means,
-        varx_index=varx_index,
-        vary_index=vary_index,
-        scorefun=get_single_neuron_function(c.model, layer, neuron, scaler=c.scaler),
-        xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
-        ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
-        varx_label=varx_label, vary_label=vary_label,
-        logscale=args.log, only_pixels=(not args.contour)
-    )
+    if not args.all_neurons:
+        plot_NN_vs_var_2D(
+            args.output_filename,
+            means=means,
+            varx_index=varx_index,
+            vary_index=vary_index,
+            scorefun=get_single_neuron_function(c.model, layer, neuron, scaler=c.scaler),
+            xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
+            ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
+            varx_label=varx_label, vary_label=vary_label,
+            logscale=args.log, only_pixels=(not args.contour)
+        )
+    else:
+        plot_NN_vs_var_2D_all(
+            args.output_filename,
+            means=means,
+            model=c.model,
+            transform_function=c.scaler.transform,
+            varx_index=varx_index,
+            vary_index=vary_index,
+            xmin=varx_range[0], xmax=varx_range[1], nbinsx=varx_range[2],
+            ymin=vary_range[0], ymax=vary_range[1], nbinsy=vary_range[2],
+            logz=args.log,
+            plot_last_layer=False,
+        )
 
 elif args.mode.startswith("profile"):
 
diff --git a/toolkit.py b/toolkit.py
index ce6307879ed23daef2818f61ed40284a47429901..ada4e54c9118a414b70a644e6e44dad9b78f5764 100755
--- a/toolkit.py
+++ b/toolkit.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+__all__ = ["ClassificationProject", "ClassificationProjectDataFrame"]
+
 from sys import version_info
 
 if version_info[0] > 2:
@@ -10,13 +12,13 @@ else:
 
 import os
 import json
-import yaml
 import pickle
 import importlib
 import csv
 import math
 import glob
 import shutil
+import gc
 
 import logging
 logger = logging.getLogger("KerasROOTClassification")
@@ -32,7 +34,7 @@ from sklearn.metrics import roc_curve, auc
 from keras.models import Sequential
 from keras.layers import Dense, Dropout
 from keras.models import model_from_json
-from keras.callbacks import History, EarlyStopping, CSVLogger, ModelCheckpoint
+from keras.callbacks import History, EarlyStopping, CSVLogger, ModelCheckpoint, TensorBoard
 from keras.optimizers import SGD
 import keras.optimizers
 import matplotlib.pyplot as plt
@@ -41,15 +43,15 @@ from .utils import WeightedRobustScaler, weighted_quantile
 
 # configure number of cores
 # this doesn't seem to work, but at least with these settings keras only uses 4 processes
-import tensorflow as tf
-from keras import backend as K
-num_cores = 1
-config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,
-                        inter_op_parallelism_threads=num_cores,
-                        allow_soft_placement=True,
-                        device_count = {'CPU': num_cores})
-session = tf.Session(config=config)
-K.set_session(session)
+# import tensorflow as tf
+# from keras import backend as K
+# num_cores = 1
+# config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,
+#                         inter_op_parallelism_threads=num_cores,
+#                         allow_soft_placement=True,
+#                         device_count = {'CPU': num_cores})
+# session = tf.Session(config=config)
+# K.set_session(session)
 
 import ROOT
 
@@ -69,6 +71,7 @@ def byteify(input):
 if version_info[0] > 2:
     byteify = lambda input : input
 
+
 class ClassificationProject(object):
 
     """Simple framework to load data from ROOT TTrees and train Keras
@@ -110,7 +113,9 @@ class ClassificationProject(object):
 
     :param nodes: list number of nodes in each layer. If only a single number is given, use this number for every layer
 
-    :param dropout: dropout fraction after each hidden layer. Set to None for no Dropout
+    :param dropout: dropout fraction after each hidden layer. You can also pass a list for dropout fractions for each layer. Set to None for no Dropout.
+
+    :param dropout_input: dropout fraction for the input layer. Set to None for no Dropout.
 
     :param batch_size: size of the training batches
 
@@ -128,6 +133,10 @@ class ClassificationProject(object):
 
     :param step_bkg: step size when selecting background training events (e.g. 2 means take every second event)
 
+    :param stop_train: stop after this number of events for reading in training events
+
+    :param stop_test: stop after this number of events for reading in test events
+
     :param optimizer: name of optimizer class in keras.optimizers
 
     :param optimizer_opts: dictionary of options for the optimizer
@@ -143,6 +152,10 @@ class ClassificationProject(object):
                                  you change the format of the saved model weights it has to be of
                                  the form "weights*.h5"
 
+    :param use_tensorboard: if True, use the tensorboard callback to write logs for tensorboard
+
+    :param tensorboard_opts: options for the TensorBoard callback
+
     :param balance_dataset: if True, balance the dataset instead of
                             applying class weights. Only a fraction of the overrepresented
                             class will be used in each epoch, but different subsets of the
@@ -196,6 +209,7 @@ class ClassificationProject(object):
                         layers=3,
                         nodes=64,
                         dropout=None,
+                        dropout_input=None,
                         batch_size=128,
                         validation_split=0.33,
                         activation_function='relu',
@@ -203,12 +217,16 @@ class ClassificationProject(object):
                         scaler_type="WeightedRobustScaler",
                         step_signal=2,
                         step_bkg=2,
+                        stop_train=None,
+                        stop_test=None,
                         optimizer="SGD",
                         optimizer_opts=None,
                         use_earlystopping=True,
                         earlystopping_opts=None,
                         use_modelcheckpoint=True,
                         modelcheckpoint_opts=None,
+                        use_tensorboard=False,
+                        tensorboard_opts=None,
                         random_seed=1234,
                         balance_dataset=False,
                         loss='binary_crossentropy'):
@@ -243,6 +261,11 @@ class ClassificationProject(object):
             logger.warning("Number of layers not equal to the given nodes "
                            "per layer - adjusted to " + str(self.layers))
         self.dropout = dropout
+        if not isinstance(self.dropout, list):
+            self.dropout = [self.dropout for i in range(self.layers)]
+        if len(self.dropout) != self.layers:
+            raise ValueError("List of dropout fractions has to be of equal size as the number of layers!")
+        self.dropout_input = dropout_input
         self.batch_size = batch_size
         self.validation_split = validation_split
         self.activation_function = activation_function
@@ -250,9 +273,12 @@ class ClassificationProject(object):
         self.scaler_type = scaler_type
         self.step_signal = step_signal
         self.step_bkg = step_bkg
+        self.stop_train = stop_train
+        self.stop_test = stop_test
         self.optimizer = optimizer
         self.use_earlystopping = use_earlystopping
         self.use_modelcheckpoint = use_modelcheckpoint
+        self.use_tensorboard = use_tensorboard
         if optimizer_opts is None:
             optimizer_opts = dict()
         self.optimizer_opts = optimizer_opts
@@ -266,6 +292,11 @@ class ClassificationProject(object):
                 filepath="weights.h5"
             )
         self.modelcheckpoint_opts = modelcheckpoint_opts
+        self.tensorboard_opts = dict(
+            log_dir=os.path.join(self.project_dir, "tensorboard"),
+        )
+        if tensorboard_opts is not None:
+            self.tensorboard_opts.update(**tensorboard_opts)
         self.random_seed = random_seed
         self.balance_dataset = balance_dataset
         self.loss = loss
@@ -349,19 +380,19 @@ class ClassificationProject(object):
             self.s_train = tree2array(signal_chain,
                                       branches=self.branches+[self.weight_expr]+self.identifiers,
                                       selection=self.selection,
-                                      start=0, step=self.step_signal)
+                                      start=0, step=self.step_signal, stop=self.stop_train)
             self.b_train = tree2array(bkg_chain,
                                       branches=self.branches+[self.weight_expr]+self.identifiers,
                                       selection=self.selection,
-                                      start=0, step=self.step_bkg)
+                                      start=0, step=self.step_bkg, stop=self.stop_train)
             self.s_test = tree2array(signal_chain,
                                      branches=self.branches+[self.weight_expr],
                                      selection=self.selection,
-                                     start=1, step=self.step_signal)
+                                     start=1, step=self.step_signal, stop=self.stop_test)
             self.b_test = tree2array(bkg_chain,
                                      branches=self.branches+[self.weight_expr],
                                      selection=self.selection,
-                                     start=1, step=self.step_bkg)
+                                     start=1, step=self.step_bkg, stop=self.stop_test)
 
             self.rename_fields(self.s_train)
             self.rename_fields(self.b_train)
@@ -380,19 +411,23 @@ class ClassificationProject(object):
             # the first block will be signals, the second block backgrounds
             self.x_train = rec2array(self.s_train[self.fields])
             self.x_train = np.concatenate((self.x_train, rec2array(self.b_train[self.fields])))
-            self.x_test = rec2array(self.s_test[self.fields])
-            self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields])))
             self.w_train = self.s_train[self.weight_expr]
             self.w_train = np.concatenate((self.w_train, self.b_train[self.weight_expr]))
-            self.w_test = self.s_test[self.weight_expr]
-            self.w_test = np.concatenate((self.w_test, self.b_test[self.weight_expr]))
-
-            self.y_train = np.empty(len(self.x_train))
+            self.y_train = np.empty(len(self.x_train), dtype=np.bool)
             self.y_train[:len(self.s_train)] = 1
             self.y_train[len(self.s_train):] = 0
-            self.y_test = np.empty(len(self.x_test))
+            self.b_train = None
+            self.s_train = None
+
+            self.x_test = rec2array(self.s_test[self.fields])
+            self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields])))
+            self.w_test = self.s_test[self.weight_expr]
+            self.w_test = np.concatenate((self.w_test, self.b_test[self.weight_expr]))
+            self.y_test = np.empty(len(self.x_test), dtype=np.bool)
             self.y_test[:len(self.s_test)] = 1
             self.y_test[len(self.s_test):] = 0
+            self.b_test = None
+            self.s_test = None
 
             self._dump_to_hdf5(*self.dataset_names_tree)
 
@@ -474,6 +509,8 @@ class ClassificationProject(object):
             if not os.path.dirname(mc.filepath) == self.project_dir:
                 mc.filepath = os.path.join(self.project_dir, mc.filepath)
                 logger.debug("Prepending project dir to ModelCheckpoint filepath: {}".format(mc.filepath))
+        if self.use_tensorboard:
+            self._callbacks_list.append(TensorBoard(**self.tensorboard_opts))
         self._callbacks_list.append(CSVLogger(os.path.join(self.project_dir, "training.log"), append=True))
         return self._callbacks_list
 
@@ -499,7 +536,10 @@ class ClassificationProject(object):
                 else:
                     raise ValueError("Scaler type {} unknown".format(self.scaler_type))
                 logger.info("Fitting {} to training data".format(self.scaler_type))
+                orig_copy_setting = self.scaler.copy
+                self.scaler.copy = False
                 self._scaler.fit(self.x_train, **scaler_fit_kwargs)
+                self.scaler.copy = orig_copy_setting
                 joblib.dump(self._scaler, filename)
         return self._scaler
 
@@ -541,9 +581,12 @@ class ClassificationProject(object):
             logger.debug("training data before transformation: {}".format(self.x_train))
             logger.debug("minimum values: {}".format([np.min(self.x_train[:,i]) for i in range(self.x_train.shape[1])]))
             logger.debug("maximum values: {}".format([np.max(self.x_train[:,i]) for i in range(self.x_train.shape[1])]))
+            orig_copy_setting = self.scaler.copy
+            self.scaler.copy = False
             self.x_train = self.scaler.transform(self.x_train)
             logger.debug("training data after transformation: {}".format(self.x_train))
             self.x_test = self.scaler.transform(self.x_test)
+            self.scaler.copy = orig_copy_setting
             self.data_transformed = True
             logger.info("Training and test data transformed")
 
@@ -588,15 +631,21 @@ class ClassificationProject(object):
 
             self._model = Sequential()
 
-            # first hidden layer
-            self._model.add(Dense(self.nodes[0], input_dim=len(self.fields), activation=self.activation_function))
-            # the other hidden layers
-            for node_count, layer_number in zip(self.nodes[1:], range(self.layers-1)):
+            if self.dropout_input is None:
+                self._model.add(Dense(self.nodes[0], input_dim=len(self.fields), activation=self.activation_function))
+                # in case of no Dropout we already have the first hidden layer
+                start_layer = 1
+            else:
+                self._model.add(Dropout(rate=self.dropout_input, input_shape=(len(self.fields),)))
+                start_layer = 0
+            # the (other) hidden layers
+            for node_count, dropout_fraction in zip(self.nodes[start_layer:], self.dropout[start_layer:]):
                 self._model.add(Dense(node_count, activation=self.activation_function))
-                if self.dropout is not None:
-                    self._model.add(Dropout(rate=self.dropout))
+                if (dropout_fraction is not None) and (dropout_fraction > 0):
+                    self._model.add(Dropout(rate=dropout_fraction))
             # last layer is one neuron (binary classification)
             self._model.add(Dense(1, activation=self.activation_function_output))
+
             logger.info("Using {}(**{}) as Optimizer".format(self.optimizer, self.optimizer_opts))
             Optimizer = getattr(keras.optimizers, self.optimizer)
             optimizer = Optimizer(**self.optimizer_opts)
@@ -605,11 +654,14 @@ class ClassificationProject(object):
             np.random.seed(self.random_seed)
             self._model.compile(optimizer=optimizer,
                                 loss=self.loss,
-                                metrics=['accuracy'])
+                                weighted_metrics=['accuracy']
+            )
             np.random.set_state(rn_state)
+
             if os.path.exists(os.path.join(self.project_dir, "weights.h5")):
                 if self.is_training:
-                    continue_training = self.query_yn("Found previously trained weights - continue training (choosing N will restart)? (Y/N) ")
+                    continue_training = self.query_yn("Found previously trained weights - "
+                                                      "continue training (choosing N will restart)? (Y/N) ")
                 else:
                     continue_training = True
                 if continue_training:
@@ -946,34 +998,32 @@ class ClassificationProject(object):
         plt.close(fig)
 
 
-    def plot_ROC(self, truth=True):
+    def plot_ROC(self, xlim=(0,1), ylim=(0,1)):
 
         logger.info("Plot ROC curve")
-        fpr, tpr, threshold = roc_curve(self.y_test, self.scores_test, sample_weight = self.w_test)
-        fpr = 1.0 - fpr
-        try:
-            roc_auc = auc(tpr, fpr, reorder=True)
-        except ValueError:
-            logger.warning("Got a value error from auc - trying to rerun with reorder=True")
-            roc_auc = auc(tpr, fpr, reorder=True)
+        plt.grid(color='gray', linestyle='--', linewidth=1)
 
-        if truth:
-            plot_name = "ROC_truth.pdf"
-            legend_name = "Truth test"
-        else:
-            plot_name = "ROC_reco.pdf"
-            legend_name = "Reco test"
+        for y, scores, weight, label in [
+                (self.y_train, self.scores_train, self.w_train, "train"),
+                (self.y_test, self.scores_test, self.w_test, "test")
+        ]:
+            fpr, tpr, threshold = roc_curve(y, scores, sample_weight = weight)
+            fpr = 1.0 - fpr # background rejection
+            try:
+                roc_auc = auc(tpr, fpr)
+            except ValueError:
+                logger.warning("Got a value error from auc - trying to rerun with reorder=True")
+                roc_auc = auc(tpr, fpr, reorder=True)
+            plt.plot(tpr,  fpr, label=str(self.name + " {} (AUC = {:.3f})".format(label, roc_auc)))
 
-        plt.grid(color='gray', linestyle='--', linewidth=1)
-        plt.plot(tpr,  fpr, label=str(legend_name + " (AUC = {})".format(roc_auc)))
         plt.plot([0,1],[1,0], linestyle='--', color='black', label='Luck')
         plt.ylabel("Background rejection")
         plt.xlabel("Signal efficiency")
-        # plt.title('Receiver operating characteristic')
-        plt.xlim(0,1)
-        plt.ylim(0,1)
-        plt.xticks(np.arange(0,1,0.1))
-        plt.yticks(np.arange(0,1,0.1))
+        plt.title('Receiver operating characteristic')
+        plt.xlim(*xlim)
+        plt.ylim(*ylim)
+        # plt.xticks(np.arange(0,1,0.1))
+        # plt.yticks(np.arange(0,1,0.1))
         plt.legend(loc='lower left', framealpha=1.0)
-        plt.savefig(os.path.join(self.project_dir, plot_name))
+        plt.savefig(os.path.join(self.project_dir, "ROC.pdf"))
         plt.clf()
@@ -1097,7 +1147,7 @@ class ClassificationProject(object):
         plt.clf()
 
 
-    def plot_accuracy(self, all_trainings=False, log=False):
+    def plot_accuracy(self, all_trainings=False, log=False, acc_suffix="weighted_acc"):
         """
         Plot the value of the accuracy metric for each epoch
 
@@ -1109,14 +1159,14 @@ class ClassificationProject(object):
         else:
             hist_dict = self.history.history
 
-        if (not 'acc' in hist_dict) or (not 'val_acc' in hist_dict):
+        if (not acc_suffix in hist_dict) or (not 'val_'+acc_suffix in hist_dict):
             logger.warning("No previous history found for plotting, try global history")
             hist_dict = self.csv_hist
 
         logger.info("Plot accuracy")
 
-        plt.plot(hist_dict['acc'])
-        plt.plot(hist_dict['val_acc'])
+        plt.plot(hist_dict[acc_suffix])
+        plt.plot(hist_dict['val_'+acc_suffix])
         plt.title('model accuracy')
         plt.ylabel('accuracy')
         plt.xlabel('epoch')
@@ -1129,11 +1179,30 @@ class ClassificationProject(object):
 
     def plot_all(self):
         self.plot_ROC()
-        self.plot_accuracy()
+        # self.plot_accuracy()
         self.plot_loss()
         self.plot_score()
         self.plot_weights()
-        self.plot_significance()
+        # self.plot_significance()
+
+
+    def to_DataFrame(self):
+        df = pd.DataFrame(np.concatenate([self.x_train, self.x_test]), columns=self.fields)
+        df["weight"] = np.concatenate([self.w_train, self.w_test])
+        df["labels"] = pd.Categorical.from_codes(
+            np.concatenate([self.y_train, self.y_test]),
+            categories=["background", "signal"]
+        )
+        for identifier in self.identifiers:
+            try:
+                df[identifier] = np.concatenate([self.s_eventlist_train[identifier],
+                                                 self.b_eventlist_train[identifier],
+                                                 -1*np.ones(len(self.x_test), dtype="i8")])
+            except IOError:
+                logger.warning("Can't find eventlist - DataFrame won't contain identifiers")
+        df["is_train"] = np.concatenate([np.ones(len(self.x_train), dtype=np.bool),
+                                         np.zeros(len(self.x_test), dtype=np.bool)])
+        return df
 
 
 def create_getter(dataset_name):
@@ -1154,6 +1223,125 @@ for dataset_name in ClassificationProject.dataset_names:
                                                           create_setter(dataset_name)))
 
 
+class ClassificationProjectDataFrame(ClassificationProject):
+
+    """
+    A little hack to initialize a ClassificationProject from a pandas DataFrame instead of ROOT TTrees
+    """
+
+    def __init__(self,
+                 name,
+                 df,
+                 input_columns,
+                 weight_column="weights",
+                 label_column="labels",
+                 signal_label="signal",
+                 background_label="background",
+                 split_mode="split_column",
+                 split_column="is_train",
+                 **kwargs):
+
+        self.df = df
+        self.input_columns = input_columns
+        self.weight_column = weight_column
+        self.label_column = label_column
+        self.signal_label = signal_label
+        self.background_label = background_label
+        if split_mode != "split_column":
+            raise NotImplementedError("'split_column' is the only currently supported split mode")
+        self.split_mode = split_mode
+        self.split_column = split_column
+        super(ClassificationProjectDataFrame, self).__init__(name,
+                                                             signal_trees=[], bkg_trees=[], branches=[], weight_expr="1",
+                                                             **kwargs)
+        self._x_train = None
+        self._x_test = None
+        self._y_train = None
+        self._y_test = None
+        self._w_train = None
+        self._w_test = None
+
+    @property
+    def x_train(self):
+        if self._x_train is None:
+            self._x_train = self.df[self.df[self.split_column]][self.input_columns].values
+        return self._x_train
+
+    @x_train.setter
+    def x_train(self, value):
+        self._x_train = value
+
+    @property
+    def x_test(self):
+        if self._x_test is None:
+            self._x_test = self.df[~self.df[self.split_column]][self.input_columns].values
+        return self._x_test
+
+    @x_test.setter
+    def x_test(self, value):
+        self._x_test = value
+
+    @property
+    def y_train(self):
+        if self._y_train is None:
+            self._y_train = (self.df[self.df[self.split_column]][self.label_column] == self.signal_label).values
+        return self._y_train
+
+    @y_train.setter
+    def y_train(self, value):
+        self._y_train = value
+
+    @property
+    def y_test(self):
+        if self._y_test is None:
+            self._y_test = (self.df[~self.df[self.split_column]][self.label_column] == self.signal_label).values
+        return self._y_test
+
+    @y_test.setter
+    def y_test(self, value):
+        self._y_test = value
+
+    @property
+    def w_train(self):
+        if self._w_train is None:
+            self._w_train = self.df[self.df[self.split_column]][self.weight_column].values
+        return self._w_train
+
+    @w_train.setter
+    def w_train(self, value):
+        self._w_train = value
+
+    @property
+    def w_test(self):
+        if self._w_test is None:
+            self._w_test = self.df[~self.df[self.split_column]][self.weight_column].values
+        return self._w_test
+
+    @w_test.setter
+    def w_test(self, value):
+        self._w_test = value
+
+    @property
+    def fields(self):
+        return self.input_columns
+
+
+    def load(self, reload=False):
+
+        if reload:
+            self.data_loaded = False
+            self.data_transformed = False
+            self._x_train = None
+            self._x_test = None
+            self._y_train = None
+            self._y_test = None
+            self._w_train = None
+            self._w_test = None
+
+        if not self.data_transformed:
+            self._transform_data()
+
+
 if __name__ == "__main__":
 
     logging.basicConfig()
@@ -1172,8 +1360,8 @@ if __name__ == "__main__":
                               optimizer="Adam",
                               #optimizer="SGD",
                               #optimizer_opts=dict(lr=100., decay=1e-6, momentum=0.9),
-                                earlystopping_opts=dict(monitor='val_loss',
-                                                        min_delta=0, patience=2, verbose=0, mode='auto'),
+                              earlystopping_opts=dict(monitor='val_loss',
+                                                      min_delta=0, patience=2, verbose=0, mode='auto'),
                               selection="1",
                               branches = ["met", "mt"],
                               weight_expr = "eventWeight*genWeight",