diff --git a/compare.py b/compare.py
index 98b2fdbd7788f9d86addc90ab8d8bc419250d299..bcd2e87e525bd148fbc4c0bcbcd2d071f26b01c4 100755
--- a/compare.py
+++ b/compare.py
@@ -22,6 +22,7 @@ def overlay_ROC(filename, *projects, **kwargs):
     threshold_log = kwargs.pop("threshold_log", True)
     lumifactor = kwargs.pop("lumifactor", None)
     tight_layout = kwargs.pop("tight_layout", False)
+    show_auc = kwargs.pop("show_auc", True)
     if kwargs:
         raise KeyError("Unknown kwargs: {}".format(kwargs))
 
@@ -52,7 +53,11 @@ def overlay_ROC(filename, *projects, **kwargs):
             roc_auc = auc(tpr, fpr, reorder=True)
 
         ax.grid(color='gray', linestyle='--', linewidth=1)
-        ax.plot(tpr,  fpr, label=str(p.name+" (AUC = {:.3f})".format(roc_auc)), color=color)
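+        # optionally append the AUC value to the legend entry for each curve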
+        if show_auc:
+            label = "{} (AUC = {:.3f})".format(p.name, roc_auc)
+        else:
+            label = p.name
+        ax.plot(tpr, fpr, label=label, color=color)
         if plot_thresholds:
             ax2.plot(tpr, threshold, "--", color=color)
         if lumifactor is not None:
diff --git a/test/test_toolkit.py b/test/test_toolkit.py
index ecd3aa244cd2269ae24a3386eaf990272d0c4871..1ebd7073d6667a480cf0e74ebe763f2ffa465a40 100644
--- a/test/test_toolkit.py
+++ b/test/test_toolkit.py
@@ -7,6 +7,7 @@ from keras.layers import GRU
 
 from KerasROOTClassification import ClassificationProject, ClassificationProjectRNN
 
+
 def create_dataset(path):
 
     # create example dataset with (low-weighted) noise added
@@ -50,6 +51,7 @@ def test_ClassificationProject(tmp_path):
         layers=3,
         nodes=128,
     )
+
     c.train(epochs=200)
     c.plot_all_inputs()
     c.plot_loss()
diff --git a/toolkit.py b/toolkit.py
index b0500fa5da7675bb53f16b23968ef124b62f230b..ee25e6a0f7850fa502c54dba99b71664186c3a4d 100755
--- a/toolkit.py
+++ b/toolkit.py
@@ -39,9 +39,12 @@ from keras.models import Sequential, Model, model_from_json
 from keras.layers import Dense, Dropout, Input, Masking, GRU, LSTM, concatenate, SimpleRNN
 from keras.callbacks import History, EarlyStopping, CSVLogger, ModelCheckpoint, TensorBoard, CallbackList, BaseLogger
 from keras.optimizers import SGD
+from keras.activations import relu
+import keras.initializers
 import keras.optimizers
 from keras.utils.vis_utils import model_to_dot
 from keras import backend as K
+import tensorflow as tf
 import matplotlib.pyplot as plt
 
 from .utils import WeightedRobustScaler, weighted_quantile, poisson_asimov_significance
@@ -65,6 +68,25 @@ if version_info[0] > 2:
     byteify = lambda input : input
 
 
+def set_session_threads(n_cpu=None):
+    "Set the number of threads based on OMP_NUM_THREADS or the given argument"
+
+    if n_cpu is None:
+        n_cpu = os.environ.get('OMP_NUM_THREADS')
+        if not n_cpu:
+            return
+        n_cpu = int(n_cpu)
+
+    # not sure if this is the best configuration ...
+    config = tf.ConfigProto(intra_op_parallelism_threads=n_cpu,
+                            inter_op_parallelism_threads=1,
+                            allow_soft_placement=True,
+                            #log_device_placement=True,
+                            device_count={'CPU': n_cpu})
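+    # ConfigProto/Session is the TensorFlow 1.x API; K.set_session makes the
+    # Keras backend use this session so the thread limits apply to all later ops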
+    session = tf.Session(config=config)
+    K.set_session(session)
+
+
 def load_from_dir(path):
     "Load a project and the options from a directory"
     try:
@@ -143,6 +165,8 @@ class ClassificationProject(object):
 
     :param activation_function_output: activation function in the output layer
 
+    :param leaky_relu_alpha: set this to a non-zero value to turn the "relu" activation into its leaky variant with this slope in the negative part
+
     :param out_dir: base directory in which the project directories should be stored
 
     :param scaler_type: sklearn scaler class name to transform the data before training (options: "StandardScaler", "RobustScaler")
@@ -195,6 +219,10 @@ class ClassificationProject(object):
 
     :param normalize_weights: normalize the weights to mean 1
 
+    :param ignore_neg_weights: ignore events with negative weights during training (not recommended; default: False)
+
+    :param kernel_initializer: weight initializer for the dense layers - if None (default) the keras defaults are used
+
     :param shuffle: shuffle training data after (and before first) epoch
 
     """
@@ -247,6 +275,7 @@ class ClassificationProject(object):
                         kfold_splits=None,
                         kfold_index=0,
                         activation_function='relu',
+                        leaky_relu_alpha=None,
                         activation_function_output='sigmoid',
                         scaler_type="WeightedRobustScaler",
                         step_signal=2,
@@ -269,6 +298,8 @@ class ClassificationProject(object):
                         mask_value=None,
                         apply_class_weight=True,
                         normalize_weights=True,
+                        ignore_neg_weights=False,
+                        kernel_initializer=None,
                         shuffle=True,
     ):
 
@@ -320,6 +351,9 @@ class ClassificationProject(object):
         self.kfold_splits = kfold_splits
         self.kfold_index = kfold_index
         self.activation_function = activation_function
+        self.leaky_relu_alpha = leaky_relu_alpha
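+        # keras.activations.relu with a non-zero alpha is the leaky variant, so
+        # wrap it in a lambda that fixes the requested negative slope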
+        if self.activation_function == "relu" and self.leaky_relu_alpha:
+            self.activation_function = lambda x: relu(x, alpha=self.leaky_relu_alpha)
         self.activation_function_output = activation_function_output
         self.scaler_type = scaler_type
         self.step_signal = step_signal
@@ -359,6 +393,8 @@ class ClassificationProject(object):
         self.mask_value = mask_value
         self.apply_class_weight = apply_class_weight
         self.normalize_weights = normalize_weights
+        self.ignore_neg_weights = ignore_neg_weights
+        self.kernel_initializer = kernel_initializer
         self.shuffle = shuffle
 
         self.s_train = None
@@ -468,6 +504,10 @@ class ClassificationProject(object):
                                      selection=self.selection,
                                      start=1, step=self.step_bkg, stop=self.stop_test)
 
+            if self.ignore_neg_weights:
+                self.s_train = self.s_train[self.s_train[self.weight_expr] > 0]
+                self.b_train = self.b_train[self.b_train[self.weight_expr] > 0]
+
             self.rename_fields(self.s_train)
             self.rename_fields(self.b_train)
             self.rename_fields(self.s_test)
@@ -790,7 +831,10 @@ class ClassificationProject(object):
                     self.dropout,
                     self.use_bias,
             ):
-                hidden_layer = Dense(node_count, activation=self.activation_function, use_bias=use_bias)(hidden_layer)
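+                # look up the configured initializer by name in keras.initializers
+                # and instantiate it with default arguments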
+                extra_opts = dict()
+                if self.kernel_initializer is not None:
+                    extra_opts["kernel_initializer"] = getattr(keras.initializers, self.kernel_initializer)()
+                hidden_layer = Dense(node_count, activation=self.activation_function, use_bias=use_bias, **extra_opts)(hidden_layer)
                 if (dropout_fraction is not None) and (dropout_fraction > 0):
                     hidden_layer = Dropout(rate=dropout_fraction)(hidden_layer)
 
@@ -1070,6 +1114,8 @@ class ClassificationProject(object):
 
         self.total_epochs = self._read_info("epochs", 0)
 
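+        # limit the TF thread pools according to OMP_NUM_THREADS before training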
+        set_session_threads()
+
         logger.info("Train model")
         if not self.balance_dataset:
             try:
@@ -1263,17 +1309,53 @@ class ClassificationProject(object):
         return centers, hist, errors
 
 
-    def plot_input(self, var_index, ax=None):
-        "plot a single input variable"
+    def plot_input(self, var_index, ax=None, from_training_batches=False, max_n_batches=None):
+        """
+        plot a single input variable as a histogram (signal vs background)
+
+        :param from_training_batches: use data from the training batch generator
+                                      instead of the full training arrays
+        :param max_n_batches: if the batch generator is used, limit the number of
+                              batches to this value (default: steps_per_epoch)
+        """
         branch = self.fields[var_index]
         if ax is None:
             fig, ax = plt.subplots()
         else:
             fig = None
-        bkg = self.x_train[:,var_index][self.l_train == 0]
-        sig = self.x_train[:,var_index][self.l_train == 1]
-        bkg_weights = self.w_train_tot[self.l_train == 0]
-        sig_weights = self.w_train_tot[self.l_train == 1]
+
+        if not from_training_batches:
+            bkg = self.x_train[:,var_index][self.l_train == 0]
+            sig = self.x_train[:,var_index][self.l_train == 1]
+            bkg_weights = self.w_train_tot[self.l_train == 0]
+            sig_weights = self.w_train_tot[self.l_train == 1]
+        else:
+            bkg = None
+            sig = None
+            bkg_weights = None
+            sig_weights = None
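+            # accumulate values and weights per class from the batch generator
+            # until n_batches batches have been consumed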
+            if max_n_batches is not None:
+                n_batches = max_n_batches
+            else:
+                n_batches = self.steps_per_epoch
+            for i_batch, (x, y, w) in enumerate(self.yield_batch()):
+                if i_batch >= n_batches:
+                    break
+                if self.target_fields:
+                    y = y[0]
+                bkg_batch = x[:,var_index][y == 0]
+                sig_batch = x[:,var_index][y == 1]
+                bkg_weights_batch = w[y == 0]
+                sig_weights_batch = w[y == 1]
+                if bkg is None:
+                    bkg = bkg_batch
+                    sig = sig_batch
+                    bkg_weights = bkg_weights_batch
+                    sig_weights = sig_weights_batch
+                else:
+                    bkg = np.concatenate([bkg, bkg_batch])
+                    sig = np.concatenate([sig, sig_batch])
+                    bkg_weights = np.concatenate([bkg_weights, bkg_weights_batch])
+                    sig_weights = np.concatenate([sig_weights, sig_weights_batch])
 
         if hasattr(self, "mask_value"):
             bkg_not_masked = np.where(bkg != self.mask_value)[0]
@@ -1324,7 +1406,10 @@ class ClassificationProject(object):
             centers_sig, hist_sig, _ = self.get_bin_centered_hist(sig, bins=bins, range=plot_range, weights=sig_weights)
             centers_bkg, hist_bkg, _ = self.get_bin_centered_hist(bkg, bins=bins, range=plot_range, weights=bkg_weights)
 
-        width = centers_sig[1]-centers_sig[0]
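+        # a single bin has no neighbour spacing to infer the bar width from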
+        if bins > 1:
+            width = centers_sig[1]-centers_sig[0]
+        else:
+            width = 1.
         ax.bar(centers_bkg, hist_bkg, color="b", alpha=0.5, width=width)
         ax.bar(centers_sig, hist_sig, color="r", alpha=0.5, width=width)
 
@@ -1337,13 +1422,13 @@ class ClassificationProject(object):
             return save_show(plt, fig, os.path.join(plot_dir, "var_{}.pdf".format(var_index)))
 
 
-    def plot_all_inputs(self):
+    def plot_all_inputs(self, **kwargs):
         nrows = math.ceil(math.sqrt(len(self.fields)))
         fig, axes = plt.subplots(nrows=int(nrows), ncols=int(nrows),
                                  figsize=(3*nrows, 3*nrows),
                                  gridspec_kw=dict(wspace=0.4, hspace=0.4))
         for i in range(len(self.fields)):
-            self.plot_input(i, ax=axes.reshape(-1)[i])
+            self.plot_input(i, ax=axes.reshape(-1)[i], **kwargs)
         return save_show(plt, fig, os.path.join(self.project_dir, "all_inputs.pdf"))
 
 
@@ -1939,7 +2024,10 @@ class ClassificationProjectRNN(ClassificationProject):
                 flat_channel = flat_input
             combined = concatenate(rnn_channels+[flat_channel])
             for node_count, dropout_fraction in zip(self.nodes, self.dropout):
-                combined = Dense(node_count, activation=self.activation_function)(combined)
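+                # same name-based initializer lookup as in ClassificationProject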
+                extra_opts = dict()
+                if self.kernel_initializer is not None:
+                    extra_opts["kernel_initializer"] = getattr(keras.initializers, self.kernel_initializer)()
+                combined = Dense(node_count, activation=self.activation_function, **extra_opts)(combined)
                 if (dropout_fraction is not None) and (dropout_fraction > 0):
                     combined = Dropout(rate=dropout_fraction)(combined)
             combined = Dense(1, activation=self.activation_function_output)(combined)