From 4f02d77d75bea898ee2c786c3de0692df502e00a Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <Nikolai.Hartmann@physik.uni-muenchen.de>
Date: Wed, 22 Aug 2018 11:21:00 +0200
Subject: [PATCH] adding functionality to mask values in random events

---
 toolkit.py | 16 ++++++++++++++++
 utils.py   | 11 +++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/toolkit.py b/toolkit.py
index c3040d8..95fdcbd 100755
--- a/toolkit.py
+++ b/toolkit.py
@@ -1710,6 +1710,22 @@ class ClassificationProjectRNN(ClassificationProject):
         self.checkpoint_model()
 
 
+    def clean_mask(self, x):
+        """
+        Mask recurrent fields such that once a masked value occurs,
+        all values corresponding to the same and following objects are
+        masked as well. Works in place.
+        """
+        for recurrent_field_idx in self.recurrent_field_idx:
+            for evt in x:
+                masked = False
+                for line_idx in recurrent_field_idx.reshape(*recurrent_field_idx.shape[1:]):
+                    if (evt[line_idx] == self.mask_value).any():
+                        masked=True
+                    if masked:
+                        evt[line_idx] = self.mask_value
+
+
     def get_input_list(self, x):
         "Format the input starting from flat ntuple"
         x_input = []
diff --git a/utils.py b/utils.py
index f3323c4..029f76e 100644
--- a/utils.py
+++ b/utils.py
@@ -33,22 +33,29 @@ def get_single_neuron_function(model, layer, neuron, input_transform=None):
     return eval_single_neuron
 
 
-def create_random_event(ranges):
+def create_random_event(ranges, mask_probs=None, mask_value=None):
     random_event = np.array([p[0]+(p[1]-p[0])*np.random.rand() for p in ranges])
     random_event = random_event.reshape(-1, len(random_event))
+    # if given, mask values with a certain probability
+    if mask_probs is not None:
+        for var_index, mask_prob in enumerate(mask_probs):
+            random_event[:,var_index][np.random.rand(len(random_event)) < mask_prob] = mask_value
     return random_event
 
 
 def get_ranges(x, quantiles, weights, mask_value=None, filter_index=None):
     "Get ranges for plotting or random event generation based on quantiles"
     ranges = []
+    mask_probs = []
     for var_index in range(x.shape[1]):
         if (filter_index is not None) and (var_index != filter_index):
             continue
         x_var = x[:,var_index]
         not_masked = np.where(x_var != mask_value)[0]
+        masked = np.where(x_var == mask_value)[0]
         ranges.append(weighted_quantile(x_var[not_masked], quantiles, sample_weight=weights[not_masked]))
-    return ranges
+        mask_probs.append(float(len(masked))/float(len(x_var)))
+    return ranges, mask_probs
 
 
 def max_activation_wrt_input(gradient_function, random_event, threshold=None, maxthreshold=None, maxit=100, step=1, const_indices=[],
-- 
GitLab