From cebf5f267f5761f3cbec094ce5ffa1c377f1d761 Mon Sep 17 00:00:00 2001
From: Nikolai <osterei33@gmx.de>
Date: Fri, 27 Jul 2018 10:14:06 +0200
Subject: [PATCH] switch to np.digitize for histogram error calculation

---
 toolkit.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/toolkit.py b/toolkit.py
index 9a0a445..8d36861 100755
--- a/toolkit.py
+++ b/toolkit.py
@@ -828,13 +828,19 @@ class ClassificationProject(object):
         hist, bins = np.histogram(x, **np_kwargs)
         centers = (bins[:-1] + bins[1:]) / 2
         if "weights" in np_kwargs:
-            errors = []
-            for left, right in zip(bins, bins[1:]):
-                indices = np.where((x >= left) & (x < right))[0]
-                sumw2 = np.sum(np_kwargs["weights"][indices]**2)
-                content = np.sum(np_kwargs["weights"][indices])
-                errors.append(math.sqrt(sumw2)/content)
-            errors = np.array(errors)
+            bin_indices = np.digitize(x, bins)
+            sumw2 = np.array([np.sum(np_kwargs["weights"][bin_indices==i]**2)
+                              for i in range(1, len(bins)+1)])
+            sumw = np.array([np.sum(np_kwargs["weights"][bin_indices==i])
+                             for i in range(1, len(bins)+1)])
+            # merge the digitize overflow slot (x >= last edge) into the last
+            # bin, since np.histogram's last bin includes its right edge
+            sumw2[-2] += sumw2[-1]
+            sumw2 = sumw2[:-1]
+            sumw[-2] += sumw[-1]
+            sumw = sumw[:-1]
+            # calculate relative error
+            errors = np.sqrt(sumw2)/sumw
         else:
             errors = np.sqrt(hist)/hist
         if scale_factor is not None:
-- 
GitLab