From cebf5f267f5761f3cbec094ce5ffa1c377f1d761 Mon Sep 17 00:00:00 2001 From: Nikolai <osterei33@gmx.de> Date: Fri, 27 Jul 2018 10:14:06 +0200 Subject: [PATCH] switch to np.digitize for histogram error calculation --- toolkit.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/toolkit.py b/toolkit.py index 9a0a445..8d36861 100755 --- a/toolkit.py +++ b/toolkit.py @@ -828,13 +828,19 @@ class ClassificationProject(object): hist, bins = np.histogram(x, **np_kwargs) centers = (bins[:-1] + bins[1:]) / 2 if "weights" in np_kwargs: - errors = [] - for left, right in zip(bins, bins[1:]): - indices = np.where((x >= left) & (x < right))[0] - sumw2 = np.sum(np_kwargs["weights"][indices]**2) - content = np.sum(np_kwargs["weights"][indices]) - errors.append(math.sqrt(sumw2)/content) - errors = np.array(errors) + bin_indices = np.digitize(x, bins) + sumw2 = np.array([np.sum(np_kwargs["weights"][bin_indices==i]**2) + for i in range(1, len(bins)+1)]) + sumw = np.array([np.sum(np_kwargs["weights"][bin_indices==i]) + for i in range(1, len(bins)+1)]) + # move overflow to last bin + # (since that's what np.histogram gives us) + sumw2[-2] += sumw2[-1] + sumw2 = sumw2[:-1] + sumw[-2] += sumw[-1] + sumw = sumw[:-1] + # calculate relative error + errors = np.sqrt(sumw2)/sumw else: errors = np.sqrt(hist)/hist if scale_factor is not None: -- GitLab