Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • Eric.Schanet/KerasROOTClassification
  • Nikolai.Hartmann/KerasROOTClassification
2 results
Show changes
Commits on Source (25)
...@@ -22,6 +22,7 @@ def overlay_ROC(filename, *projects, **kwargs): ...@@ -22,6 +22,7 @@ def overlay_ROC(filename, *projects, **kwargs):
threshold_log = kwargs.pop("threshold_log", True) threshold_log = kwargs.pop("threshold_log", True)
lumifactor = kwargs.pop("lumifactor", None) lumifactor = kwargs.pop("lumifactor", None)
tight_layout = kwargs.pop("tight_layout", False) tight_layout = kwargs.pop("tight_layout", False)
show_auc = kwargs.pop("show_auc", True)
if kwargs: if kwargs:
raise KeyError("Unknown kwargs: {}".format(kwargs)) raise KeyError("Unknown kwargs: {}".format(kwargs))
...@@ -43,7 +44,7 @@ def overlay_ROC(filename, *projects, **kwargs): ...@@ -43,7 +44,7 @@ def overlay_ROC(filename, *projects, **kwargs):
colors = prop_cycle.by_key()['color'] colors = prop_cycle.by_key()['color']
for p, color in zip(projects, colors): for p, color in zip(projects, colors):
fpr, tpr, threshold = roc_curve(p.y_test, p.scores_test, sample_weight = p.w_test) fpr, tpr, threshold = roc_curve(p.l_test, p.scores_test, sample_weight = p.w_test)
fpr = 1.0 - fpr fpr = 1.0 - fpr
try: try:
roc_auc = auc(tpr, fpr) roc_auc = auc(tpr, fpr)
...@@ -52,12 +53,16 @@ def overlay_ROC(filename, *projects, **kwargs): ...@@ -52,12 +53,16 @@ def overlay_ROC(filename, *projects, **kwargs):
roc_auc = auc(tpr, fpr, reorder=True) roc_auc = auc(tpr, fpr, reorder=True)
ax.grid(color='gray', linestyle='--', linewidth=1) ax.grid(color='gray', linestyle='--', linewidth=1)
ax.plot(tpr, fpr, label=str(p.name+" (AUC = {:.3f})".format(roc_auc)), color=color) if show_auc:
label = str(p.name+" (AUC = {:.3f})".format(roc_auc))
else:
label = p.name
ax.plot(tpr, fpr, label=label, color=color)
if plot_thresholds: if plot_thresholds:
ax2.plot(tpr, threshold, "--", color=color) ax2.plot(tpr, threshold, "--", color=color)
if lumifactor is not None: if lumifactor is not None:
sumw_b = p.w_test[p.y_test==0].sum()*lumifactor sumw_b = p.w_test[p.l_test==0].sum()*lumifactor
sumw_s = p.w_test[p.y_test==1].sum()*lumifactor sumw_s = p.w_test[p.l_test==1].sum()*lumifactor
ax_abs_b.plot(tpr, (1.-fpr)*sumw_b, alpha=0) ax_abs_b.plot(tpr, (1.-fpr)*sumw_b, alpha=0)
ax_abs_b.invert_yaxis() ax_abs_b.invert_yaxis()
ax_abs_s.plot(tpr*sumw_s, fpr, alpha=0) ax_abs_s.plot(tpr*sumw_s, fpr, alpha=0)
......
...@@ -7,13 +7,24 @@ from keras.layers import GRU ...@@ -7,13 +7,24 @@ from keras.layers import GRU
from KerasROOTClassification import ClassificationProject, ClassificationProjectRNN from KerasROOTClassification import ClassificationProject, ClassificationProjectRNN
def create_dataset(path): def create_dataset(path):
# create example dataset with (low-weighted) noise added
X, y = make_classification(n_samples=10000, random_state=1) X, y = make_classification(n_samples=10000, random_state=1)
X2 = np.random.normal(size=20*10000).reshape(-1, 20) X2 = np.random.normal(size=20*10000).reshape(-1, 20)
y2 = np.concatenate([np.zeros(5000), np.ones(5000)]) y2 = np.concatenate([np.zeros(5000), np.ones(5000)])
X = np.concatenate([X, X2]) X = np.concatenate([X, X2])
y = np.concatenate([y, y2]) y = np.concatenate([y, y2])
w = np.concatenate([np.ones(10000), 0.01*np.ones(10000)]) w = np.concatenate([np.ones(10000), 0.01*np.ones(10000)])
# shift and scale randomly (to check if transformation is working)
shift = np.random.rand(20)*100
scale = np.random.rand(20)*1000
X *= scale
X += shift
# write to root files
branches = ["var_{}".format(i) for i in range(len(X[0]))] branches = ["var_{}".format(i) for i in range(len(X[0]))]
df = pd.DataFrame(X, columns=branches) df = pd.DataFrame(X, columns=branches)
df["class"] = y df["class"] = y
...@@ -40,7 +51,10 @@ def test_ClassificationProject(tmp_path): ...@@ -40,7 +51,10 @@ def test_ClassificationProject(tmp_path):
layers=3, layers=3,
nodes=128, nodes=128,
) )
c.train(epochs=200) c.train(epochs=200)
c.plot_all_inputs()
c.plot_loss()
assert min(c.history.history["val_loss"]) < 0.18 assert min(c.history.history["val_loss"]) < 0.18
...@@ -71,4 +85,6 @@ def test_ClassificationProjectRNN(tmp_path): ...@@ -71,4 +85,6 @@ def test_ClassificationProjectRNN(tmp_path):
) )
assert sum([isinstance(layer, GRU) for layer in c.model.layers]) == 2 assert sum([isinstance(layer, GRU) for layer in c.model.layers]) == 2
c.train(epochs=200) c.train(epochs=200)
c.plot_all_inputs()
c.plot_loss()
assert min(c.history.history["val_loss"]) < 0.18 assert min(c.history.history["val_loss"]) < 0.18
This diff is collapsed.
...@@ -197,14 +197,26 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False ...@@ -197,14 +197,26 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False
class WeightedRobustScaler(RobustScaler): class WeightedRobustScaler(RobustScaler):
def fit(self, X, y=None, weights=None): def fit(self, X, y=None, weights=None, mask_value=None):
if not np.isnan(X).any(): if not np.isnan(X).any() and mask_value is not None and weights is None:
# these checks don't work for nan values # these checks don't work for nan values
super(WeightedRobustScaler, self).fit(X, y) return super(WeightedRobustScaler, self).fit(X, y)
if weights is None:
return self
else: else:
wqs = np.array([weighted_quantile(X[:,i][~np.isnan(X[:,i])], [0.25, 0.5, 0.75], sample_weight=weights) for i in range(X.shape[1])]) if weights is None:
weights = np.ones(len(self.X))
wqs = []
for i in range(X.shape[1]):
mask = ~np.isnan(X[:,i])
if mask_value is not None:
mask &= (X[:,i] != mask_value)
wqs.append(
weighted_quantile(
X[:,i][mask],
[0.25, 0.5, 0.75],
sample_weight=weights[mask]
)
)
wqs = np.array(wqs)
self.center_ = wqs[:,1] self.center_ = wqs[:,1]
self.scale_ = wqs[:,2]-wqs[:,0] self.scale_ = wqs[:,2]-wqs[:,0]
self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False) self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
......