Commit a396d67b authored by Nikolai

introduce option to rename branch expressions

parent 96a77543
@@ -346,24 +346,24 @@ if __name__ == "__main__":
print("Mean signal: ")
for branch_index, val in enumerate(mean_signal):
print("{:>20}: {:<10.3f}".format(c.branches[branch_index], val))
print("{:>20}: {:<10.3f}".format(c.fields[branch_index], val))
plot_NN_vs_var_1D("met.pdf", mean_signal,
scorefun=c.evaluate,
var_index=c.branches.index("met"),
var_index=c.fields.index("met"),
var_range=(0, 1000, 10),
var_label="met [GeV]")
plot_NN_vs_var_1D("mt.pdf", mean_signal,
scorefun=c.evaluate,
var_index=c.branches.index("mt"),
var_index=c.fields.index("mt"),
var_range=(0, 500, 10),
var_label="mt [GeV]")
plot_NN_vs_var_2D("mt_vs_met.pdf", means=mean_signal,
scorefun=c.evaluate,
varx_index=c.branches.index("met"),
vary_index=c.branches.index("mt"),
varx_index=c.fields.index("met"),
vary_index=c.fields.index("mt"),
nbinsx=100, xmin=0, xmax=1000,
nbinsy=100, ymin=0, ymax=500,
varx_label="met [GeV]", vary_label="mt [GeV]")
@@ -371,14 +371,14 @@ if __name__ == "__main__":
plot_NN_vs_var_2D_all("mt_vs_met_all.pdf", means=mean_signal,
model=c.model, transform_function=c.scaler.transform,
var1_index=c.branches.index("met"), var1_range=(0, 1000, 10),
var2_index=c.branches.index("mt"), var2_range=(0, 500, 10),
var1_index=c.fields.index("met"), var1_range=(0, 1000, 10),
var2_index=c.fields.index("mt"), var2_range=(0, 500, 10),
var1_label="met [GeV]", var2_label="mt [GeV]")
plot_NN_vs_var_2D("mt_vs_met_crosscheck.pdf", means=mean_signal,
scorefun=get_single_neuron_function(c.model, layer=3, neuron=0, scaler=c.scaler),
varx_index=c.branches.index("met"),
vary_index=c.branches.index("mt"),
varx_index=c.fields.index("met"),
vary_index=c.fields.index("mt"),
nbinsx=100, xmin=0, xmax=1000,
nbinsy=100, ymin=0, ymax=500,
varx_label="met [GeV]", vary_label="mt [GeV]")
@@ -388,7 +388,7 @@ if __name__ == "__main__":
# transformed events
c.load(reload=True)
ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.branches))]
ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.fields))]
losses, events = get_max_activation_events(c.model, ranges, ntries=100000, layer=3, neuron=0, threshold=0.2)
@@ -396,8 +396,8 @@ if __name__ == "__main__":
plot_hist_2D_events(
"mt_vs_met_actmaxhist.pdf",
events[:,c.branches.index("met")],
events[:,c.branches.index("mt")],
events[:,c.fields.index("met")],
events[:,c.fields.index("mt")],
100, 0, 1000,
100, 0, 500,
varx_label="met [GeV]", vary_label="mt [GeV]",
@@ -405,7 +405,7 @@ if __name__ == "__main__":
plot_hist_2D_events(
"mt_vs_output_actmax.pdf",
events[:,c.branches.index("mt")],
events[:,c.fields.index("mt")],
losses,
100, 0, 500,
100, 0, 1,
@@ -417,13 +417,13 @@ if __name__ == "__main__":
def test_cond_max_act():
c.load(reload=True)
ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.branches))]
ranges = [np.percentile(c.x_test[:,var_index], [1,99]) for var_index in range(len(c.fields))]
plot_cond_avg_actmax_2D(
"mt_vs_met_cond_actmax.pdf",
c.model, 3, 0, ranges,
c.branches.index("met"),
c.branches.index("mt"),
c.fields.index("met"),
c.fields.index("mt"),
30, 0, 1000,
30, 0, 500,
scaler=c.scaler,
@@ -439,7 +439,7 @@ if __name__ == "__main__":
plot_hist_2D_events(
"mt_vs_output_signal_test.pdf",
utrf_x_test[c.y_test==1][:,c.branches.index("mt")],
utrf_x_test[c.y_test==1][:,c.fields.index("mt")],
c.scores_test[c.y_test==1].reshape(-1),
100, 0, 1000,
100, 0, 1,
@@ -449,8 +449,8 @@ if __name__ == "__main__":
plot_hist_2D_events(
"mt_vs_met_signal.pdf",
utrf_x_test[c.y_test==1][:,c.branches.index("met")],
utrf_x_test[c.y_test==1][:,c.branches.index("mt")],
utrf_x_test[c.y_test==1][:,c.fields.index("met")],
utrf_x_test[c.y_test==1][:,c.fields.index("mt")],
100, 0, 1000,
100, 0, 500,
varx_label="met [GeV]",
@@ -460,8 +460,8 @@ if __name__ == "__main__":
plot_hist_2D_events(
"mt_vs_met_backgound.pdf",
utrf_x_test[c.y_test==0][:,c.branches.index("met")],
utrf_x_test[c.y_test==0][:,c.branches.index("mt")],
utrf_x_test[c.y_test==0][:,c.fields.index("met")],
utrf_x_test[c.y_test==0][:,c.fields.index("mt")],
100, 0, 1000,
100, 0, 500,
varx_label="met [GeV]",
@@ -472,7 +472,7 @@ if __name__ == "__main__":
# plot_hist_2D_events(
# "apl_vs_output_actmax.pdf",
# events[:,c.branches.index("LepAplanarity")],
# events[:,c.fields.index("LepAplanarity")],
# losses,
# 100, 0, 0.1,
# 100, 0, 1,
@@ -487,8 +487,8 @@ if __name__ == "__main__":
plot_profile_2D(
"mt_vs_met_profilemean_sig.pdf",
utrf_x_test[c.y_test==1][:,c.branches.index("met")],
utrf_x_test[c.y_test==1][:,c.branches.index("mt")],
utrf_x_test[c.y_test==1][:,c.fields.index("met")],
utrf_x_test[c.y_test==1][:,c.fields.index("mt")],
c.scores_test[c.y_test==1].reshape(-1),
20, 0, 500,
20, 0, 1000,
@@ -497,8 +497,8 @@ if __name__ == "__main__":
plot_profile_2D(
"mt_vs_met_profilemax_sig.pdf",
utrf_x_test[c.y_test==1][:,c.branches.index("met")],
utrf_x_test[c.y_test==1][:,c.branches.index("mt")],
utrf_x_test[c.y_test==1][:,c.fields.index("met")],
utrf_x_test[c.y_test==1][:,c.fields.index("mt")],
c.scores_test[c.y_test==1].reshape(-1),
20, 0, 500,
20, 0, 1000,
......
@@ -55,9 +55,9 @@ neuron = args.neuron
if layer is None:
layer = c.layers
varx_index = c.branches.index(args.varx)
varx_index = c.fields.index(args.varx)
if not plot_vs_activation:
vary_index = c.branches.index(args.vary)
vary_index = c.fields.index(args.vary)
else:
vary_index = 0 # dummy value in this case
@@ -151,7 +151,7 @@ elif args.mode.startswith("hist"):
else:
# ranges in which to sample the random events
x_test_scaled = c.scaler.transform(c.x_test)
ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.branches))]
ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.fields))]
losses, events = get_max_activation_events(c.model, ranges, ntries=args.ntries_actmax, step=args.step_size, layer=layer, neuron=neuron, threshold=args.threshold)
events = c.scaler.inverse_transform(events)
valsx = events[:,varx_index]
@@ -176,7 +176,7 @@ elif args.mode.startswith("cond_actmax"):
x_test_scaled = c.scaler.transform(c.x_test)
# ranges in which to sample the random events
ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.branches))]
ranges = [np.percentile(x_test_scaled[:,var_index], [1,99]) for var_index in range(len(c.fields))]
plot_cond_avg_actmax_2D(
args.output_filename,
......
@@ -90,6 +90,8 @@ class ClassificationProject(object):
:param branches: list of branch names or expressions to be used as input values for training
:param rename_branches: dictionary that maps branch expressions to names for better readability
:param weight_expr: expression to weight the events in the loss function
:param data_dir: if given, load the data from a previous project with the given name
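The new rename_branches argument is a plain dictionary keyed by the raw branch expressions; the fields property introduced further down resolves each expression through this mapping and falls back to the expression itself when no entry exists. A minimal sketch of that resolution, with hypothetical expression strings:

# hypothetical branch expressions and the mapping passed as rename_branches
branches = ["met_nominal/1000.", "mt_nominal/1000."]
rename_branches = {"met_nominal/1000.": "met",
                   "mt_nominal/1000.": "mt"}

# what the fields property resolves to (unmapped expressions stay unchanged)
fields = [rename_branches.get(expr, expr) for expr in branches]
print(fields)               # ['met', 'mt']
print(fields.index("met"))  # 0, usable wherever branches.index(...) was used before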
@@ -186,6 +188,7 @@ class ClassificationProject(object):
def _init_from_args(self, name,
signal_trees, bkg_trees, branches, weight_expr,
rename_branches=None,
project_dir=None,
data_dir=None,
identifiers=None,
@@ -214,6 +217,9 @@ class ClassificationProject(object):
self.signal_trees = signal_trees
self.bkg_trees = bkg_trees
self.branches = branches
if rename_branches is None:
rename_branches = {}
self.rename_branches = rename_branches
self.weight_expr = weight_expr
self.selection = selection
@@ -294,6 +300,27 @@ class ClassificationProject(object):
# track if we are currently training
self.is_training = False
self._fields = None
@property
def fields(self):
"Renamed branch expressions"
if self._fields is None:
self._fields = []
for branch_expr in self.branches:
self._fields.append(self.rename_branches.get(branch_expr, branch_expr))
return self._fields
def rename_fields(self, ar):
"Rename fields of structured array"
fields = list(ar.dtype.names)
renamed_fields = []
for old_name in fields:
renamed_fields.append(self.rename_branches.get(old_name, old_name))
ar.dtype.names = tuple(renamed_fields)
def _load_data(self):
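rename_fields rewrites the dtype field names of the structured arrays returned by tree2array in place, so all later column selections can use the readable names. A rough standalone sketch of the same operation on a small structured array, with illustrative names only:

import numpy as np

# hypothetical mapping from branch expression to readable name
rename_branches = {"met_nominal": "met"}

ar = np.zeros(3, dtype=[("met_nominal", "f8"), ("mt", "f8")])
ar.dtype.names = tuple(rename_branches.get(name, name) for name in ar.dtype.names)
print(ar.dtype.names)  # ('met', 'mt')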
@@ -330,20 +357,25 @@ class ClassificationProject(object):
selection=self.selection,
start=1, step=self.step_bkg)
self.rename_fields(self.s_train)
self.rename_fields(self.b_train)
self.rename_fields(self.s_test)
self.rename_fields(self.b_test)
self.s_eventlist_train = self.s_train[self.identifiers].astype(dtype=[(branchName, "u8") for branchName in self.identifiers])
self.b_eventlist_train = self.b_train[self.identifiers].astype(dtype=[(branchName, "u8") for branchName in self.identifiers])
self._dump_training_list()
# now we don't need the identifiers anymore
self.s_train = self.s_train[self.branches+[self.weight_expr]]
self.b_train = self.b_train[self.branches+[self.weight_expr]]
self.s_train = self.s_train[self.fields+[self.weight_expr]]
self.b_train = self.b_train[self.fields+[self.weight_expr]]
# create x (input), y (target) and w (weights) arrays
# the first block will be signals, the second block backgrounds
self.x_train = rec2array(self.s_train[self.branches])
self.x_train = np.concatenate((self.x_train, rec2array(self.b_train[self.branches])))
self.x_test = rec2array(self.s_test[self.branches])
self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.branches])))
self.x_train = rec2array(self.s_train[self.fields])
self.x_train = np.concatenate((self.x_train, rec2array(self.b_train[self.fields])))
self.x_test = rec2array(self.s_test[self.fields])
self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields])))
self.w_train = self.s_train[self.weight_expr]
self.w_train = np.concatenate((self.w_train, self.b_train[self.weight_expr]))
self.w_test = self.s_test[self.weight_expr]
@@ -551,7 +583,7 @@ class ClassificationProject(object):
self._model = Sequential()
# first hidden layer
self._model.add(Dense(self.nodes, input_dim=len(self.branches), activation=self.activation_function))
self._model.add(Dense(self.nodes, input_dim=len(self.fields), activation=self.activation_function))
# the other hidden layers
for layer_number in range(self.layers-1):
self._model.add(Dense(self.nodes, activation=self.activation_function))
@@ -704,7 +736,7 @@ class ClassificationProject(object):
self.load()
for branch_index, branch in enumerate(self.branches):
for branch_index, branch in enumerate(self.fields):
self.plot_input(branch_index)
self.total_epochs = self._read_info("epochs", 0)
@@ -795,9 +827,9 @@ class ClassificationProject(object):
logger.info("Evaluating score for entry {}/{}".format(start, entries))
logger.debug("Loading next batch")
x_from_tree = tree2array(tree,
branches=self.branches+self.identifiers,
branches=self.fields+self.identifiers,
start=start, stop=start+batch_size)
x_eval = rec2array(x_from_tree[self.branches])
x_eval = rec2array(x_from_tree[self.fields])
if len(self.identifiers) > 0:
# create list of booleans that indicate which events were used for training
@@ -854,7 +886,7 @@ class ClassificationProject(object):
def plot_input(self, var_index):
"plot a single input variable"
branch = self.branches[var_index]
branch = self.fields[var_index]
fig, ax = plt.subplots()
bkg = self.x_train[:,var_index][self.y_train == 0]
sig = self.x_train[:,var_index][self.y_train == 1]
......