Skip to content
Snippets Groups Projects
Commit 9f5de8d3 authored by Nikolai.Hartmann's avatar Nikolai.Hartmann
Browse files

adding script to write ROOT trees for parametrized learning

parent ab3785ad
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
"""
Write new TTrees with signal parameters as branches. For the
backgrounds the parameters are generated following the total
distribution for all signals. The discrete values for the whole ntuple
of signal parameters are counted, such that correlations between
signal parameters are taken into account.
"""
import argparse, re, os
import ROOT
from root_numpy import list_trees
from root_pandas import read_root
import numpy as np
if __name__ == "__main__":
input_filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"
output_filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys_parametrized.root"
param_names = ["mg", "mc", "mn"]
param_match = "GG_oneStep_(.*?)_(.*?)_(.*?)_NoSys"
output_signal_treename = "GG_oneStep_NoSys"
bkg_trees = [
"diboson_Sherpa221_NoSys",
"singletop_NoSys",
"ttbar_NoSys",
"ttv_NoSys",
"wjets_Sherpa221_NoSys",
"zjets_Sherpa221_NoSys",
]
# read in the number of events for each combination of parameters
f = ROOT.TFile.Open(input_filename)
count_dict = {}
for key in f.GetListOfKeys():
tree_name = key.GetName()
match = re.match(param_match, tree_name)
if match is not None:
tree = f.Get(tree_name)
params = tuple([float(i) for i in match.groups()])
if not params in count_dict:
count_dict[params] = 0
count_dict[params] += tree.GetEntries()
f.Close()
# calculate cumulative sum of counts to sample signal parameters for background from
numbers = np.array(count_dict.keys(), dtype=np.float)
counts = np.array(count_dict.values(), dtype=np.float)
probs = counts/counts.sum()
prob_bins = np.cumsum(probs)
# read and write the rest in chunks
if os.path.exists(output_filename):
os.remove(output_filename)
for tree_name in list_trees(input_filename):
match_signal = re.match(param_match, tree_name)
if match_signal is not None or tree_name in bkg_trees:
print("Writing {}".format(tree_name))
nwritten = 0
for df in read_root(input_filename, tree_name, chunksize=100000):
print("Writing event {}".format(nwritten))
if match_signal is None:
rnd = np.random.random(len(df))
rnd_idx = np.digitize(rnd, prob_bins)
param_values = numbers[rnd_idx]
for param_idx, param_name in enumerate(param_names):
df[param_name] = param_values[:,param_idx]
else:
for param_name, param_value in zip(param_names, match_signal.groups()):
df[param_name] = float(param_value)
if match_signal is None:
out_tree_name = tree_name
else:
out_tree_name = output_signal_treename
df.to_root(output_filename, mode="a", key=out_tree_name)
nwritten += len(df)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment