Skip to content
Snippets Groups Projects
Commit 43f12959 authored by Nikolai's avatar Nikolai
Browse files

Merge branch 'dev-friend'

parents 1b3e3fc3 d58f19db
No related branches found
No related tags found
No related merge requests found
...@@ -4,3 +4,4 @@ run.py ...@@ -4,3 +4,4 @@ run.py
*.swp *.swp
*.pyc *.pyc
*.pdf *.pdf
*.root
#!/usr/bin/env python
import argparse
import ROOT
# Command-line tool: register a tree from one ROOT file as a friend of a tree
# in another ROOT file, and store a copy of the friend tree in that other file.
parser = argparse.ArgumentParser(description='add a friend tree to a tree in another file')
parser.add_argument("infile", help="input file that contains the friend tree")
parser.add_argument("intree", help="name of the friend tree")
parser.add_argument("outfile", help="output file where the friend tree should be added")
parser.add_argument("outtree", help="name of the tree (in output file) to which the friend should be added")
args = parser.parse_args()

outfile = ROOT.TFile.Open(args.outfile, "UPDATE")
# Fail early with a clear message instead of crashing later on a null/zombie file.
if not outfile or outfile.IsZombie():
    raise IOError("Could not open output file {} for update".format(args.outfile))

infile = ROOT.TFile.Open(args.infile)
if not infile or infile.IsZombie():
    outfile.Close()
    raise IOError("Could not open input file {}".format(args.infile))

try:
    # Refuse to clobber an object in the output file that already uses the
    # friend tree's name.
    for k in outfile.GetListOfKeys():
        if k.GetName() == args.intree:
            raise ValueError("Tree with name {} already exists in outputfile".format(args.intree))

    outfile.cd()
    outtree = outfile.Get(args.outtree)
    if not outtree:
        raise KeyError("Tree {} not found in file {}".format(args.outtree, args.outfile))

    # The friend list may be null when the tree has no friends, hence the guard.
    friends = outtree.GetListOfFriends()
    if friends:
        for k in friends:
            if k.GetName() == args.intree:
                raise ValueError("Tree with name {} is already friend of {}".format(args.intree, args.outtree))

    infile.cd()
    intree = infile.Get(args.intree)
    if not intree:
        raise KeyError("Tree {} not found in file {}".format(args.intree, args.infile))

    # Add friend and write friend tree and original tree to outfile
    # NOTE(review): the friend is registered from the tree object living in
    # infile, while the copy written below lives in outfile -- confirm which
    # file the stored friend reference is expected to point to.
    outfile.cd()
    outtree.AddFriend(intree)
    outtree.Write(outtree.GetName())

    outfile.cd()
    clonetree = intree.CloneTree(-1, "fast")
    clonetree.Write(intree.GetName())
finally:
    # Close both files on success and on every error path above.
    infile.Close()
    outfile.Close()
...@@ -9,7 +9,7 @@ import logging ...@@ -9,7 +9,7 @@ import logging
logger = logging.getLogger("KerasROOTClassification") logger = logging.getLogger("KerasROOTClassification")
logger.addHandler(logging.NullHandler()) logger.addHandler(logging.NullHandler())
from root_numpy import tree2array, rec2array from root_numpy import tree2array, rec2array, array2root
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import h5py import h5py
...@@ -117,8 +117,8 @@ class KerasROOTClassification(object): ...@@ -117,8 +117,8 @@ class KerasROOTClassification(object):
self._scores_train = None self._scores_train = None
self._scores_test = None self._scores_test = None
self.s_eventlist_train = None self._s_eventlist_train = None
self.b_eventlist_train = None self._b_eventlist_train = None
self._scaler = None self._scaler = None
self._class_weight = None self._class_weight = None
...@@ -170,9 +170,9 @@ class KerasROOTClassification(object): ...@@ -170,9 +170,9 @@ class KerasROOTClassification(object):
selection=self.selection, selection=self.selection,
start=1, step=self.step_bkg) start=1, step=self.step_bkg)
self._dump_training_list()
self.s_eventlist_train = self.s_train[self.identifiers] self.s_eventlist_train = self.s_train[self.identifiers]
self.b_eventlist_train = self.b_train[self.identifiers] self.b_eventlist_train = self.b_train[self.identifiers]
self._dump_training_list()
# now we don't need the identifiers anymore # now we don't need the identifiers anymore
self.s_train = self.s_train[self.branches+[self.weight_expr]] self.s_train = self.s_train[self.branches+[self.weight_expr]]
...@@ -202,11 +202,37 @@ class KerasROOTClassification(object): ...@@ -202,11 +202,37 @@ class KerasROOTClassification(object):
def _dump_training_list(self): def _dump_training_list(self):
s_eventlist = pd.DataFrame(self.s_train[self.identifiers]) s_eventlist_df = pd.DataFrame(self.s_eventlist_train)
b_eventlist = pd.DataFrame(self.b_train[self.identifiers]) b_eventlist_df = pd.DataFrame(self.b_eventlist_train)
s_eventlist_df.to_csv(os.path.join(self.project_dir, "s_eventlist_train.csv"))
b_eventlist_df.to_csv(os.path.join(self.project_dir, "b_eventlist_train.csv"))
s_eventlist.to_csv(os.path.join(self.project_dir, "s_eventlist_train.csv"))
s_eventlist.to_csv(os.path.join(self.project_dir, "b_eventlist_train.csv")) @property
def s_eventlist_train(self):
    """Return the identifier records of the signal training events.

    If no list is cached yet, it is read from ``s_eventlist_train.csv`` in
    ``self.project_dir``, reduced to the ``self.identifiers`` fields, and
    cached on the instance.
    """
    cached = self._s_eventlist_train
    if cached is not None:
        return cached
    csv_path = os.path.join(self.project_dir, "s_eventlist_train.csv")
    records = pd.read_csv(csv_path).to_records()
    self._s_eventlist_train = records[self.identifiers]
    return self._s_eventlist_train
@s_eventlist_train.setter
def s_eventlist_train(self, value):
    """Store ``value`` as the cached signal training event list."""
    self._s_eventlist_train = value
@property
def b_eventlist_train(self):
    """Return the identifier records of the background training events.

    If no list is cached yet, it is read from ``b_eventlist_train.csv`` in
    ``self.project_dir``, reduced to the ``self.identifiers`` fields, and
    cached on the instance.
    """
    cached = self._b_eventlist_train
    if cached is not None:
        return cached
    csv_path = os.path.join(self.project_dir, "b_eventlist_train.csv")
    records = pd.read_csv(csv_path).to_records()
    self._b_eventlist_train = records[self.identifiers]
    return self._b_eventlist_train
@b_eventlist_train.setter
def b_eventlist_train(self, value):
    """Store ``value`` as the cached background training event list."""
    self._b_eventlist_train = value
def _dump_to_hdf5(self, *dataset_names): def _dump_to_hdf5(self, *dataset_names):
...@@ -435,10 +461,51 @@ class KerasROOTClassification(object): ...@@ -435,10 +461,51 @@ class KerasROOTClassification(object):
def evaluate(self): def evaluate(self, x_eval):
pass logger.debug("Evaluate score for {}".format(x_eval))
x_eval = self.scaler.transform(x_eval)
def write_friend_tree(self): logger.debug("Evaluate for transformed array: {}".format(x_eval))
return self.model.predict(x_eval)
def write_friend_tree(self, score_name,
                      source_filename, source_treename,
                      target_filename, target_treename,
                      batch_size=100000):
    """Evaluate the classifier on a tree and write the scores to a friend tree.

    The source tree is read in batches of ``batch_size`` entries. For every
    batch the score is computed with ``self.evaluate`` and written to
    ``target_treename`` in ``target_filename`` together with an ``is_train``
    flag. The flag is true for entries whose identifiers appear in the
    signal or background training event lists.

    :param score_name: name of the score branch in the friend tree
    :param source_filename: ROOT file containing the tree to evaluate
    :param source_treename: name of the tree inside ``source_filename``
    :param target_filename: ROOT file to create; must not exist yet
    :param target_treename: name of the friend tree to write
    :param batch_size: number of entries read and evaluated per batch
    :raises IOError: if ``target_filename`` already exists or
        ``source_filename`` cannot be opened
    :raises KeyError: if ``source_treename`` is not found in the source file
    """
    # Refuse to overwrite before touching the source file.
    if os.path.exists(target_filename):
        raise IOError("{} already exists, if you want to recreate it, delete it first".format(target_filename))

    source_file = ROOT.TFile.Open(source_filename)
    if not source_file or source_file.IsZombie():
        raise IOError("Could not open file {}".format(source_filename))

    try:
        tree = source_file.Get(source_treename)
        if not tree:
            raise KeyError("Tree {} not found in file {}".format(source_treename, source_filename))
        entries = tree.GetEntries()

        # The training list does not depend on the batch, so build it once.
        # Duplicates are dropped so the left merge below cannot return more
        # rows than the batch contains.
        train_df = pd.DataFrame(
            np.concatenate((self.s_eventlist_train, self.b_eventlist_train))
        ).drop_duplicates()

        for start in range(0, entries, batch_size):
            logger.info("Evaluating score for entry {}/{}".format(start, entries))
            logger.debug("Loading next batch")
            x_from_tree = tree2array(tree,
                                     branches=self.branches+self.identifiers,
                                     start=start, stop=start+batch_size)
            x_eval = rec2array(x_from_tree[self.branches])

            # create list of booleans that indicate which events were used for training
            df_identifiers = pd.DataFrame(x_from_tree[self.identifiers])
            merged = df_identifiers.merge(train_df, on=list(self.identifiers),
                                          indicator=True, how="left")
            is_train = np.array(merged["_merge"] == "both")

            # join scores and is_train array
            scores = self.evaluate(x_eval).reshape(-1)
            friend_df = pd.DataFrame(np.array(scores, dtype=[(score_name, np.float64)]))
            friend_df["is_train"] = is_train
            friend_tree = friend_df.to_records()[[score_name, "is_train"]]

            # The first batch creates the target file, later batches append to it.
            mode = "recreate" if start == 0 else "update"
            logger.debug("Write to root file")
            array2root(friend_tree, target_filename, treename=target_treename, mode=mode)
            logger.debug("Done")
    finally:
        # Release the source file on success and on every error path.
        source_file.Close()
def write_all_friend_trees(self):
pass pass
...@@ -615,4 +682,13 @@ if __name__ == "__main__": ...@@ -615,4 +682,13 @@ if __name__ == "__main__":
c.plot_ROC() c.plot_ROC()
c.plot_loss() c.plot_loss()
c.plot_accuracy() c.plot_accuracy()
c.plot_weights()
c.write_friend_tree("test4_score",
source_filename=filename, source_treename="GG_oneStep_1705_1105_505_NoSys",
target_filename="friend.root", target_treename="test4_score")
np.random.seed(1234)
c.write_friend_tree("test4_score",
source_filename=filename, source_treename="ttbar_NoSys",
target_filename="friend_ttbar_NoSys.root", target_treename="test4_score")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment