Skip to content
Snippets Groups Projects
Commit 9ca36850 authored by Nikolai.Hartmann's avatar Nikolai.Hartmann
Browse files

initial commit

parents
No related branches found
No related tags found
No related merge requests found
outputs/
#!/usr/bin/env python
import os
from root_numpy import tree2array, rec2array
import numpy as np
import pandas as pd
import h5py
import ROOT
class KerasROOTClassification:
    """Prepare signal/background events from ROOT trees as numpy arrays.

    Events are read from the configured signal and background trees, split
    alternately (every other entry) into a training and a test sample, and
    stored as attributes on the instance. Everything written to disk goes
    into the project directory ``<out_dir>/<name>``.
    """

    # Names of the array attributes that are written to / read from HDF5.
    # Each one is stored in its own file ``<project_dir>/<attribute>.h5``.
    _hdf5_dataset_names = ("x_train", "x_test")

    def __init__(self, name,
                 signal_trees, bkg_trees, branches, weight_expr, identifiers,
                 layers=3, nodes=64, out_dir="./outputs"):
        """Store the configuration and create the project directory.

        :param name: project name; also the name of the sub-directory of
            ``out_dir`` that receives all output files
        :param signal_trees: list of ``(filename, treename)`` pairs
        :param bkg_trees: list of ``(filename, treename)`` pairs
        :param branches: list of branch names/expressions used as inputs
        :param weight_expr: branch expression giving the event weight
        :param identifiers: list of branches that identify an event; they are
            read for the training sample only
        :param layers: only stored here; presumably the number of hidden
            layers of the (not yet implemented) network -- TODO confirm
        :param nodes: only stored here; presumably the number of nodes per
            layer -- TODO confirm
        :param out_dir: base directory for the output of all projects
        """
        self.name = name
        self.signal_trees = signal_trees
        self.bkg_trees = bkg_trees
        self.branches = branches
        self.weight_expr = weight_expr
        self.identifiers = identifiers
        self.layers = layers
        self.nodes = nodes

        self.out_dir = out_dir
        self.project_dir = os.path.join(self.out_dir, name)

        # makedirs also creates out_dir (and any missing parents) in one go.
        # The isdir guard keeps this working on interpreters that lack the
        # exist_ok argument.
        if not os.path.isdir(self.project_dir):
            os.makedirs(self.project_dir)

        # structured arrays as read from the trees
        self.s_train = None
        self.b_train = None
        self.s_test = None
        self.b_test = None

        # plain arrays, filled by load_data()
        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None
        self.w_train = None
        self.w_test = None

        # identifier columns of the events used for training
        self.s_eventlist_train = None
        self.b_eventlist_train = None

    def load_data(self):
        """Read the trees and fill the train/test arrays.

        Entries 0, 2, 4, ... of each chain form the training sample and
        entries 1, 3, 5, ... the test sample. The identifiers of the training
        events are written to CSV and the input arrays to HDF5 files in the
        project directory.
        """
        # Read signal and background trees into structured numpy arrays
        signal_chain = ROOT.TChain()
        bkg_chain = ROOT.TChain()
        for filename, treename in self.signal_trees:
            signal_chain.AddFile(filename, -1, treename)
        for filename, treename in self.bkg_trees:
            bkg_chain.AddFile(filename, -1, treename)

        value_branches = self.branches + [self.weight_expr]
        # identifiers are only needed to record which events were trained on
        train_branches = value_branches + self.identifiers

        self.s_train = tree2array(signal_chain, branches=train_branches, start=0, step=2)
        self.b_train = tree2array(bkg_chain, branches=train_branches, start=0, step=2)
        self.s_test = tree2array(signal_chain, branches=value_branches, start=1, step=2)
        self.b_test = tree2array(bkg_chain, branches=value_branches, start=1, step=2)

        self._dump_training_list()
        self.s_eventlist_train = self.s_train[self.identifiers]
        self.b_eventlist_train = self.b_train[self.identifiers]

        # now we don't need the identifiers anymore
        self.s_train = self.s_train[value_branches]
        self.b_train = self.b_train[value_branches]

        # create x (input) and w (weights) arrays
        # the first block will be signals, the second block backgrounds
        self.x_train = np.concatenate((rec2array(self.s_train[self.branches]),
                                       rec2array(self.b_train[self.branches])))
        self.x_test = np.concatenate((rec2array(self.s_test[self.branches]),
                                      rec2array(self.b_test[self.branches])))
        self.w_train = np.concatenate((self.s_train[self.weight_expr],
                                       self.b_train[self.weight_expr]))
        self.w_test = np.concatenate((self.s_test[self.weight_expr],
                                      self.b_test[self.weight_expr]))

        self._dump_to_hdf5()

    def _dump_training_list(self):
        """Write the identifiers of the training events to CSV files.

        Creates ``s_eventlist_train.csv`` (signal) and
        ``b_eventlist_train.csv`` (background) in the project directory.
        Must be called while ``s_train``/``b_train`` still contain the
        identifier columns.
        """
        s_eventlist = pd.DataFrame(self.s_train[self.identifiers])
        b_eventlist = pd.DataFrame(self.b_train[self.identifiers])

        s_eventlist.to_csv(os.path.join(self.project_dir, "s_eventlist_train.csv"))
        # the background file must contain the background events; writing the
        # signal list here would silently duplicate the signal file
        b_eventlist.to_csv(os.path.join(self.project_dir, "b_eventlist_train.csv"))

    def _dump_to_hdf5(self):
        """Write each input array to ``<project_dir>/<attribute>.h5``."""
        for dataset_name in self._hdf5_dataset_names:
            path = os.path.join(self.project_dir, dataset_name + ".h5")
            with h5py.File(path, "w") as hf:
                hf.create_dataset(dataset_name, data=getattr(self, dataset_name))

    def _load_from_hdf5(self):
        """Read back the arrays written by :meth:`_dump_to_hdf5`.

        Every file is looked up in the project directory and opened
        read-only, so a missing file raises instead of being created.
        """
        for dataset_name in self._hdf5_dataset_names:
            path = os.path.join(self.project_dir, dataset_name + ".h5")
            with h5py.File(path, "r") as hf:
                # [:] copies the data into memory before the file is closed
                setattr(self, dataset_name, hf[dataset_name][:])

    def train(self):
        """Not implemented yet."""

    def evaluate(self):
        """Not implemented yet."""

    def writeFriendTree(self):
        """Not implemented yet."""

    def plotROC(self):
        """Not implemented yet."""

    def plotScore(self):
        """Not implemented yet."""
if __name__ == "__main__":
    # Manual smoke test: load two input variables for one signal tree and
    # two background trees from a single file and print the training inputs.
    input_file = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"

    signal = [(input_file, "GG_oneStep_1705_1105_505_NoSys")]
    backgrounds = [
        (input_file, "ttbar_NoSys"),
        (input_file, "wjets_Sherpa221_NoSys"),
    ]

    classifier = KerasROOTClassification(
        "test",
        signal_trees=signal,
        bkg_trees=backgrounds,
        branches=["met", "mt"],
        weight_expr="eventWeight*genWeight",
        identifiers=["DatasetNumber", "EventNumber"],
    )
    classifier.load_data()

    train_inputs = classifier.x_train
    print(train_inputs)
    print(len(train_inputs))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment