diff --git a/toolkit.py b/toolkit.py index 99a23a5b76615a548f6d2d4e6a079582504b3d6c..ec09631b541986c761c610ad3eebb02184ca4893 100755 --- a/toolkit.py +++ b/toolkit.py @@ -42,6 +42,60 @@ import ROOT class KerasROOTClassification(object): + """Simple framework to load data from ROOT TTrees and train Keras + neural networks for classification according to some global settings. + + See the `Keras documentation <https://keras.io>`_ for further information. + + All needed data that is created is stored in a project dir and can + be used again later without the need to be recreated. + + :param name: Name of the project - this will also be the name of + the project directory in the output dir. If no further arguments + are given, this argument is interpreted as a directory name, from + which a previously created project should be initialised + + :param signal_trees: list of tuples (filename, treename) for the data that should be used as signal + + :param bkg_trees: list of tuples (filename, treename) for the data that should be used as background + + :param branches: list of branch names or expressions to be used as input values for training + + :param weight_expr: expression to weight the events in the loss function + + :param identifiers: list of branches or expressions that uniquely + identify events. 
This is used to store the list of training + events, such that they can be marked later on, for example when + creating friend trees with output score + + :param selection: selection expression that events have to fulfill to be considered for training + + :param layers: number of layers in the neural network + + :param nodes: number of nodes in each layer + + :param batch_size: size of the training batches + + :param validation_split: split off this fraction of training events for loss evaluation + + :param activation_function: activation function in the hidden layers + + :param out_dir: base directory in which the project directories should be stored + + :param scaler_type: sklearn scaler class name to transform the data before training (options: "StandardScaler", "RobustScaler") + + :param step_signal: step size when selecting signal training events (e.g. 2 means take every second event) + + :param step_bkg: step size when selecting background training events (e.g. 2 means take every second event) + + :param optimizer: name of optimizer class in keras.optimizers + + :param optimizer_opts: dictionary of options for the optimizer + + :param earlystopping_opts: options for the keras EarlyStopping callback + + """ + # Datasets that are stored to (and dynamically loaded from) hdf5 dataset_names = ["x_train", "x_test", "y_train", "y_test", "w_train", "w_test", "scores_train", "scores_test"]