diff --git a/compare.py b/compare.py index a399cc7a11f158555719e60db80b880efe2f037a..bcd2e87e525bd148fbc4c0bcbcd2d071f26b01c4 100755 --- a/compare.py +++ b/compare.py @@ -44,7 +44,7 @@ def overlay_ROC(filename, *projects, **kwargs): colors = prop_cycle.by_key()['color'] for p, color in zip(projects, colors): - fpr, tpr, threshold = roc_curve(p.y_test, p.scores_test, sample_weight = p.w_test) + fpr, tpr, threshold = roc_curve(p.l_test, p.scores_test, sample_weight = p.w_test) fpr = 1.0 - fpr try: roc_auc = auc(tpr, fpr) @@ -61,8 +61,8 @@ def overlay_ROC(filename, *projects, **kwargs): if plot_thresholds: ax2.plot(tpr, threshold, "--", color=color) if lumifactor is not None: - sumw_b = p.w_test[p.y_test==0].sum()*lumifactor - sumw_s = p.w_test[p.y_test==1].sum()*lumifactor + sumw_b = p.w_test[p.l_test==0].sum()*lumifactor + sumw_s = p.w_test[p.l_test==1].sum()*lumifactor ax_abs_b.plot(tpr, (1.-fpr)*sumw_b, alpha=0) ax_abs_b.invert_yaxis() ax_abs_s.plot(tpr*sumw_s, fpr, alpha=0) diff --git a/toolkit.py b/toolkit.py index 164b558f71872e9668f33b07cd0280634a93aa66..c311170e587cd44b99b97a3f53c1079dbaff1ae3 100755 --- a/toolkit.py +++ b/toolkit.py @@ -121,6 +121,8 @@ class ClassificationProject(object): :param branches: list of branch names or expressions to be used as input values for training + :param regression_branches: list of branch names to be used as regression targets + :param rename_branches: dictionary that maps branch expressions to names for better readability :param weight_expr: expression to weight the events in the loss function @@ -201,7 +203,9 @@ class ClassificationProject(object): the first time. This seed (increased by one) is used again before training when keras shuffling is used. - :param loss: loss function name + :param loss: loss function name (or list of names in case of regression targets) + + :param loss_weights: (optional) list of weights to weight the individual losses (for multiple targets) :param mask_value: value that is used for non-existent entries (e.g. 4th jet pt in events with 3 jets) @@ -249,6 +253,7 @@ class ClassificationProject(object): def _init_from_args(self, name, signal_trees, bkg_trees, branches, weight_expr, + regression_branches=None, rename_branches=None, project_dir=None, data_dir=None, @@ -282,6 +287,7 @@ class ClassificationProject(object): shuffle_seed=42, balance_dataset=False, loss='binary_crossentropy', + loss_weights=None, mask_value=None, apply_class_weight=True, normalize_weights=True, @@ -297,6 +303,9 @@ class ClassificationProject(object): if rename_branches is None: rename_branches = {} self.rename_branches = rename_branches + if regression_branches is None: + regression_branches = [] + self.regression_branches = regression_branches self.weight_expr = weight_expr self.selection = selection @@ -367,6 +376,10 @@ class ClassificationProject(object): self.shuffle_seed = shuffle_seed self.balance_dataset = balance_dataset self.loss = loss + self.loss_weights = loss_weights + if self.regression_branches and (not isinstance(self.loss, list)): + self.loss = [self.loss]+["mean_squared_error"]*len(self.regression_branches) + self.mask_value = mask_value self.apply_class_weight = apply_class_weight self.normalize_weights = normalize_weights @@ -395,6 +408,7 @@ class ClassificationProject(object): self._b_eventlist_train = None self._scaler = None + self._scaler_target = None self._class_weight = None self._balanced_class_weight = None self._model = None @@ -411,6 +425,7 @@ class ClassificationProject(object): self.is_training = False self._fields = None + self._target_fields = None @property @@ -423,6 +438,16 @@ class ClassificationProject(object): return self._fields + @property + def target_fields(self): + "Renamed branch expressions for regression targets" + if self._target_fields is None: + self._target_fields = [] + for branch_expr in self.regression_branches: + self._target_fields.append(self.rename_branches.get(branch_expr, branch_expr)) + return self._target_fields + + def rename_fields(self, ar): "Rename fields of structured array" fields = list(ar.dtype.names) @@ -453,19 +478,19 @@ class ClassificationProject(object): for filename, treename in self.bkg_trees: bkg_chain.AddFile(filename, -1, treename) self.s_train = tree2array(signal_chain, - branches=self.branches+[self.weight_expr]+self.identifiers, + branches=set(self.branches+self.regression_branches+[self.weight_expr]+self.identifiers), selection=self.selection, start=0, step=self.step_signal, stop=self.stop_train) self.b_train = tree2array(bkg_chain, - branches=self.branches+[self.weight_expr]+self.identifiers, + branches=set(self.branches+self.regression_branches+[self.weight_expr]+self.identifiers), selection=self.selection, start=0, step=self.step_bkg, stop=self.stop_train) self.s_test = tree2array(signal_chain, - branches=self.branches+[self.weight_expr], + branches=set(self.branches+self.regression_branches+[self.weight_expr]), selection=self.selection, start=1, step=self.step_signal, stop=self.stop_test) self.b_test = tree2array(bkg_chain, - branches=self.branches+[self.weight_expr], + branches=set(self.branches+self.regression_branches+[self.weight_expr]), selection=self.selection, start=1, step=self.step_bkg, stop=self.stop_test) @@ -482,19 +507,27 @@ class ClassificationProject(object): self.b_eventlist_train = self.b_train[self.identifiers].astype(dtype=[(branchName, "u8") for branchName in self.identifiers]) self._dump_training_list() - # now we don't need the identifiers anymore - self.s_train = self.s_train[self.fields+[self.weight_expr]] - self.b_train = self.b_train[self.fields+[self.weight_expr]] - # create x (input), y (target) and w (weights) arrays # the first block will be signals, the second block backgrounds self.x_train = rec2array(self.s_train[self.fields]) self.x_train = np.concatenate((self.x_train, rec2array(self.b_train[self.fields]))) self.w_train = self.s_train[self.weight_expr] self.w_train = np.concatenate((self.w_train, self.b_train[self.weight_expr])) - self.y_train = np.empty(len(self.x_train), dtype=np.bool) - self.y_train[:len(self.s_train)] = 1 - self.y_train[len(self.s_train):] = 0 + + def fill_target(x, s, b): + if not self.target_fields: + y = np.empty(len(x), dtype=np.bool) + y[:len(s)] = 1 + y[len(s):] = 0 + else: + y = np.empty((len(x), 1+len(self.target_fields)), dtype=np.float) + y[:len(s),0] = 1 + y[len(s):,0] = 0 + y[:len(s),1:] = rec2array(s[self.target_fields]) + y[len(s):,1:] = rec2array(b[self.target_fields]) + return y + + self.y_train = fill_target(self.x_train, self.s_train, self.b_train) self.b_train = None self.s_train = None @@ -502,9 +535,8 @@ class ClassificationProject(object): self.x_test = np.concatenate((self.x_test, rec2array(self.b_test[self.fields]))) self.w_test = self.s_test[self.weight_expr] self.w_test = np.concatenate((self.w_test, self.b_test[self.weight_expr])) - self.y_test = np.empty(len(self.x_test), dtype=np.bool) - self.y_test[:len(self.s_test)] = 1 - self.y_test[len(self.s_test):] = 0 + + self.y_test = fill_target(self.x_test, self.s_test, self.b_test) self.b_test = None self.s_test = None @@ -623,6 +655,39 @@ class ClassificationProject(object): return self._scaler + @property + def scaler_target(self): + "same as scaler, but for scaling regression targets" + # create the scaler (and fit to training data) if not existent + if self._scaler_target is None: + filename = os.path.join(self.project_dir, "scaler_target.pkl") + try: + self._scaler_target = joblib.load(filename) + logger.info("Loaded existing scaler from {}".format(filename)) + except IOError: + logger.info("Creating new {} for scaling the targets".format(self.scaler_type)) + scaler_fit_kwargs = dict() + if self.scaler_type == "StandardScaler": + self._scaler_target = StandardScaler() + elif self.scaler_type == "RobustScaler": + self._scaler_target = RobustScaler() + elif self.scaler_type == "WeightedRobustScaler": + self._scaler_target = WeightedRobustScaler() + scaler_fit_kwargs["weights"] = self.w_train_tot + else: + raise ValueError("Scaler type {} unknown".format(self.scaler_type)) + logger.info("Fitting {} to training data".format(self.scaler_type)) + orig_copy_setting = self.scaler.copy + self.scaler.copy = False + self._scaler_target.fit(self.y_train, **scaler_fit_kwargs) + # i don't want to scale the classification target here + self._scaler_target.center_[0] = 0. + self._scaler_target.scale_[0] = 1. + self.scaler.copy = orig_copy_setting + joblib.dump(self._scaler_target, filename) + return self._scaler_target + + def _batch_transform(self, x, fn, batch_size): "Transform array in batches, temporarily setting mask_values to nan" transformed = np.empty(x.shape, dtype=x.dtype) @@ -650,6 +715,24 @@ class ClassificationProject(object): return self.scaler.inverse_transform(x) + def transform_target(self, y, batch_size=10000): + if not self.target_fields: + return y + if self.mask_value is not None: + return self._batch_transform(y, self.scaler_target.transform, batch_size) + else: + return self.scaler_target.transform(y) + + + def inverse_transform_target(self, y, batch_size=10000): + if not self.target_fields: + return y + if self.mask_value is not None: + return self._batch_transform(y, self.scaler_target.inverse_transform, batch_size) + else: + return self.scaler_target.inverse_transform(y) + + @property def history(self): params_file = os.path.join(self.project_dir, "history_params.json") @@ -744,10 +827,24 @@ class ClassificationProject(object): if (dropout_fraction is not None) and (dropout_fraction > 0): hidden_layer = Dropout(rate=dropout_fraction)(hidden_layer) - # one output node for binary classification - output_layer = Dense(1, activation=self.activation_function_output)(hidden_layer) + # optional regression targets + extra_targets = [] + for target_field in self.target_fields: + extra_target = Dense(1, activation="linear", name="target_{}".format(target_field))(hidden_layer) + extra_targets.append(extra_target) - self._model = Model(inputs=[input_layer], outputs=[output_layer]) + if not self.target_fields: + # one output node for binary classification + output_layer = Dense(1, activation=self.activation_function_output)(hidden_layer) + outputs = [output_layer] + else: + # add another hidden layer on top of the regression targets and previous hidden layers + merge = concatenate([hidden_layer]+extra_targets) + hidden_layer2 = Dense(64, activation=self.activation_function)(merge) + output_class = Dense(1, activation=self.activation_function_output)(hidden_layer2) + outputs = [output_class]+extra_targets + + self._model = Model(inputs=[input_layer], outputs=outputs) self._compile_or_load_model() return self._model @@ -762,6 +859,7 @@ class ClassificationProject(object): np.random.seed(self.random_seed) self._model.compile(optimizer=optimizer, loss=self.loss, + loss_weights=self.loss_weights, weighted_metrics=['accuracy'] ) np.random.set_state(rn_state) @@ -792,8 +890,8 @@ class ClassificationProject(object): @property def class_weight(self): if self._class_weight is None: - sumw_bkg = np.sum(self.w_train[self.y_train == 0]) - sumw_sig = np.sum(self.w_train[self.y_train == 1]) + sumw_bkg = np.sum(self.w_train[self.l_train == 0]) + sumw_sig = np.sum(self.w_train[self.l_train == 1]) self._class_weight = [(sumw_sig+sumw_bkg)/(2*sumw_bkg), (sumw_sig+sumw_bkg)/(2*sumw_sig)] logger.debug("Calculated class_weight: {}".format(self._class_weight)) return self._class_weight @@ -808,11 +906,11 @@ class ClassificationProject(object): event with class weights """ if self._balanced_class_weight is None: - sumw_bkg = np.sum(self.w_train[self.y_train == 0]) - sumw_sig = np.sum(self.w_train[self.y_train == 1]) + sumw_bkg = np.sum(self.w_train[self.l_train == 0]) + sumw_sig = np.sum(self.w_train[self.l_train == 1]) # use sumw *per event* in this case - sumw_bkg /= len(self.w_train[self.y_train == 0]) - sumw_sig /= len(self.w_train[self.y_train == 1]) + sumw_bkg /= len(self.w_train[self.l_train == 0]) + sumw_sig /= len(self.w_train[self.l_train == 1]) self._balanced_class_weight = [(sumw_sig+sumw_bkg)/(2*sumw_bkg), (sumw_sig+sumw_bkg)/(2*sumw_sig)] logger.debug("Calculated balanced_class_weight: {}".format(self._balanced_class_weight)) return self._balanced_class_weight @@ -828,6 +926,24 @@ class ClassificationProject(object): self._load_data() + @property + def l_train(self): + "labels (in case y contains regression targets)" + if not self.target_fields: + return self.y_train + else: + return self.y_train[:,0] + + + @property + def l_test(self): + "labels (in case y contains regression targets)" + if not self.target_fields: + return self.y_test + else: + return self.y_test[:,0] + + @property def w_train_tot(self): "(sample weight * class weight), divided by mean" @@ -839,7 +955,7 @@ class ClassificationProject(object): raise ValueError("Data not loaded! can't calculate total weight") if self._w_train_tot is None: if self.apply_class_weight: - self._w_train_tot = self.w_train*np.array(class_weight)[self.y_train.astype(int)] + self._w_train_tot = self.w_train*np.array(class_weight)[self.l_train.astype(int)] else: self._w_train_tot = np.array(self.w_train) if self.normalize_weights: @@ -853,7 +969,9 @@ class ClassificationProject(object): idx = self.train_val_idx[1] x_val, y_val, w_val = self.x_train[idx], self.y_train[idx], self.w_train_tot[idx] x_val_input = self.get_input_list(self.transform(x_val)) - return x_val_input, y_val, w_val + y_val_output = self.get_output_list(self.transform_target(y_val)) + w_val_list = self.get_weight_list(w_val) + return x_val_input, y_val_output, w_val_list @property @@ -862,7 +980,9 @@ class ClassificationProject(object): idx = self.train_val_idx[0] x_train, y_train, w_train = self.x_train[idx], self.y_train[idx], self.w_train_tot[idx] x_train_input = self.get_input_list(self.transform(x_train)) - return x_train_input, y_train, w_train + y_train_output = self.get_output_list(self.transform_target(y_train)) + w_train_list = self.get_weight_list(w_train) + return x_train_input, y_train_output, w_train_list @property @@ -897,14 +1017,30 @@ class ClassificationProject(object): return x + def get_output_list(self, y): + "Split target vector column wise in case of regression targets" + if not self.target_fields: + return y + else: + return np.hsplit(y, len(self.target_fields)+1) + + + def get_weight_list(self, w): + "Repeat weight n times for regression targets" + if not self.target_fields: + return w + else: + return [w]*(len(self.target_fields)+1) + + def yield_batch(self): "Batch generator - optionally shuffle the indices after each epoch" x_train, y_train, w_train = self.x_train, self.y_train, self.w_train_tot train_idx = list(self.train_val_idx[0]) np.random.seed(self.shuffle_seed+1) logger.info("Generating training batches from {} signal and {} background events" - .format(len(np.where(self.y_train[train_idx]==1)[0]), - len(np.where(self.y_train[train_idx]==0)[0]))) + .format(len(np.where(self.l_train[train_idx]==1)[0]), + len(np.where(self.l_train[train_idx]==0)[0]))) while True: if self.shuffle: shuffled_idx = np.random.permutation(train_idx) @@ -915,7 +1051,9 @@ class ClassificationProject(object): y_batch = y_train[shuffled_idx[start:start+int(self.batch_size)]] w_batch = w_train[shuffled_idx[start:start+int(self.batch_size)]] x_input = self.get_input_list(self.transform(x_batch)) - yield (x_input, y_batch, w_batch) + y_output = self.get_output_list(self.transform_target(y_batch)) + w_list = self.get_weight_list(w_batch) + yield (x_input, y_output, w_list) def yield_single_class_batch(self, class_label): @@ -924,7 +1062,8 @@ class ClassificationProject(object): The weights are multiplied by balanced_class_weight. """ x_train, y_train, w_train = self.training_data - class_idx = np.where(y_train==class_label)[0] + l_train = y_train[:,0] if self.target_fields else y_train + class_idx = np.where(l_train==class_label)[0] while True: # shuffle the indices for this class label if self.shuffle: @@ -977,7 +1116,7 @@ class ClassificationProject(object): else: try: self.is_training = True - labels, label_counts = np.unique(self.y_train, return_counts=True) + labels, label_counts = np.unique(self.l_train, return_counts=True) logger.info("Training on balanced batches") # note: the batches have balanced_class_weight already applied self.model.fit_generator(self.yield_balanced_batch(), @@ -1014,20 +1153,6 @@ class ClassificationProject(object): self._write_info("epochs", self.total_epochs) - def evaluate_train_test(self, do_train=True, do_test=True, mode=None): - - if mode is not None: - self._write_info("scores_mode", mode) - - logger.info("Create/Update scores for train/test sample") - if do_test: - self.scores_test = self.predict(self.x_test, mode=mode).reshape(-1) - self._dump_to_hdf5("scores_test") - if do_train: - self.scores_train = self.predict(self.x_train, mode=mode).reshape(-1) - self._dump_to_hdf5("scores_train") - - def evaluate_train_test(self, do_train=True, do_test=True, batch_size=10000, mode=None): "Calculate scores for training and test sample" @@ -1040,12 +1165,15 @@ class ClassificationProject(object): setattr(self, "scores_"+data_name, np.empty(n_events)) for start in range(0, n_events, batch_size): stop = start+batch_size - getattr(self, "scores_"+data_name)[start:stop] = ( - self.predict( - self.get_input_list(self.transform(getattr(self, "x_"+data_name)[start:stop])), - mode=mode - ).reshape(-1) + outputs = self.predict( + self.get_input_list(self.transform(getattr(self, "x_"+data_name)[start:stop])), + mode=mode ) + if not self.target_fields: + scores_batch = outputs.reshape(-1) + else: + scores_batch = outputs[0].reshape(-1) + getattr(self, "scores_"+data_name)[start:stop] = scores_batch self._dump_to_hdf5("scores_"+data_name) if do_test: @@ -1094,6 +1222,7 @@ class ClassificationProject(object): batch_size=100000, score_mode=None, fixed_params=None): + "TODO: doesn't work for regression targets" f = ROOT.TFile.Open(source_filename) tree = f.Get(source_treename) entries = tree.GetEntries() @@ -1179,10 +1308,10 @@ class ClassificationProject(object): fig = None if not from_training_batches: - bkg = self.x_train[:,var_index][self.y_train == 0] - sig = self.x_train[:,var_index][self.y_train == 1] - bkg_weights = self.w_train_tot[self.y_train == 0] - sig_weights = self.w_train_tot[self.y_train == 1] + bkg = self.x_train[:,var_index][self.l_train == 0] + sig = self.x_train[:,var_index][self.l_train == 1] + bkg_weights = self.w_train_tot[self.l_train == 0] + sig_weights = self.w_train_tot[self.l_train == 1] else: bkg = None sig = None @@ -1195,6 +1324,8 @@ class ClassificationProject(object): for i_batch, (x, y, w) in enumerate(self.yield_batch()): if i_batch > n_batches: break + if self.target_fields: + y = y[0] bkg_batch = x[:,var_index][y==0] sig_batch = x[:,var_index][y==1] bkg_weights_batch = w[y==0] @@ -1287,8 +1418,8 @@ class ClassificationProject(object): def plot_weights(self, bins=100, range=None): fig, ax = plt.subplots() - bkg = self.w_train_tot[self.y_train == 0] - sig = self.w_train_tot[self.y_train == 1] + bkg = self.w_train_tot[self.l_train == 0] + sig = self.w_train_tot[self.l_train == 1] ax.hist(bkg, bins=bins, range=range, color="b", alpha=0.5) ax.set_yscale("log") save_show(plt, fig, os.path.join(self.project_dir, "eventweights_bkg.pdf")) @@ -1306,8 +1437,8 @@ class ClassificationProject(object): ax.grid(color='gray', linestyle='--', linewidth=1) for y, scores, weight, label in [ - (self.y_train, self.scores_train, self.w_train, "train"), - (self.y_test, self.scores_test, self.w_test, "test") + (self.l_train, self.scores_train, self.w_train, "train"), + (self.l_test, self.scores_test, self.w_test, "test") ]: fpr, tpr, threshold = roc_curve(y, scores, sample_weight = weight) fpr = 1.0 - fpr # background rejection @@ -1340,10 +1471,10 @@ class ClassificationProject(object): trf = lambda y : y fig, ax = plt.subplots() for scores, weights, y, class_label, fn, opts in [ - (self.scores_train, self.w_train, self.y_train, 1, ax.bar, dict(color="r", label="signal train")), - (self.scores_train, self.w_train, self.y_train, 0, ax.bar, dict(color="b", label="background train")), - (self.scores_test, self.w_test, self.y_test, 1, ax.errorbar, dict(fmt="ro", label="signal test")), - (self.scores_test, self.w_test, self.y_test, 0, ax.errorbar, dict(fmt="bo", label="background test")), + (self.scores_train, self.w_train, self.l_train, 1, ax.bar, dict(color="r", label="signal train")), + (self.scores_train, self.w_train, self.l_train, 0, ax.bar, dict(color="b", label="background train")), + (self.scores_test, self.w_test, self.l_test, 1, ax.errorbar, dict(fmt="ro", label="signal test")), + (self.scores_test, self.w_test, self.l_test, 0, ax.errorbar, dict(fmt="bo", label="background test")), ]: weights = weights[y==class_label] if apply_class_weight is True and (lumifactor is not None): @@ -1396,16 +1527,16 @@ class ClassificationProject(object): else: trf = lambda y : y - centers_sig_train, hist_sig_train, rel_errors_sig_train = self.get_bin_centered_hist(trf(self.scores_train[self.y_train==1].reshape(-1)), weights=self.w_train[self.y_train==1], **plot_opts) - centers_bkg_train, hist_bkg_train, rel_errors_bkg_train = self.get_bin_centered_hist(trf(self.scores_train[self.y_train==0].reshape(-1)), weights=self.w_train[self.y_train==0], **plot_opts) - centers_sig_test, hist_sig_test, rel_errors_sig_test = self.get_bin_centered_hist(trf(self.scores_test[self.y_test==1].reshape(-1)), weights=self.w_test[self.y_test==1], **plot_opts) - centers_bkg_test, hist_bkg_test, rel_errors_bkg_test = self.get_bin_centered_hist(trf(self.scores_test[self.y_test==0].reshape(-1)), weights=self.w_test[self.y_test==0], **plot_opts) + centers_sig_train, hist_sig_train, rel_errors_sig_train = self.get_bin_centered_hist(trf(self.scores_train[self.l_train==1].reshape(-1)), weights=self.w_train[self.l_train==1], **plot_opts) + centers_bkg_train, hist_bkg_train, rel_errors_bkg_train = self.get_bin_centered_hist(trf(self.scores_train[self.l_train==0].reshape(-1)), weights=self.w_train[self.l_train==0], **plot_opts) + centers_sig_test, hist_sig_test, rel_errors_sig_test = self.get_bin_centered_hist(trf(self.scores_test[self.l_test==1].reshape(-1)), weights=self.w_test[self.l_test==1], **plot_opts) + centers_bkg_test, hist_bkg_test, rel_errors_bkg_test = self.get_bin_centered_hist(trf(self.scores_test[self.l_test==0].reshape(-1)), weights=self.w_test[self.l_test==0], **plot_opts) significances_train = [] significances_test = [] - for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances, w, y in [ - (hist_sig_train, hist_bkg_train, rel_errors_sig_train, rel_errors_bkg_train, significances_train, self.w_train, self.y_train), - (hist_sig_test, hist_bkg_test, rel_errors_sig_test, rel_errors_bkg_test, significances_test, self.w_test, self.y_test), + for hist_sig, hist_bkg, rel_errors_sig, rel_errors_bkg, significances in [ + (hist_sig_train, hist_bkg_train, rel_errors_sig_train, rel_errors_bkg_train, significances_train), + (hist_sig_test, hist_bkg_test, rel_errors_sig_test, rel_errors_bkg_test, significances_test), ]: # factor to rescale due to using only a fraction of events (training and test samples) # normfactor_sig = (np.sum(self.w_train[self.y_train==1])+np.sum(self.w_test[self.y_test==1]))/np.sum(w[y==1]) @@ -1494,8 +1625,8 @@ class ClassificationProject(object): prop_cycle = plt.rcParams['axes.prop_cycle'] colors = prop_cycle.by_key()['color'] for (scores, y, w, label), col in zip( - [(self.scores_train, self.y_train, self.w_train, "train"), - (self.scores_test, self.y_test, self.w_test, "test")], + [(self.scores_train, self.l_train, self.w_train, "train"), + (self.scores_test, self.l_test, self.w_test, "test")], colors ): scores = trf(scores) @@ -1617,7 +1748,7 @@ class ClassificationProject(object): df = pd.DataFrame(np.concatenate([self.x_train, self.x_test]), columns=self.fields) df["weight"] = np.concatenate([self.w_train, self.w_test]) df["labels"] = pd.Categorical.from_codes( - np.concatenate([self.y_train, self.y_test]), + np.concatenate([self.l_train, self.l_test]), categories=["background", "signal"] ) for identifier in self.identifiers: @@ -1883,7 +2014,14 @@ class ClassificationProjectRNN(ClassificationProject): if (dropout_fraction is not None) and (dropout_fraction > 0): combined = Dropout(rate=dropout_fraction)(combined) combined = Dense(1, activation=self.activation_function_output)(combined) - self._model = Model(inputs=rnn_inputs+[flat_input], outputs=combined) + outputs = [combined] + + # optional regression targets + for target_field in self.target_fields: + extra_target = Dense(1, activation="linear", name="target_{}".format(target_field))(combined) + outputs.append(extra_target) + + self._model = Model(inputs=rnn_inputs+[flat_input], outputs=outputs) self._compile_or_load_model() return self._model