diff --git a/plotting.py b/plotting.py index cdcde3f5e763874e4e0905a47591347f2b3dc612..101fad04491a37b376b67b6876234ad00f64632f 100644 --- a/plotting.py +++ b/plotting.py @@ -20,9 +20,9 @@ logger.addHandler(logging.NullHandler()) Some further plotting functions """ -def save_show(plt, fig, filename): +def save_show(plt, fig, filename, **kwargs): "Save a figure and show it in case we are in ipython or jupyter notebook." - fig.savefig(filename) + fig.savefig(filename, **kwargs) try: get_ipython plt.show() @@ -234,6 +234,109 @@ def plot_NN_vs_var_2D_all(plotname, model, means, save_show(plt, fig, plotname, bbox_inches='tight') +def plot_profile_2D_all(plotname, model, events, + valsx, valsy, + nbinsx, xmin, xmax, + nbinsy, ymin, ymax, + transform_function=None, + varx_label=None, + vary_label=None, + zrange=None, logz=False, + plot_last_layer=False, + log_default_ymin=1e-5, + cmap="inferno", **kwargs): + + "Similar to plot_profile_2D, but creates a grid of plots for all neurons." + + # transform + if transform_function is not None: + events = transform_function(events) + + logger.info("Reading activations for all neurons") + acts = get_activations(model, events, print_shape_only=True) + logger.info("Done") + + if plot_last_layer: + n_neurons = [len(i[0]) for i in acts] + else: + n_neurons = [len(i[0]) for i in acts[:-1]] + layers = len(n_neurons) + + nrows_ncols = (layers, max(n_neurons)) + fig = plt.figure(1, figsize=nrows_ncols) + grid = ImageGrid(fig, 111, nrows_ncols=nrows_ncols[::-1], axes_pad=0, + label_mode="1", + aspect=False, + cbar_location="top", + cbar_mode="single", + cbar_pad=.2, + cbar_size="5%",) + grid_array = np.array(grid) + grid_array = grid_array.reshape(*nrows_ncols[::-1]) + + global_min = None + global_max = None + + logger.info("Creating profile histograms") + ims = [] + reg_plots = [] + for layer in range(layers): + for neuron in range(len(acts[layer][0])): + acts_neuron = acts[layer][:,neuron] + ax = grid_array[neuron][layer] + extra_opts = {} + if not (plot_last_layer and layer == layers-1): + # for hidden layers, plot the same z-scale + if logz: + norm = matplotlib.colors.LogNorm + else: + norm = matplotlib.colors.Normalize + if zrange is not None: + extra_opts["norm"] = norm(vmin=zrange[0], vmax=zrange[1]) + else: + extra_opts["norm"] = norm(vmin=global_min, vmax=global_max) + hist, xedges, yedges = get_profile_2D( + valsx, valsy, acts_neuron, + nbinsx, xmin, xmax, + nbinsy, ymin, ymax, + **kwargs + ) + if global_min is None or hist.min() < global_min: + global_min = hist.min() + if global_max is None or hist.max() > global_max: + global_max = hist.max() + X, Y = np.meshgrid(xedges, yedges) + reg_plots.append((layer, neuron, ax, (X, Y, hist), dict(cmap="inferno", linewidth=0, rasterized=True, **extra_opts))) + logger.info("Done") + + logger.info("global_min: {}".format(global_min)) + logger.info("global_max: {}".format(global_max)) + + if global_min <= 0 and logz: + global_min = log_default_ymin + logger.info("Changing global_min to {}".format(log_default_ymin)) + + for layer, neuron, ax, args, kwargs in reg_plots: + if zrange is None: + kwargs["norm"].vmin = global_min + kwargs["norm"].vmax = global_max + im = ax.pcolormesh(*args, **kwargs) + ax.set_facecolor("black") + if varx_label is not None: + ax.set_xlabel(varx_label) + if vary_label is not None: + ax.set_ylabel(vary_label) + ax.text(0., 0.5, "{}, {}".format(layer, neuron), transform=ax.transAxes, color="white") + + cb = fig.colorbar(im, cax=grid[0].cax, orientation="horizontal") + cb.ax.xaxis.set_ticks_position('top') + cb.ax.xaxis.set_label_position('top') + + logger.info("Rendering") + save_show(plt, fig, plotname, bbox_inches='tight') + logger.info("Done") + + def plot_hist_2D(plotname, xedges, yedges, hist, varx_label=None, vary_label=None, log=False, zlabel="# of events"): X, Y = np.meshgrid(xedges, yedges) diff --git a/scripts/generate_actmax.py b/scripts/generate_actmax.py new file mode 100755 index 0000000000000000000000000000000000000000..21db4e4a09ff5c7424b19370abfbd30f23221ff6 --- /dev/null +++ b/scripts/generate_actmax.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +import sys, argparse, re, random + +parser = argparse.ArgumentParser(description="generate events that maximise the activation for a given neuron") +parser.add_argument("input_project") +parser.add_argument("output_file") +parser.add_argument("-n", "--nevents", default=100000, type=int) +parser.add_argument("-j", "--mask-jets", action="store_true", + help="mask variables called jet*Pt/Eta/Phi and generate a random uniform distribution of the number of jets (only nescessary for non-recurrent NN)") +args = parser.parse_args() + +import logging +logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) + +import h5py +import numpy as np + +from KerasROOTClassification.utils import ( + weighted_quantile, + get_max_activation_events, + create_random_event, + get_ranges +) +from KerasROOTClassification import load_from_dir +import meme + +meme.setOptions(deactivated=True) + +input_project = args.input_project +output_file = args.output_file + +c = load_from_dir(input_project) + +ranges, mask_probs = get_ranges(c.transform(c.x_train), [0.01, 0.99], c.w_train_tot, mask_value=c.mask_value, max_evts=10000) + +def mask_uniform(x, mask_value, recurrent_field_idx): + """ + Mask recurrent fields with a random (uniform) number of objects. Works in place. + """ + for rec_idx in recurrent_field_idx: + for evt in x: + masked = False + nobj = int(random.random()*(rec_idx.shape[1]+1)) + for obj_number, line_idx in enumerate(rec_idx.reshape(*rec_idx.shape[1:])): + if obj_number == nobj: + masked=True + if masked: + evt[line_idx] = mask_value + +def get_input_flat(x): + return x[0].reshape(-1, len(c.fields)) + + +if args.mask_jets: + jet_fields = {} + for field_name in c.fields: + if any(field_name.startswith("jet") and field_name.endswith(suffix) for suffix in ["Pt", "Eta", "Phi"]): + jet_number = re.findall("[0-9]+", field_name)[0] + if not jet_number in jet_fields: + jet_fields[jet_number] = [] + jet_fields[jet_number].append(c.fields.index(field_name)) + jet_fields = [np.array([[v for k, v in sorted(jet_fields.items(), key=lambda l:l[0])]])] + + +def input_transform(x): + x = np.array(x) + if hasattr(c, "mask_uniform"): + c.mask_uniform(x) + return c.get_input_list(x) + elif args.mask_jets: + mask_uniform(x, c.mask_value, jet_fields) + return x + + +opt_kwargs = dict() +if hasattr(c, "mask_uniform"): + opt_kwargs["input_transform"] = input_transform + opt_kwargs["input_inverse_transform"] = c.get_input_flat +if args.mask_jets: + opt_kwargs["input_transform"] = input_transform + opt_kwargs["input_inverse_transform"] = get_input_flat + + +evts = get_max_activation_events( + c.model, ranges, + ntries=args.nevents, + layer=len(c.model.layers)-1, + neuron=0, + maxit=10, + seed=45, + threshold=0, + **opt_kwargs +) + +with h5py.File(output_file, "w") as f: + f.create_dataset("actmax", data=evts[1])