Commit afce6631 authored by Thomas Weber

Added new genetic algorithm, might be better

parent d6b7647e
@@ -4,3 +4,4 @@ run.py
 *.swp
 *.pyc
 *.pdf
+*.txt
#!/usr/bin/env python
"""Class that keeps track of all genomes trained so far, and their scores.
Among other things, ensures that genomes are unique.
"""
import logging

class AllGenomes(object):
    """Store all genomes."""

    def __init__(self, firstgenome):
        """Initialize the master list with the first genome."""
        self.population = [firstgenome]

    def add_genome(self, genome):
        """Add the genome to our population, refusing duplicates."""
        for existing in self.population:
            if genome.hash == existing.hash:
                logging.info("add_genome() ERROR: hash clash - duplicate genome")
                return False

        self.population.append(genome)
        return True

    def set_accuracy(self, genome):
        """Record the genome's accuracy in the master list."""
        for existing in self.population:
            if genome.hash == existing.hash:
                existing.accuracy = genome.accuracy
                return

        logging.info("set_accuracy() ERROR: Genome not found")

    def is_duplicate(self, genome):
        """Check whether the genome is already in the population."""
        for existing in self.population:
            if genome.hash == existing.hash:
                return True

        return False

    def print_all_genomes(self):
        """Print out every genome in the master list."""
        for genome in self.population:
            genome.print_genome_ma()
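A minimal usage sketch of the bookkeeping this class provides (the gene lists here are invented for illustration; they are not the commit's search space):

# Hypothetical usage of AllGenomes; gene values are made up for the example.
from genome import Genome
from allgenomes import AllGenomes

genes = {'nb_neurons': [64, 128], 'nb_layers': [1, 2], 'activation': ['relu', 'elu'],
         'lr': [0.1, 0.5], 'decay': [1e-3, 1e-4], 'momentum': [0.0, 0.9]}

first = Genome(genes, {}, 1, 0, 0, 1)
first.set_genes_random()
master = AllGenomes(first)

candidate = Genome(genes, {}, 2, 0, 0, 1)
candidate.set_genes_random()
while master.is_duplicate(candidate):   # hashes collide only for identical genes
    candidate.mutate_one_gene()
master.add_genome(candidate)            # returns True once the genome is unique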
#!/usr/bin/env python
"""
Class that holds a genetic algorithm for evolving a network.

Inspiration:
    http://lethain.com/genetic-algorithms-cool-name-damn-simple/
"""
from __future__ import print_function

import random
import logging
import copy

from functools import reduce
from operator import add

from genome import Genome
from idgen import IDgen
from allgenomes import AllGenomes

class Evolver(object):
    """Class that implements the genetic algorithm."""

    def __init__(self, all_possible_genes, retain=0.15, random_select=0.1, mutate_chance=0.3):
        """Create an optimizer.

        Args:
            all_possible_genes (dict): Possible genome parameters
            retain (float): Fraction of the population to retain after
                each generation
            random_select (float): Probability that a rejected genome
                remains in the population anyway
            mutate_chance (float): Probability that a genome will be
                randomly mutated
        """
        self.all_possible_genes = all_possible_genes
        self.retain = retain
        self.random_select = random_select
        self.mutate_chance = mutate_chance

        # Set up the ID generator.
        self.ids = IDgen()

    def create_population(self, count):
        """Create a population of random networks.

        Args:
            count (int): Number of networks to generate, aka the
                size of the population

        Returns:
            (list): Population of network objects
        """
        pop = []

        i = 0
        while i < count:
            # Initialize a new genome.
            genome = Genome(self.all_possible_genes, {}, self.ids.get_next_ID(), 0, 0, self.ids.get_Gen())

            # Set it to random parameters.
            genome.set_genes_random()

            if i == 0:
                # This is where we will store all genomes.
                self.master = AllGenomes(genome)
            else:
                # Make sure it is unique.
                while self.master.is_duplicate(genome):
                    genome.mutate_one_gene()

            # Add the genome to our population.
            pop.append(genome)

            # And add it to the master list (the first genome already seeded it).
            if i > 0:
                self.master.add_genome(genome)

            i += 1

        #self.master.print_all_genomes()

        return pop
    @staticmethod
    def fitness(genome):
        """Return the accuracy, which is our fitness function."""
        return genome.accuracy

    def grade(self, pop):
        """Find the average fitness for a population.

        Args:
            pop (list): The population of networks/genomes

        Returns:
            (float): The average accuracy of the population
        """
        summed = reduce(add, (self.fitness(genome) for genome in pop))
        return summed / float(len(pop))

    def breed(self, mom, dad):
        """Make two children from parental genes.

        Args:
            mom (Genome): One parent genome
            dad (Genome): The other parent genome

        Returns:
            (list): Two genome objects
        """
        children = []

        # Where do we recombine? 0, 1, 2, 3, 4... N?
        # With four genes, there are three choices for the recombination:
        #   ___ * ___ * ___ * ___
        # 0 -> no recombination, and N == length of dictionary -> no recombination.
        # 0 and N just (re)create more copies of the parents,
        # so the range is always 1 to len(all_possible_genes) - 1.
        pcl = len(self.all_possible_genes)
        recomb_loc = random.randint(1, pcl - 1)

        child1 = {}
        child2 = {}

        # Enforce a defined gene order using a sorted key list - paranoia,
        # just to make sure we do not add unintentional randomization.
        keys = sorted(self.all_possible_genes)

        # *** CORE RECOMBINATION CODE ***
        for x in range(0, pcl):
            if x < recomb_loc:
                child1[keys[x]] = mom.geneparam[keys[x]]
                child2[keys[x]] = dad.geneparam[keys[x]]
            else:
                child1[keys[x]] = dad.geneparam[keys[x]]
                child2[keys[x]] = mom.geneparam[keys[x]]

        # Initialize new genomes and set their parameters to those just
        # determined; both children have the same mom and dad.
        genome1 = Genome(self.all_possible_genes, child1, self.ids.get_next_ID(), mom.u_ID, dad.u_ID, self.ids.get_Gen())
        genome2 = Genome(self.all_possible_genes, child2, self.ids.get_next_ID(), mom.u_ID, dad.u_ID, self.ids.get_Gen())

        # At this point there is zero guarantee that a child is actually unique.

        # Randomly mutate one gene.
        if self.mutate_chance > random.random():
            genome1.mutate_one_gene()

        if self.mutate_chance > random.random():
            genome2.mutate_one_gene()

        # Do we have a unique child, or would we just retrain one we already have?
        while self.master.is_duplicate(genome1):
            genome1.mutate_one_gene()
        self.master.add_genome(genome1)

        while self.master.is_duplicate(genome2):
            genome2.mutate_one_gene()
        self.master.add_genome(genome2)

        children.append(genome1)
        children.append(genome2)

        return children
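To make the single-point crossover concrete, here is a standalone sketch of what one call to breed effectively does to the two parameter dicts (the gene values are invented for the example):

# Standalone illustration of the recombination above; values are made up.
keys = sorted(['nb_neurons', 'nb_layers', 'activation', 'lr', 'decay', 'momentum'])
# -> ['activation', 'decay', 'lr', 'momentum', 'nb_layers', 'nb_neurons']
mom = {'activation': 'relu', 'decay': 1e-3, 'lr': 0.1, 'momentum': 0.9, 'nb_layers': 2, 'nb_neurons': 64}
dad = {'activation': 'tanh', 'decay': 1e-5, 'lr': 0.5, 'momentum': 0.1, 'nb_layers': 4, 'nb_neurons': 256}

recomb_loc = 2  # random.randint(1, 5) in the real code
child1 = {k: (mom if i < recomb_loc else dad)[k] for i, k in enumerate(keys)}
child2 = {k: (dad if i < recomb_loc else mom)[k] for i, k in enumerate(keys)}
# child1 takes 'activation' and 'decay' from mom and the remaining four genes
# from dad; child2 is the mirror image.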
    def evolve(self, pop):
        """Evolve a population of genomes.

        Args:
            pop (list): A list of genomes

        Returns:
            (list): The evolved population of genomes
        """
        # Increase the generation counter.
        self.ids.increase_Gen()

        # Get scores for each genome.
        graded = [(self.fitness(genome), genome) for genome in pop]

        # And use those scores to fill in the master list.
        for genome in pop:
            self.master.set_accuracy(genome)

        # Sort on the scores.
        graded = [x[1] for x in sorted(graded, key=lambda x: x[0], reverse=True)]

        # Get the number we want to keep unchanged for the next cycle.
        retain_length = int(len(graded) * self.retain)

        # In this first step, we keep the 'top' X percent (as defined by self.retain).
        # We will not change them, except to update their generation.
        new_generation = graded[:retain_length]

        # For the lower-scoring ones, randomly keep some anyway.
        # This is wasteful, since we _know_ these are bad; at a minimum we
        # mutate any duplicates until they are unique before rescoring them.
        for genome in graded[retain_length:]:
            if self.random_select > random.random():
                gtc = copy.deepcopy(genome)

                while self.master.is_duplicate(gtc):
                    gtc.mutate_one_gene()

                gtc.set_generation(self.ids.get_Gen())
                new_generation.append(gtc)
                self.master.add_genome(gtc)

        # Now find out how many spots we have left to fill.
        ng_length = len(new_generation)
        desired_length = len(pop) - ng_length

        children = []

        # Add children, bred from pairs of the remaining (i.e. top-scoring or
        # randomly kept lower-scoring) genomes.
        while len(children) < desired_length:
            # Get a random mom and dad, making sure they are distinct.
            # Note: range(ng_length) covers all survivors; range(ng_length - 1)
            # would silently exclude the last one.
            i_male, i_female = random.sample(range(ng_length), k=2)

            male = new_generation[i_male]
            female = new_generation[i_female]

            # Recombine and mutate.
            babies = self.breed(male, female)

            # The babies are guaranteed to be novel.
            # Add the children one at a time.
            for baby in babies:
                # Don't grow larger than the desired length: breed() always
                # returns two babies, so the last pair may overshoot by one.
                if len(children) < desired_length:
                    children.append(baby)

        new_generation.extend(children)

        return new_generation
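With the defaults (retain=0.15, random_select=0.1) and the population of 20 used in main.py, the selection arithmetic works out as follows; a quick standalone check:

# Back-of-the-envelope composition of one generation, matching evolve() above.
pop_size = 20
retain, random_select = 0.15, 0.1

retain_length = int(pop_size * retain)                        # 3 elites kept unchanged
expected_lucky = (pop_size - retain_length) * random_select   # ~1.7 low scorers kept on average
# The remaining ~15 slots are filled two at a time by breed(), so roughly
# eight breeding calls produce the children for the next generation.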
#!/usr/bin/env python
"""The genome to be evolved."""

import random
import logging
import hashlib
import copy

from train import train_and_score

class Genome(object):
    """
    Represents one genome and all relevant utility functions (add, mutate, etc.).
    """

    def __init__(self, all_possible_genes=None, geneparam=None, u_ID=0, mom_ID=0, dad_ID=0, gen=0):
        """Initialize a genome.

        Args:
            all_possible_genes (dict): Parameters for the genome, includes:
                gene_nb_neurons (list): [64, 128, 256]
                gene_nb_layers (list):  [1, 2, 3, 4]
                gene_activation (list): ['relu', 'elu']
                gene_optimizer (list):  ['rmsprop', 'adam']
        """
        self.accuracy = 0.0
        self.all_possible_genes = all_possible_genes
        # Avoid a mutable default argument: each genome gets its own dict.
        self.geneparam = geneparam if geneparam is not None else {}  # (dict): the actual genome parameters
        self.u_ID = u_ID
        self.parents = [mom_ID, dad_ID]
        self.generation = gen

        # The hash only makes sense once the genes have been specified.
        if not self.geneparam:
            self.hash = 0
        else:
            self.update_hash()

    def update_hash(self):
        """
        Refresh the genome's unique hash - needs to run after any genome change.
        """
        # + str(self.geneparam['optimizer'])
        genh = str(self.geneparam['nb_neurons']) + self.geneparam['activation'] \
            + str(self.geneparam['nb_layers']) \
            + str(self.geneparam['lr']) \
            + str(self.geneparam['decay']) \
            + str(self.geneparam['momentum'])

        self.hash = hashlib.md5(genh.encode("UTF-8")).hexdigest()
        # A changed genome has not been trained yet, so reset its accuracy.
        self.accuracy = 0.0

    def set_genes_random(self):
        """Create a random genome."""
        self.parents = [0, 0]  # very sad - no parents :(

        for key in self.all_possible_genes:
            self.geneparam[key] = random.choice(self.all_possible_genes[key])

        self.update_hash()

    def mutate_one_gene(self):
        """Randomly mutate one gene in the genome.

        The mutated gene is guaranteed to differ from its current value.
        """
        # Which gene shall we mutate? Choose one of N possible keys/genes.
        gene_to_mutate = random.choice(list(self.all_possible_genes.keys()))

        # And then let's mutate one of the genes.
        # Removing the current value ensures this actually creates a mutation.
        current_value = self.geneparam[gene_to_mutate]
        possible_choices = copy.deepcopy(self.all_possible_genes[gene_to_mutate])

        possible_choices.remove(current_value)

        self.geneparam[gene_to_mutate] = random.choice(possible_choices)

        self.update_hash()

    def set_generation(self, generation):
        """Needed when a genome is passed on from one generation to the next:
        the ID stays the same, but the generation is increased."""
        self.generation = generation

    def set_genes_to(self, geneparam, mom_ID, dad_ID):
        """Set genome properties; used when breeding kids.

        Args:
            geneparam (dict): The genome parameters
        """
        self.parents = [mom_ID, dad_ID]
        self.geneparam = geneparam

        self.update_hash()

    def train(self):
        """Train the genome and record the accuracy."""
        if self.accuracy == 0.0:  # don't bother retraining ones we already trained
            self.accuracy = train_and_score(self.geneparam)

    def print_genome(self):
        """Print out a genome."""
        logging.info(self.geneparam)
        logging.info("Acc: %.2f%%" % (self.accuracy * 100))
        logging.info("UniID: %d" % self.u_ID)
        logging.info("Mom and Dad: %d %d" % (self.parents[0], self.parents[1]))
        logging.info("Gen: %d" % self.generation)
        logging.info("Hash: %s" % self.hash)

    def print_genome_ma(self):
        """Print out a genome in compact form."""
        logging.info(self.geneparam)
        logging.info("Acc: %.2f%% UniID: %d Mom and Dad: %d %d Gen: %d" % (self.accuracy * 100, self.u_ID, self.parents[0], self.parents[1], self.generation))
        logging.info("Hash: %s" % self.hash)
#!/usr/bin/env python
"""Provide unique genome IDs."""

class IDgen():
    """Generate unique IDs."""

    def __init__(self):
        """Keep track of the current ID and generation."""
        self.currentID = 0
        self.currentGen = 1

    def get_next_ID(self):
        """Hand out the next unused ID."""
        self.currentID += 1
        return self.currentID

    def increase_Gen(self):
        """Move on to the next generation."""
        self.currentGen += 1

    def get_Gen(self):
        """Return the current generation."""
        return self.currentGen
#!/usr/bin/env python
"""Entry point to evolving the neural network. Start here."""
from __future__ import print_function

from evolver import Evolver
from tqdm import tqdm

import logging
import sys

# Setup logging.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
    filename='log.txt'
)

def train_genomes(genomes):
    """Train each genome.

    Args:
        genomes (list): Current population of genomes
    """
    logging.info("***train_genomes(genomes)***")

    pbar = tqdm(total=len(genomes))

    for genome in genomes:
        genome.train()
        pbar.update(1)

    pbar.close()

def get_average_accuracy(genomes):
    """Get the average accuracy for a group of genomes.

    Args:
        genomes (list): List of genomes

    Returns:
        float: The average accuracy of a population of genomes.
    """
    total_accuracy = 0

    for genome in genomes:
        total_accuracy += genome.accuracy

    return total_accuracy / len(genomes)

def generate(generations, population, all_possible_genes):
    """Generate a network with the genetic algorithm.

    Args:
        generations (int): Number of times to evolve the population
        population (int): Number of genomes in each generation
        all_possible_genes (dict): Parameter choices for the genomes
    """
    logging.info("***generate(generations, population, all_possible_genes)***")

    evolver = Evolver(all_possible_genes)
    genomes = evolver.create_population(population)

    # Evolve the generation.
    for i in range(generations):
        logging.info("***Now in generation %d of %d***" % (i + 1, generations))

        print_genomes(genomes)

        # Train and get accuracy for the genomes.
        train_genomes(genomes)

        # Get the average accuracy for this generation.
        average_accuracy = get_average_accuracy(genomes)

        # Print out the average accuracy each generation.
        logging.info("Generation average: %.2f%%" % (average_accuracy * 100))
        logging.info('-'*80)

        # Evolve, except on the last iteration.
        if i != generations - 1:
            # Evolve!
            genomes = evolver.evolve(genomes)

    # Sort our final population according to performance.
    genomes = sorted(genomes, key=lambda x: x.accuracy, reverse=True)

    # Print out the top 5 genomes.
    print_genomes(genomes[:5])

    #save_path = saver.save(sess, '/output/model.ckpt')
    #print("Model saved in file: %s" % save_path)

def print_genomes(genomes):
    """Print a list of genomes.

    Args:
        genomes (list): The population of genomes
    """
    logging.info('-'*80)

    for genome in genomes:
        genome.print_genome()

def main():
    """Evolve a genome."""
    population = 20   # Number of genomes in each generation.
    # We only need to train the new ones.
    generations = 2   # Number of times to evolve the population.

    all_possible_genes = {
        'nb_neurons': [8, 16, 32, 64, 128, 256, 512, 1024],
        'nb_layers': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'activation': ['relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'softplus', 'linear'],
        #'optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'],
        'lr': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 1.0, 10.0, 100.0],
        'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
        'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    }

    print("***Evolving for %d generations with population size = %d***" % (generations, population))

    generate(generations, population, all_possible_genes)

if __name__ == '__main__':
    main()
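For a sense of how large the space being searched is, the number of distinct genomes is the product of the per-gene choice counts from all_possible_genes; a quick standalone check (not part of the commit):

# Per-gene choice counts: nb_neurons, nb_layers, activation, lr, decay, momentum.
from functools import reduce
from operator import mul

choice_counts = [8, 10, 7, 9, 7, 11]
print(reduce(mul, choice_counts))  # 388080 unique genomes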
#!/usr/bin/env python
import logging
from optimizer import Optimizer
from tqdm import tqdm

# Setup logging.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.DEBUG,
    filename='log.txt'
)

def train_networks(networks):
    """Train each network.

    Args:
        networks (list): Current population of networks
    """
    pbar = tqdm(total=len(networks))
    for network in networks:
        network.train()
        pbar.update(1)
    pbar.close()

def get_average_accuracy(networks):
    """Get the average accuracy for a group of networks.

    Args:
        networks (list): List of networks

    Returns:
        float: The average accuracy of a population of networks.
    """
    total_accuracy = 0
    for network in networks:
        total_accuracy += network.accuracy

    return total_accuracy / len(networks)

def generate(generations, population, nn_param_choices):
    """Generate a network with the genetic algorithm.

    Args:
        generations (int): Number of times to evolve the population
        population (int): Number of networks in each generation
        nn_param_choices (dict): Parameter choices for networks
    """
    optimizer = Optimizer(nn_param_choices)
    networks = optimizer.create_population(population)

    # Evolve the generation.
    for i in range(generations):
        logging.info("***Doing generation %d of %d***" %
                     (i + 1, generations))

        # Train and get accuracy for networks.
        train_networks(networks)

        # Get the average accuracy for this generation.
        average_accuracy = get_average_accuracy(networks)

        # Print out the average accuracy each generation.
        logging.info("Generation average: %.2f%%" % (average_accuracy * 100))
        logging.info('-'*80)

        # Evolve, except on the last iteration.
        if i != generations - 1:
            # Do the evolution.
            networks = optimizer.evolve(networks)

    # Sort our final population.
    networks = sorted(networks, key=lambda x: x.accuracy, reverse=True)

    # Print out the top 5 networks.
    print_networks(networks[:5])

def print_networks(networks):
    """Print a list of networks.

    Args:
        networks (list): The population of networks
    """
    logging.info('-'*80)
    for network in networks:
        network.print_network()

def main():
    """Evolve a network."""
    generations = 7  # Number of times to evolve the population.
    population = 5   # Number of networks in each generation.

    nn_param_choices = {
        'nb_neurons': [8, 16, 32, 64, 128, 256, 512, 768, 1024],
        'nb_layers': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'activation': ['relu', 'elu', 'tanh', 'sigmoid'],
        #'optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad',
        #              'adadelta', 'adamax', 'nadam'],
        #'optimizer_opts': {'lr': [0.1, 0.5, 1.0, 10.0, 100.0],
        #                   'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
        #                   'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
        #                                0.8, 0.9, 1.0]},
        'lr': [0.1, 0.5, 1.0, 10.0, 100.0],
        'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
        'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    }

    logging.info("***Evolving %d generations with population %d***" %
                 (generations, population))

    generate(generations, population, nn_param_choices)

if __name__ == '__main__':
    main()
File moved
File moved
#!/usr/bin/env python
import os, sys, inspect

# Make the parent directory importable so we can use the toolkit.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import toolkit
from toolkit import KerasROOTClassification

def init_model(geneparam):
    nb_layers = geneparam['nb_layers']
    nb_neurons = geneparam['nb_neurons']
    activation = geneparam['activation']
    optimizer = geneparam['optimizer']
    #lr = geneparam['lr']
    #decay = geneparam['decay']
    #momentum = geneparam['momentum']

    filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"

    c = KerasROOTClassification("",
                                signal_trees=[(filename, "GG_oneStep_1545_1265_985_NoSys")],
                                bkg_trees=[(filename, "ttbar_NoSys"),
                                           (filename, "wjets_Sherpa221_NoSys"),
                                           (filename, "zjets_Sherpa221_NoSys"),
                                           (filename, "diboson_Sherpa221_NoSys"),
                                           (filename, "ttv_NoSys"),
                                           (filename, "singletop_NoSys")
                                           ],
                                dumping_enabled=False,
                                optimizer=optimizer,
                                layers=nb_layers,
                                nodes=nb_neurons,
                                activation_function=activation,
                                # optimizer_opts=dict(lr=lr, decay=decay,
                                #                     momentum=momentum),
                                earlystopping_opts=dict(monitor='val_loss',
                                                        min_delta=0, patience=2, verbose=0, mode='auto'),
                                # optimizer="Adam",
                                selection="lep1Pt<5000",  # cut out a few very weird outliers
                                branches=["met", "mt"],
                                weight_expr="eventWeight*genWeight",
                                identifiers=["DatasetNumber", "EventNumber"],
                                step_bkg=100)
    return c

def train_and_score(geneparam):
    model = init_model(geneparam)
    model.train(epochs=20)

    score = model.score

    return score[1]  # 1 is accuracy. 0 is loss.
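Since train_and_score depends on the ETP ROOT trees, a stand-in can be handy for smoke-testing the evolver locally. This mock is not part of the commit and its scoring heuristic is entirely invented; it only has to return a float in [0, 1]:

# Hypothetical stand-in for train_and_score, for testing the GA without data.
import random

def train_and_score(geneparam):
    """Pretend to train: score genomes without touching any data."""
    base = 0.5 + 0.04 * geneparam['nb_layers']  # fake preference for depth
    noise = random.uniform(-0.05, 0.05)         # fake training variance
    return max(0.0, min(1.0, base + noise))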
@@ -7,15 +7,15 @@ sys.path.insert(0,parentdir)
 import toolkit
 from toolkit import KerasROOTClassification
 
-def init_model(network):
-    nb_layers = network['nb_layers']
-    nb_neurons = network['nb_neurons']
-    activation = network['activation']
-    # optimizer = network['optimizer']
-    lr = network['lr']
-    decay = network['decay']
-    momentum = network['momentum']
+def init_model(geneparam):
+    nb_layers = geneparam['nb_layers']
+    nb_neurons = geneparam['nb_neurons']
+    activation = geneparam['activation']
+    #optimizer = geneparam['optimizer']
+    lr = geneparam['lr']
+    decay = geneparam['decay']
+    momentum = geneparam['momentum']
 
     filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"
 
@@ -28,6 +28,16 @@ def init_model(network):
                 (filename, "ttv_NoSys"),
                 (filename, "singletop_NoSys")
                 ],
+                branches = ["jet1Pt", "jet1Phi==-999?0:jet1Phi", "jet1Eta==-999?0:jet1Eta",
+                            "jet2Pt", "jet2Phi==-999?0:jet2Phi", "jet2Eta==-999?0:jet2Eta",
+                            "jet3Pt", "jet3Phi==-999?0:jet3Phi", "jet3Eta==-999?0:jet3Eta",
+                            "jet4Pt", "jet4Phi==-999?0:jet4Phi", "jet4Eta==-999?0:jet4Eta",
+                            "jet5Pt", "jet5Phi==-999?0:jet5Phi", "jet5Eta==-999?0:jet5Eta",
+                            "jet6Pt", "jet6Phi==-999?0:jet6Phi", "jet6Eta==-999?0:jet6Eta",
+                            "jet7Pt", "jet7Phi==-999?0:jet7Phi", "jet7Eta==-999?0:jet7Eta",
+                            "jet8Pt", "jet8Phi==-999?0:jet8Phi", "jet8Eta==-999?0:jet8Eta",
+                            "lep1Pt", "lep1Phi", "lep1Eta", "nJet30",
+                            "met", "met_Phi"],
                 dumping_enabled=False,
                 optimizer="SGD",
                 layers=nb_layers,
@@ -37,19 +47,17 @@ def init_model(network):
                                     momentum=momentum),
                 earlystopping_opts=dict(monitor='val_loss',
                                         min_delta=0, patience=2, verbose=0, mode='auto'),
-                # optimizer="Adam",
                 selection="lep1Pt<5000",  # cut out a few very weird outliers
-                branches = ["met", "mt"],
                 weight_expr = "eventWeight*genWeight",
                 identifiers = ["DatasetNumber", "EventNumber"],
                 step_bkg = 100)
 
     return c
 
-def train_and_score(network):
-    model = init_model(network)
+def train_and_score(geneparam):
+    model = init_model(geneparam)
     model.train(epochs=20)
+    model.evaluate()
 
     score = model.score
 
     return score[1]  # 1 is accuracy. 0 is loss.
@@ -17,8 +17,9 @@ from sklearn.preprocessing import StandardScaler, RobustScaler
 from sklearn.externals import joblib
 from sklearn.metrics import roc_curve, auc
 
+from keras import backend as K
 from keras.models import Sequential
-from keras.layers import Dense
+from keras.layers import Dense, Dropout
 from keras.models import model_from_json
 from keras.callbacks import History, EarlyStopping
 from keras.optimizers import SGD
@@ -343,6 +344,7 @@ class KerasROOTClassification(object):
         # the other hidden layers
         for layer_number in range(self.layers-1):
             self._model.add(Dense(self.nodes, activation=self.activation_function))
+            self._model.add(Dropout(0.2))  # hard-coded dropout for each layer
         # last layer is one neuron (binary classification)
         self._model.add(Dense(1, activation='sigmoid'))
         logger.info("Using {}(**{}) as Optimizer".format(self.optimizer, self.optimizer_opts))
@@ -406,7 +408,7 @@ class KerasROOTClassification(object):
         for branch_index, branch in enumerate(self.branches):
             self.plot_input(branch_index)
 
         self.total_epochs = self._read_info("epochs", 0)
 
         logger.info("Train model")
         try:
@@ -436,20 +438,28 @@ class KerasROOTClassification(object):
         self.total_epochs += epochs
         self._write_info("epochs", self.total_epochs)
 
-        #logger.info("Create/Update predictions for ROC curve")
-        #self.pred_test = self.model.predict(self.x_test)
-        #self.pred_train = self.model.predict(self.x_train)
-
-        logger.info("Get test loss and metrics of the model")
-        self.score = self.model.evaluate(self.x_test, self.y_test, verbose=0, sample_weight=None)
+        logger.info("Create/Update predictions for ROC curve")
+        self.pred_test = self.model.predict(self.x_test)
+        self.pred_train = self.model.predict(self.x_train)
 
         if self.dumping_enabled:
             self._dump_to_hdf5("pred_train", "pred_test")
 
     def evaluate(self):
-        pass
+        logger.info("Get test loss and metrics of the model")
+        self.score = self.model.evaluate(self.x_test, self.y_test, verbose=0,
+                                         sample_weight=self.w_train)
+
+        print('Test loss:', self.score[0])
+        print('Test accuracy:', self.score[1])
+
+        # we do not care about keeping any of this in memory -
+        # we just need to know the final scores and the architecture
+        K.clear_session()
 
     def write_friend_tree(self):
         pass
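The K.clear_session() call in evaluate() is what keeps a long GA run healthy: every genome builds a fresh Sequential model in the same process, and without it stale TensorFlow graph state accumulates across genomes. (Note also that the diff passes sample_weight=self.w_train when evaluating on the test set, which looks like it should be the test weights instead.) A minimal standalone sketch of the build-score-clear pattern, not taken from the commit:

# Standalone illustration of the clear_session pattern; names are examples.
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense

def score_architecture(nodes, x_train, y_train, x_test, y_test):
    model = Sequential()
    model.add(Dense(nodes, activation='relu', input_dim=x_train.shape[1]))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=1, verbose=0)
    score = model.evaluate(x_test, y_test, verbose=0)
    K.clear_session()  # free the graph; only the numbers survive
    return score[1]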