
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source and target projects:
  • Eric.Schanet/KerasROOTClassification
  • Nikolai.Hartmann/KerasROOTClassification
Commits on Source (4)
@@ -4,3 +4,4 @@ run.py
*.swp
*.pyc
*.pdf
+*.txt
#!/usr/bin/env python
""" Class that keeps track of all genomes trained so far, and their scores.
Among other things, ensures that genomes are unique.
"""
import random
import logging
from genome import Genome
class AllGenomes(object):
"""Store all genomes
"""
def __init__(self, firstgenome):
"""Initialize
"""
self.population = []
self.population.append(firstgenome)
    def add_genome(self, genome):
        """Add the genome to our population, unless its hash is already present.
        """
        for existing in self.population:
            if genome.hash == existing.hash:
logging.info("add_genome() ERROR: hash clash - duplicate genome")
return False
self.population.append(genome)
return True
    def set_accuracy(self, genome):
        """Set the accuracy of the matching genome in our population.
        """
        for existing in self.population:
            if genome.hash == existing.hash:
                existing.accuracy = genome.accuracy
return
logging.info("set_accuracy() ERROR: Genome not found")
    def is_duplicate(self, genome):
        """Check whether a genome with the same hash is already in the population.
        """
        for existing in self.population:
            if genome.hash == existing.hash:
return True
return False
    def print_all_genomes(self):
        """Print out every genome in the population.
        """
for genome in self.population:
genome.print_genome_ma()
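A minimal usage sketch for AllGenomes (not part of the diff; the DummyGenome stand-in is hypothetical, since AllGenomes only relies on a .hash attribute and a print_genome_ma() method):

# Hypothetical stand-in for Genome; AllGenomes only needs .hash and .print_genome_ma().
from allgenomes import AllGenomes

class DummyGenome(object):
    def __init__(self, params):
        self.params = params
        self.hash = str(sorted(params.items()))  # any stable, content-based key works
        self.accuracy = 0.0
    def print_genome_ma(self):
        print(self.params, self.accuracy)

master = AllGenomes(DummyGenome({'nb_layers': 2}))
print(master.add_genome(DummyGenome({'nb_layers': 3})))   # True: new hash
print(master.add_genome(DummyGenome({'nb_layers': 3})))   # False: duplicate (logged at INFO level)
print(master.is_duplicate(DummyGenome({'nb_layers': 2}))) # True: same hash as the first genome
master.print_all_genomes()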
#!/usr/bin/env python
"""
Class that holds a genetic algorithm for evolving a network.
Inspiration:
http://lethain.com/genetic-algorithms-cool-name-damn-simple/
"""
from __future__ import print_function
import random
import logging
import copy
from functools import reduce
from operator import add
from genome import Genome
from idgen import IDgen
from allgenomes import AllGenomes
class Evolver(object):
"""Class that implements genetic algorithm."""
def __init__(self, all_possible_genes, retain=0.15, random_select=0.1, mutate_chance=0.3):
"""Create an optimizer.
Args:
all_possible_genes (dict): Possible genome parameters
retain (float): Percentage of population to retain after
each generation
random_select (float): Probability of a rejected genome
remaining in the population
mutate_chance (float): Probability a genome will be
randomly mutated
"""
self.all_possible_genes = all_possible_genes
self.retain = retain
self.random_select = random_select
self.mutate_chance = mutate_chance
#set the ID gen
self.ids = IDgen()
def create_population(self, count):
"""Create a population of random networks.
Args:
count (int): Number of networks to generate, aka the
size of the population
Returns:
            (list): Population of genome objects
"""
pop = []
i = 0
while i < count:
# Initialize a new genome.
genome = Genome( self.all_possible_genes, {}, self.ids.get_next_ID(), 0, 0, self.ids.get_Gen() )
# Set it to random parameters.
genome.set_genes_random()
if i == 0:
#this is where we will store all genomes
self.master = AllGenomes( genome )
else:
# Make sure it is unique....
while self.master.is_duplicate( genome ):
genome.mutate_one_gene()
# Add the genome to our population.
pop.append(genome)
# and add to the master list
if i > 0:
self.master.add_genome(genome)
i += 1
#self.master.print_all_genomes()
#exit()
return pop
@staticmethod
def fitness(genome):
"""Return the accuracy, which is our fitness function."""
return genome.accuracy
def grade(self, pop):
"""Find average fitness for a population.
Args:
pop (list): The population of networks/genome
Returns:
(float): The average accuracy of the population
"""
summed = reduce(add, (self.fitness(genome) for genome in pop))
return summed / float((len(pop)))
def breed(self, mom, dad):
"""Make two children from parental genes.
Args:
            mom (Genome): A parent genome
            dad (Genome): The other parent genome
        Returns:
            (list): Two genome objects
"""
children = []
#where do we recombine? 0, 1, 2, 3, 4... N?
#with four genes, there are three choices for the recombination
# ___ * ___ * ___ * ___
#0 -> no recombination, and N == length of dictionary -> no recombination
#0 and 4 just (re)create more copies of the parents
#so the range is always 1 to len(all_possible_genes) - 1
pcl = len(self.all_possible_genes)
recomb_loc = random.randint(1,pcl - 1)
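        # Worked example (hypothetical numbers): with pcl == 4 genes and
        # recomb_loc == 2, child1 takes keys[0:2] from mom and keys[2:4] from dad,
        # while child2 takes keys[0:2] from dad and keys[2:4] from mom.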
#for _ in range(2): #make _two_ children - could also make more
child1 = {}
child2 = {}
        # enforce a defined genome order by sorting the keys
        # keys = ['nb_neurons', 'nb_layers', 'activation', 'optimizer']
        keys = sorted(self.all_possible_genes)  # sorted to avoid unintentional randomization
#*** CORE RECOMBINATION CODE ****
for x in range(0, pcl):
if x < recomb_loc:
child1[keys[x]] = mom.geneparam[keys[x]]
child2[keys[x]] = dad.geneparam[keys[x]]
else:
child1[keys[x]] = dad.geneparam[keys[x]]
child2[keys[x]] = mom.geneparam[keys[x]]
# Initialize a new genome
# Set its parameters to those just determined
# they both have the same mom and dad
genome1 = Genome( self.all_possible_genes, child1, self.ids.get_next_ID(), mom.u_ID, dad.u_ID, self.ids.get_Gen() )
genome2 = Genome( self.all_possible_genes, child2, self.ids.get_next_ID(), mom.u_ID, dad.u_ID, self.ids.get_Gen() )
#at this point, there is zero guarantee that the genome is actually unique
# Randomly mutate one gene
if self.mutate_chance > random.random():
genome1.mutate_one_gene()
if self.mutate_chance > random.random():
genome2.mutate_one_gene()
#do we have a unique child or are we just retraining one we already have anyway?
while self.master.is_duplicate(genome1):
genome1.mutate_one_gene()
self.master.add_genome(genome1)
while self.master.is_duplicate(genome2):
genome2.mutate_one_gene()
self.master.add_genome(genome2)
children.append(genome1)
children.append(genome2)
return children
def evolve(self, pop):
"""Evolve a population of genomes.
Args:
            pop (list): A list of Genome objects
        Returns:
            (list): The evolved population of genomes
"""
#increase generation
self.ids.increase_Gen()
# Get scores for each genome
graded = [(self.fitness(genome), genome) for genome in pop]
#and use those scores to fill in the master list
for genome in pop:
self.master.set_accuracy(genome)
# Sort on the scores.
graded = [x[1] for x in sorted(graded, key=lambda x: x[0], reverse=True)]
# Get the number we want to keep unchanged for the next cycle.
retain_length = int(len(graded)*self.retain)
# In this first step, we keep the 'top' X percent (as defined in self.retain)
# We will not change them, except we will update the generation
new_generation = graded[:retain_length]
# For the lower scoring ones, randomly keep some anyway.
# This is wasteful, since we _know_ these are bad, so why keep rescoring them without modification?
# At least we should mutate them
for genome in graded[retain_length:]:
if self.random_select > random.random():
gtc = copy.deepcopy(genome)
while self.master.is_duplicate(gtc):
gtc.mutate_one_gene()
gtc.set_generation( self.ids.get_Gen() )
new_generation.append(gtc)
self.master.add_genome(gtc)
# Now find out how many spots we have left to fill.
ng_length = len(new_generation)
desired_length = len(pop) - ng_length
children = []
# Add children, which are bred from pairs of remaining (i.e. very high or lower scoring) genomes.
while len(children) < desired_length:
            # Get a random mom and dad; random.sample guarantees two distinct
            # indices, and range(ng_length) lets the last genome be picked too.
            parents = random.sample(range(ng_length), k=2)
i_male = parents[0]
i_female = parents[1]
male = new_generation[i_male]
female = new_generation[i_female]
# Recombine and mutate
babies = self.breed(male, female)
# the babies are guaranteed to be novel
# Add the children one at a time.
for baby in babies:
# Don't grow larger than desired length.
#if len(children) < desired_length:
children.append(baby)
new_generation.extend(children)
return new_generation
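A standalone sketch of the single-point crossover that Evolver.breed() performs, shown on plain dicts (the gene names and values below are illustrative, not the actual search space):

import random

mom = {'activation': 'relu', 'lr': 0.01, 'momentum': 0.9, 'nb_layers': 3}
dad = {'activation': 'tanh', 'lr': 0.10, 'momentum': 0.5, 'nb_layers': 6}

keys = sorted(mom)  # fixed gene order, as in breed()
# 0 and len(keys) would only clone the parents, hence the range 1..len(keys)-1
recomb_loc = random.randint(1, len(keys) - 1)

child1 = {k: (mom if i < recomb_loc else dad)[k] for i, k in enumerate(keys)}
child2 = {k: (dad if i < recomb_loc else mom)[k] for i, k in enumerate(keys)}
print(recomb_loc, child1, child2)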
#!/usr/bin/env python
"""The genome to be evolved."""
import random
import logging
import hashlib
import copy
from train import train_and_score
class Genome(object):
"""
Represents one genome and all relevant utility functions (add, mutate, etc.).
"""
    def __init__(self, all_possible_genes=None, geneparam=None, u_ID=0, mom_ID=0, dad_ID=0, gen=0):
"""Initialize a genome.
Args:
all_possible_genes (dict): Parameters for the genome, includes:
gene_nb_neurons (list): [64, 128, 256]
gene_nb_layers (list): [1, 2, 3, 4]
gene_activation (list): ['relu', 'elu']
gene_optimizer (list): ['rmsprop', 'adam']
"""
self.accuracy = 0.0
self.all_possible_genes = all_possible_genes
        self.geneparam = geneparam if geneparam is not None else {}  # (dict): actual genome parameters; avoids a shared mutable default
self.u_ID = u_ID
self.parents = [mom_ID, dad_ID]
self.generation = gen
#hash only makes sense when we have specified the genes
if not geneparam:
self.hash = 0
else:
self.update_hash()
def update_hash(self):
"""
        Refresh each genome's unique hash - needs to run after any genome changes.
"""
# + str(self.geneparam['optimizer'])
genh = str(self.geneparam['nb_neurons']) + self.geneparam['activation'] \
+ str(self.geneparam['nb_layers']) \
+ str(self.geneparam['lr']) \
+ str(self.geneparam['decay']) \
+ str(self.geneparam['momentum'])
self.hash = hashlib.md5(genh.encode("UTF-8")).hexdigest()
self.accuracy = 0.0
def set_genes_random(self):
"""Create a random genome."""
#print("set_genes_random")
self.parents = [0,0] #very sad - no parents :(
for key in self.all_possible_genes:
self.geneparam[key] = random.choice(self.all_possible_genes[key])
self.update_hash()
    def mutate_one_gene(self):
        """Randomly mutate one gene in the genome, in place.
        The new value is guaranteed to differ from the current one.
        """
# Which gene shall we mutate? Choose one of N possible keys/genes.
gene_to_mutate = random.choice( list(self.all_possible_genes.keys()) )
# And then let's mutate one of the genes.
# Make sure that this actually creates mutation
current_value = self.geneparam[gene_to_mutate]
possible_choices = copy.deepcopy(self.all_possible_genes[gene_to_mutate])
possible_choices.remove(current_value)
self.geneparam[gene_to_mutate] = random.choice( possible_choices )
self.update_hash()
    def set_generation(self, generation):
        """Needed when a genome is passed on from one generation to the next:
        the ID stays the same, but the generation is increased."""
self.generation = generation
#logging.info("Setting Generation to %d" % self.generation)
    def set_genes_to(self, geneparam, mom_ID, dad_ID):
        """Set genome properties; used when breeding kids.
        Args:
            geneparam (dict): The genome parameters
            mom_ID (int): unique ID of the first parent
            dad_ID (int): unique ID of the second parent
        """
self.parents = [mom_ID, dad_ID]
self.geneparam = geneparam
self.update_hash()
    def train(self):
        """Train the genome and record the accuracy."""
if self.accuracy == 0.0: #don't bother retraining ones we already trained
self.accuracy = train_and_score(self.geneparam)
def print_genome(self):
"""Print out a genome."""
logging.info(self.geneparam)
logging.info("Acc: %.2f%%" % (self.accuracy * 100))
logging.info("UniID: %d" % self.u_ID)
logging.info("Mom and Dad: %d %d" % (self.parents[0], self.parents[1]))
logging.info("Gen: %d" % self.generation)
logging.info("Hash: %s" % self.hash)
def print_genome_ma(self):
"""Print out a genome."""
logging.info(self.geneparam)
logging.info("Acc: %.2f%% UniID: %d Mom and Dad: %d %d Gen: %d" % (self.accuracy * 100, self.u_ID, self.parents[0], self.parents[1], self.generation))
logging.info("Hash: %s" % self.hash)
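The hash that drives every uniqueness check above is just the MD5 digest of the concatenated gene values. A self-contained sketch of what update_hash() computes (the parameter values are illustrative):

import hashlib

geneparam = {'nb_neurons': 64, 'activation': 'relu', 'nb_layers': 2,
             'lr': 0.01, 'decay': 1e-4, 'momentum': 0.9}
genh = (str(geneparam['nb_neurons']) + geneparam['activation']
        + str(geneparam['nb_layers']) + str(geneparam['lr'])
        + str(geneparam['decay']) + str(geneparam['momentum']))
print(hashlib.md5(genh.encode("UTF-8")).hexdigest())  # identical params give an identical hash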
#!/usr/bin/env python
"""Provide unique genome IDs."""
import logging
class IDgen():
"""Generate unique IDs.
"""
def __init__(self):
"""Keep track of IDs.
"""
self.currentID = 0
self.currentGen = 1
    def get_next_ID(self):
        """Return the next unique genome ID."""
        self.currentID += 1
        return self.currentID

    def increase_Gen(self):
        """Advance the generation counter."""
        self.currentGen += 1

    def get_Gen(self):
        """Return the current generation."""
        return self.currentGen
#!/usr/bin/env python
"""Entry point to evolving the neural network. Start here."""
from __future__ import print_function
from evolver import Evolver
from tqdm import tqdm
import logging
import sys
# Setup logging.
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%m/%d/%Y %I:%M:%S %p',
level=logging.INFO,
filename='log.txt'
)
def train_genomes(genomes):
"""Train each genome.
Args:
        genomes (list): Current population of genomes
"""
    logging.info("***train_genomes(genomes)***")
pbar = tqdm(total=len(genomes))
for genome in genomes:
genome.train()
pbar.update(1)
pbar.close()
def get_average_accuracy(genomes):
"""Get the average accuracy for a group of networks/genomes.
Args:
networks (list): List of networks/genomes
Returns:
float: The average accuracy of a population of networks/genomes.
"""
total_accuracy = 0
for genome in genomes:
total_accuracy += genome.accuracy
return total_accuracy / len(genomes)
def generate(generations, population, all_possible_genes):
"""Generate a network with the genetic algorithm.
Args:
generations (int): Number of times to evolve the population
population (int): Number of networks in each generation
all_possible_genes (dict): Parameter choices for networks
"""
logging.info("***generate(generations, population, all_possible_genes)***")
evolver = Evolver(all_possible_genes)
genomes = evolver.create_population(population)
# Evolve the generation.
for i in range( generations ):
logging.info("***Now in generation %d of %d***" % (i + 1, generations))
print_genomes(genomes)
# Train and get accuracy for networks/genomes.
train_genomes(genomes)
# Get the average accuracy for this generation.
average_accuracy = get_average_accuracy(genomes)
# Print out the average accuracy each generation.
logging.info("Generation average: %.2f%%" % (average_accuracy * 100))
logging.info('-'*80) #-----------
# Evolve, except on the last iteration.
if i != generations - 1:
# Evolve!
genomes = evolver.evolve(genomes)
# Sort our final population according to performance.
genomes = sorted(genomes, key=lambda x: x.accuracy, reverse=True)
# Print out the top 5 networks/genomes.
print_genomes(genomes[:5])
#save_path = saver.save(sess, '/output/model.ckpt')
#print("Model saved in file: %s" % save_path)
def print_genomes(genomes):
"""Print a list of genomes.
Args:
genomes (list): The population of networks/genomes
"""
logging.info('-'*80)
for genome in genomes:
genome.print_genome()
def main():
"""Evolve a genome."""
population = 20 # Number of networks/genomes in each generation.
#we only need to train the new ones....
generations = 2 # Number of times to evolve the population.
all_possible_genes = {
'nb_neurons': [8, 16, 32, 64, 128, 256, 512, 1024],
'nb_layers': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'activation': ['relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid','softplus','linear'],
#'optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad','adadelta', 'adamax', 'nadam']
'lr': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 1.0, 10.0, 100.0],
'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}
print("***Evolving for %d generations with population size = %d***" % (generations, population))
generate(generations, population, all_possible_genes)
if __name__ == '__main__':
main()
#!/usr/bin/env python
import logging
from optimizer import Optimizer
from tqdm import tqdm
# Setup logging.
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%m/%d/%Y %I:%M:%S %p',
level=logging.DEBUG,
filename='log.txt'
)
def train_networks(networks):
"""Train each network.
Args:
networks (list): Current population of networks
"""
pbar = tqdm(total=len(networks))
for network in networks:
network.train()
pbar.update(1)
pbar.close()
def get_average_accuracy(networks):
"""Get the average accuracy for a group of networks.
Args:
networks (list): List of networks
Returns:
float: The average accuracy of a population of networks.
"""
total_accuracy = 0
for network in networks:
total_accuracy += network.accuracy
return total_accuracy / len(networks)
def generate(generations, population, nn_param_choices):
"""Generate a network with the genetic algorithm.
Args:
        generations (int): Number of times to evolve the population
population (int): Number of networks in each generation
nn_param_choices (dict): Parameter choices for networks
"""
optimizer = Optimizer(nn_param_choices)
networks = optimizer.create_population(population)
# Evolve the generation.
for i in range(generations):
logging.info("***Doing generation %d of %d***" %
(i + 1, generations))
# Train and get accuracy for networks.
train_networks(networks)
# Get the average accuracy for this generation.
average_accuracy = get_average_accuracy(networks)
# Print out the average accuracy each generation.
logging.info("Generation average: %.2f%%" % (average_accuracy * 100))
logging.info('-'*80)
# Evolve, except on the last iteration.
if i != generations - 1:
# Do the evolution.
networks = optimizer.evolve(networks)
# Sort our final population.
networks = sorted(networks, key=lambda x: x.accuracy, reverse=True)
# Print out the top 5 networks.
print_networks(networks[:5])
def print_networks(networks):
"""Print a list of networks.
Args:
networks (list): The population of networks
"""
logging.info('-'*80)
for network in networks:
network.print_network()
def main():
"""Evolve a network."""
generations = 7 # Number of times to evolve the population.
population = 5 # Number of networks in each generation.
nn_param_choices = {
'nb_neurons': [8, 16, 32, 64, 128, 256, 512, 768, 1024],
'nb_layers': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'activation': ['relu', 'elu', 'tanh', 'sigmoid'],
#'optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad',
# 'adadelta', 'adamax', 'nadam'],
#'optimizer_opts': {'lr': [0.1, 0.5, 1.0, 10.0, 100.0],
# 'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
# 'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
# 0.8, 0.9, 1.0]},
'lr': [0.1, 0.5, 1.0, 10.0, 100.0],
'decay': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}
logging.info("***Evolving %d generations with population %d***" %
(generations, population))
generate(generations, population, nn_param_choices)
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""Class that represents the network to be evolved."""
import random
import logging
from train import train_and_score
class Network(object):
"""Represent a network and let us operate on it.
Currently only works for an MLP.
"""
def __init__(self, nn_param_choices=None):
"""Initialize our network.
Args:
nn_param_choices (dict): Parameters for the network, includes:
nb_neurons (list): [64, 128, 256]
nb_layers (list): [1, 2, 3, 4]
activation (list): ['relu', 'elu']
optimizer (list): ['rmsprop', 'adam']
                optimizer_opts (dict of lists): {'lr': [0.5, ...], 'decay': [...], 'momentum': [...]}
"""
self.accuracy = 0.
self.nn_param_choices = nn_param_choices
self.network = {} # (dic): represents MLP network parameters
def create_random(self):
"""Create a random network."""
for key in self.nn_param_choices:
self.network[key] = random.choice(self.nn_param_choices[key])
def create_set(self, network):
"""Set network properties.
Args:
network (dict): The network parameters
"""
self.network = network
    def train(self):
        """Train the network and record the accuracy."""
if self.accuracy == 0.:
self.accuracy = train_and_score(self.network)
def print_network(self):
"""Print out a network."""
logging.info(self.network)
logging.info("Network accuracy: %.2f%%" % (self.accuracy * 100))
#!/usr/bin/env python
"""
Class that holds a genetic algorithm for evolving a network.
Credit:
    A lot of this code was originally inspired by:
http://lethain.com/genetic-algorithms-cool-name-damn-simple/
"""
from functools import reduce
from operator import add
import random
from network import Network
class Optimizer(object):
"""Class that implements genetic algorithm for MLP optimization."""
def __init__(self, nn_param_choices, retain=0.4,
random_select=0.1, mutate_chance=0.2):
"""Create an optimizer.
Args:
            nn_param_choices (dict): Possible network parameters
retain (float): Percentage of population to retain after
each generation
random_select (float): Probability of a rejected network
remaining in the population
mutate_chance (float): Probability a network will be
randomly mutated
"""
self.mutate_chance = mutate_chance
self.random_select = random_select
self.retain = retain
self.nn_param_choices = nn_param_choices
def create_population(self, count):
"""Create a population of random networks.
Args:
count (int): Number of networks to generate, aka the
size of the population
Returns:
(list): Population of network objects
"""
pop = []
for _ in range(0, count):
# Create a random network.
network = Network(self.nn_param_choices)
network.create_random()
# Add the network to our population.
pop.append(network)
return pop
@staticmethod
def fitness(network):
"""Return the accuracy, which is our fitness function."""
return network.accuracy
def grade(self, pop):
"""Find average fitness for a population.
Args:
pop (list): The population of networks
Returns:
(float): The average accuracy of the population
"""
summed = reduce(add, (self.fitness(network) for network in pop))
return summed / float((len(pop)))
    def breed(self, mother, father):
        """Make two children from parts of their parents.
Args:
mother (dict): Network parameters
father (dict): Network parameters
Returns:
(list): Two network objects
"""
children = []
for _ in range(2):
child = {}
# Loop through the parameters and pick params for the kid.
for param in self.nn_param_choices:
child[param] = random.choice(
[mother.network[param], father.network[param]]
)
# Now create a network object.
network = Network(self.nn_param_choices)
network.create_set(child)
# Randomly mutate some of the children.
if self.mutate_chance > random.random():
network = self.mutate(network)
children.append(network)
return children
def mutate(self, network):
"""Randomly mutate one part of the network.
Args:
network (dict): The network parameters to mutate
Returns:
(Network): A randomly mutated network object
"""
# Choose a random key.
mutation = random.choice(list(self.nn_param_choices.keys()))
# Mutate one of the params.
network.network[mutation] = random.choice(self.nn_param_choices[mutation])
return network
def evolve(self, pop):
"""Evolve a population of networks.
Args:
pop (list): A list of network parameters
Returns:
(list): The evolved population of networks
"""
# Get scores for each network.
graded = [(self.fitness(network), network) for network in pop]
# Sort on the scores.
graded = [x[1] for x in sorted(graded, key=lambda x: x[0], reverse=True)]
# Get the number we want to keep for the next gen.
retain_length = int(len(graded)*self.retain)
# The parents are every network we want to keep.
parents = graded[:retain_length]
# For those we aren't keeping, randomly keep some anyway.
for individual in graded[retain_length:]:
if self.random_select > random.random():
parents.append(individual)
# Now find out how many spots we have left to fill.
parents_length = len(parents)
desired_length = len(pop) - parents_length
children = []
# Add children, which are bred from two remaining networks.
while len(children) < desired_length:
# Get a random mom and dad.
male = random.randint(0, parents_length-1)
female = random.randint(0, parents_length-1)
# Assuming they aren't the same network...
if male != female:
male = parents[male]
female = parents[female]
# Breed them.
babies = self.breed(male, female)
# Add the children one at a time.
for baby in babies:
# Don't grow larger than desired length.
if len(children) < desired_length:
children.append(baby)
parents.extend(children)
return parents
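A quick numeric illustration of the selection step in Optimizer.evolve(), using the default retain=0.4 on a population of 5 (the accuracies are made up):

accuracies = [0.81, 0.76, 0.64, 0.52, 0.40]  # graded population, best first
retain_length = int(len(accuracies) * 0.4)   # 2 genomes kept unconditionally as parents
print(accuracies[:retain_length])            # [0.81, 0.76]
# each of the remaining 3 still survives with probability random_select=0.1,
# and the open slots are refilled with children from breed()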
#!/usr/bin/env python
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
import toolkit
from toolkit import KerasROOTClassification
def init_model(geneparam):
nb_layers = geneparam['nb_layers']
nb_neurons = geneparam['nb_neurons']
activation = geneparam['activation']
optimizer = geneparam['optimizer']
#lr = network['lr']
#decay = network['decay']
#momentum = network['momentum']
filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"
c = KerasROOTClassification("",
signal_trees = [(filename, "GG_oneStep_1545_1265_985_NoSys")],
bkg_trees = [(filename, "ttbar_NoSys"),
(filename, "wjets_Sherpa221_NoSys"),
(filename, "zjets_Sherpa221_NoSys"),
(filename, "diboson_Sherpa221_NoSys"),
(filename, "ttv_NoSys"),
(filename, "singletop_NoSys")
],
dumping_enabled=False,
optimizer=optimizer,
layers=nb_layers,
nodes=nb_neurons,
activation_function=activation,
# optimizer_opts=dict(lr=lr, decay=decay,
# momentum=momentum),
earlystopping_opts=dict(monitor='val_loss',
min_delta=0, patience=2, verbose=0, mode='auto'),
# optimizer="Adam",
selection="lep1Pt<5000", # cut out a few very weird outliers
branches = ["met", "mt"],
weight_expr = "eventWeight*genWeight",
identifiers = ["DatasetNumber", "EventNumber"],
step_bkg = 100)
return c
def train_and_score(geneparam):
model = init_model(geneparam)
model.train(epochs=20)
score = model.score
return score[1] # 1 is accuracy. 0 is loss.
#!/usr/bin/env python
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
import toolkit
from toolkit import KerasROOTClassification
def init_model(geneparam):
nb_layers = geneparam['nb_layers']
nb_neurons = geneparam['nb_neurons']
activation = geneparam['activation']
#optimizer = geneparam['optimizer']
lr = geneparam['lr']
decay = geneparam['decay']
momentum = geneparam['momentum']
filename = "/project/etp4/nhartmann/trees/allTrees_m1.8_NoSys.root"
c = KerasROOTClassification("",
signal_trees = [(filename, "GG_oneStep_1545_1265_985_NoSys")],
bkg_trees = [(filename, "ttbar_NoSys"),
(filename, "wjets_Sherpa221_NoSys"),
(filename, "zjets_Sherpa221_NoSys"),
(filename, "diboson_Sherpa221_NoSys"),
(filename, "ttv_NoSys"),
(filename, "singletop_NoSys")
],
branches = ["jet1Pt", "jet1Phi==-999?0:jet1Phi", "jet1Eta==-999?0:jet1Eta",
"jet2Pt", "jet2Phi==-999?0:jet2Phi", "jet2Eta==-999?0:jet2Eta",
"jet3Pt", "jet3Phi==-999?0:jet3Phi", "jet3Eta==-999?0:jet3Eta",
"jet4Pt", "jet4Phi==-999?0:jet4Phi", "jet4Eta==-999?0:jet4Eta",
"jet5Pt", "jet5Phi==-999?0:jet5Phi", "jet5Eta==-999?0:jet5Eta",
"jet6Pt", "jet6Phi==-999?0:jet6Phi", "jet6Eta==-999?0:jet6Eta",
"jet7Pt", "jet7Phi==-999?0:jet7Phi", "jet7Eta==-999?0:jet7Eta",
"jet8Pt", "jet8Phi==-999?0:jet8Phi", "jet8Eta==-999?0:jet8Eta",
"lep1Pt", "lep1Phi", "lep1Eta", "nJet30",
"met", "met_Phi"],
dumping_enabled=False,
optimizer="SGD",
layers=nb_layers,
nodes=nb_neurons,
activation_function=activation,
optimizer_opts=dict(lr=lr, decay=decay,
momentum=momentum),
earlystopping_opts=dict(monitor='val_loss',
min_delta=0, patience=2, verbose=0, mode='auto'),
selection="lep1Pt<5000", # cut out a few very weird outliers
weight_expr = "eventWeight*genWeight",
identifiers = ["DatasetNumber", "EventNumber"],
step_bkg = 100)
return c
def train_and_score(geneparam):
model = init_model(geneparam)
model.train(epochs=20)
model.evaluate()
score = model.score
return score[1] # 1 is accuracy. 0 is loss.
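For reference, a geneparam dict as the evolver would hand it to train_and_score() above, matching the gene pool defined in main.py (the values are picked for illustration):

geneparam = {
    'nb_layers': 3,
    'nb_neurons': 128,
    'activation': 'relu',
    'lr': 0.01,
    'decay': 1e-4,
    'momentum': 0.9,
}
# accuracy = train_and_score(geneparam)  # would train on the ROOT trees and return score[1]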
@@ -17,8 +17,9 @@ from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.externals import joblib
from sklearn.metrics import roc_curve, auc
+from keras import backend as K
from keras.models import Sequential
-from keras.layers import Dense
+from keras.layers import Dense, Dropout
from keras.models import model_from_json
from keras.callbacks import History, EarlyStopping
from keras.optimizers import SGD
@@ -44,19 +45,21 @@ class KerasROOTClassification(object):
# Datasets that are stored to (and dynamically loaded from) hdf5
-    dataset_names = ["x_train", "x_test", "y_train", "y_test", "w_train", "w_test", "scores_train", "scores_test"]
+    dataset_names = ["x_train", "x_test", "y_train", "y_test", "w_train", "w_test", "pred_train", "pred_test"]
# Datasets that are retrieved from ROOT trees the first time
dataset_names_tree = ["x_train", "x_test", "y_train", "y_test", "w_train", "w_test"]
def __init__(self, name, *args, **kwargs):
self._init_from_args(name, *args, **kwargs)
-        with open(os.path.join(self.project_dir, "options.json"), "w") as of:
-            json.dump(dict(args=args, kwargs=kwargs), of)
+        if self.dumping_enabled:
+            with open(os.path.join(self.project_dir, "options.json"), "w") as of:
+                json.dump(dict(args=args, kwargs=kwargs), of)
def _init_from_args(self, name,
signal_trees, bkg_trees, branches, weight_expr, identifiers,
+                        dumping_enabled=True,
selection=None,
layers=3,
nodes=64,
@@ -72,6 +75,7 @@ class KerasROOTClassification(object):
earlystopping_opts=None):
self.name = name
+        self.dumping_enabled = dumping_enabled
self.signal_trees = signal_trees
self.bkg_trees = bkg_trees
self.branches = branches
@@ -114,8 +118,9 @@ class KerasROOTClassification(object):
self._y_test = None
self._w_train = None
self._w_test = None
-        self._scores_train = None
-        self._scores_test = None
+        self.pred_train = None
+        self.pred_test = None
+        self.score = None
self.s_eventlist_train = None
self.b_eventlist_train = None
@@ -169,8 +174,8 @@ class KerasROOTClassification(object):
branches=self.branches+[self.weight_expr],
selection=self.selection,
start=1, step=self.step_bkg)
-        self._dump_training_list()
+        if self.dumping_enabled:
+            self._dump_training_list()
self.s_eventlist_train = self.s_train[self.identifiers]
self.b_eventlist_train = self.b_train[self.identifiers]
@@ -196,7 +201,8 @@ class KerasROOTClassification(object):
self.y_test[:len(self.s_test)] = 1
self.y_test[len(self.s_test):] = 0
-        self._dump_to_hdf5(*self.dataset_names_tree)
+        if self.dumping_enabled:
+            self._dump_to_hdf5(*self.dataset_names_tree)
self.data_loaded = True
@@ -261,7 +267,8 @@ class KerasROOTClassification(object):
# probably we either want to fit only training data or training and test data together
# logger.info("Fitting StandardScaler to test data")
# self._scaler.fit(self.x_test)
-        joblib.dump(self._scaler, filename)
+        if self.dumping_enabled:
+            joblib.dump(self._scaler, filename)
return self._scaler
@@ -337,6 +344,7 @@ class KerasROOTClassification(object):
# the other hidden layers
for layer_number in range(self.layers-1):
self._model.add(Dense(self.nodes, activation=self.activation_function))
+            self._model.add(Dropout(0.2)) # hard-coded dropout for each layer
# last layer is one neuron (binary classification)
self._model.add(Dense(1, activation='sigmoid'))
logger.info("Using {}(**{}) as Optimizer".format(self.optimizer, self.optimizer_opts))
@@ -354,8 +362,9 @@ class KerasROOTClassification(object):
logger.info("No weights found, starting completely new model")
# dump to json for documentation
-        with open(os.path.join(self.project_dir, "model.json"), "w") as of:
-            of.write(self._model.to_json())
+        if self.dumping_enabled:
+            with open(os.path.join(self.project_dir, "model.json"), "w") as of:
+                of.write(self._model.to_json())
return self._model
@@ -386,19 +395,20 @@ class KerasROOTClassification(object):
np.random.shuffle(self.y_train)
np.random.set_state(rn_state)
np.random.shuffle(self.w_train)
-        if self._scores_test is not None:
+        if self.pred_test is not None:
            np.random.set_state(rn_state)
-            np.random.shuffle(self._scores_test)
+            np.random.shuffle(self.pred_test)
def train(self, epochs=10):
self.load()
-        for branch_index, branch in enumerate(self.branches):
-            self.plot_input(branch_index)
+        if self.dumping_enabled:
+            for branch_index, branch in enumerate(self.branches):
+                self.plot_input(branch_index)

        self.total_epochs = self._read_info("epochs", 0)
logger.info("Train model")
try:
@@ -418,25 +428,38 @@ class KerasROOTClassification(object):
except KeyboardInterrupt:
logger.info("Interrupt training - continue with rest")
-        logger.info("Save history")
-        self._dump_history()
+        if self.dumping_enabled:
+            logger.info("Save history")
+            self._dump_history()

        logger.info("Save weights")
        self.model.save_weights(os.path.join(self.project_dir, "weights.h5"))

        self.total_epochs += epochs
        self._write_info("epochs", self.total_epochs)
-        logger.info("Create/Update scores for ROC curve")
-        self.scores_test = self.model.predict(self.x_test)
-        self.scores_train = self.model.predict(self.x_train)
+        logger.info("Create/Update predictions for ROC curve")
+        self.pred_test = self.model.predict(self.x_test)
+        self.pred_train = self.model.predict(self.x_train)

-        self._dump_to_hdf5("scores_train", "scores_test")
+        if self.dumping_enabled:
+            self._dump_to_hdf5("pred_train", "pred_test")
def evaluate(self):
-        pass
+        logger.info("Get test loss and metrics of the model")
+        self.score = self.model.evaluate(self.x_test, self.y_test, verbose=0,
+                                         sample_weight=self.w_test)  # weights must come from the test set, not w_train
+        print('Test loss:', self.score[0])
+        print('Test accuracy:', self.score[1])
+        # we do not care about keeping any of this in memory -
+        # we just need to know the final scores and the architecture
+        K.clear_session()
def write_friend_tree(self):
pass
@@ -521,7 +544,7 @@ class KerasROOTClassification(object):
def plot_ROC(self):
logger.info("Plot ROC curve")
-        fpr, tpr, threshold = roc_curve(self.y_test, self.scores_test, sample_weight=self.w_test)
+        fpr, tpr, threshold = roc_curve(self.y_test, self.pred_test, sample_weight=self.w_test)
fpr = 1.0 - fpr
roc_auc = auc(tpr, fpr)
@@ -571,7 +594,10 @@ class KerasROOTClassification(object):
def create_getter(dataset_name):
def getx(self):
if getattr(self, "_"+dataset_name) is None:
-            self._load_from_hdf5(dataset_name)
+            try:
+                self._load_from_hdf5(dataset_name)
+            except KeyError:
+                logger.info("KeyError")
return getattr(self, "_"+dataset_name)
return getx
@@ -580,11 +606,12 @@ def create_setter(dataset_name):
setattr(self, "_"+dataset_name, value)
return setx
'''
# define getters and setters for all datasets
for dataset_name in KerasROOTClassification.dataset_names:
setattr(KerasROOTClassification, dataset_name, property(create_getter(dataset_name),
create_setter(dataset_name)))
'''
if __name__ == "__main__":
......