/* The LSDGeneTreeGenerator generates gene trees in a specified species tree
 * while taking large scale duplication (LSD) events into account. Trees are
 * generated by a birth-death process (gene evolution model) inside the
 * species tree. The LSD events are specified time points in the species tree
 * where there is a certain probability for the birth-death process to duplicate
 * while passing. The generator can also generate lengths and rates for a
 * given gene tree with node times.
 *
 * File:   LSDGeneTreeGenerator.hh
 * Author: fmattias
 *
 * Created on November 27, 2009, 4:02 PM
 */

#ifndef _LSDGENETREEGENERATOR_HH
#define	_LSDGENETREEGENERATOR_HH

#include <map>
#include <string>
#include <vector>

#include "BirthDeathProbs.hh"
#include "Density2P.hh"
#include "PRNG.hh"
#include "Tree.hh"
#include "TreeDiscretizers.hh"
#include "Probability.hh"
#include "StrStrMap.hh"
#include "lsd/LSDTimeProbs.hh"

namespace beep {

// Sentinel standing in for a subtree in which every lineage has died out.
// It is simply the null Node pointer.
static Node * const EXTINCT_SUBTREE = static_cast<Node *>(0);
    
/**
 * LSDGeneTreeGenerator
 *
 * Generates gene trees inside a species tree by a birth-death process that
 * also takes large scale duplication (LSD) events into account, and can
 * generate rates and lengths for a gene tree that already has node times.
 *
 * The generator keeps state about the _last_ generated tree (node times,
 * gene-species map and speciation nodes), which is what
 * exportGeneSpeciesMap() and getSpeciations() report.
 */
class LSDGeneTreeGenerator {
public:

    /**
     * Constructor
     *
     * Constructs an object for generating gene trees in the given species
     * tree according to the specified birth-death process. This generator
     * also considers large scale duplication events, where there is a
     * certain probability of duplication at a given time point.
     *
     * speciesTree - The species tree in which the gene trees will be generated.
     * birthRate - Birth rate of the birth-death process.
     * deathRate - Death rate of the birth-death process.
     * lsdTimes - The times of lsd events.
     *
     * NOTE(review): the class holds a LSDTimeProbs reference member
     * (m_lsdTimes); presumably it is bound to lsdTimes, in which case
     * lsdTimes must outlive this object - confirm against the definition.
     */
    LSDGeneTreeGenerator(Tree &speciesTree,
                         Real birthRate,
                         Real deathRate,
                         LSDTimeProbs &lsdTimes);

    /**
     * Copy constructor
     *
     * Creates an object equal to the given object.
     *
     * NOTE(review): since m_lsdTimes is a reference member, the copy
     * presumably refers to the same LSDTimeProbs object as other - confirm.
     */
    LSDGeneTreeGenerator(const LSDGeneTreeGenerator& other);

    /**
     * Destructor
     */
    virtual ~LSDGeneTreeGenerator();

    /**
     * generateTree
     *
     * Generates a gene tree by the birth-death process specified in the
     * constructor. The root of the gene tree will have an edge time specified
     * by topTime.
     *
     * NOTE: If topTime = 0.0 then the generation will start from the root.
     *
     * A generated tree will only be returned if it satisfies the maximum and
     * minimum number of leaves requirements. If a tree cannot be generated
     * in the specified maximum number of tries an error will be thrown.
     *
     * geneTree - The generated gene tree will be stored here.
     * topTime - The time of the top edge in the gene tree.
     */
    void generateTree(Tree &geneTree, Real topTime);

    /**
     * generateTree
     *
     * Generates a gene tree by the birth-death process specified in the
     * constructor. The root of the gene tree will have an edge time generated
     * by the birth-death process.
     *
     * A generated tree will only be returned if it satisfies the maximum and
     * minimum number of leaves requirements. If a tree cannot be generated
     * in the specified maximum number of tries an error will be thrown.
     *
     * geneTree - The generated gene tree will be stored here.
     */
    void generateTree(Tree &geneTree);

    /**
     * generateRatesAndLengths
     *
     * Generates lengths and rates for the given gene tree (with specified
     * node times) according to the given rate density function.
     *
     * geneTree - The gene tree where lengths and rates are going to be
     *            generated.
     * rateDensity - The density function to generate rates from.
     */
    void generateRatesAndLengths(Tree &geneTree, Density2P &rateDensity);

    /**
     * exportGeneSpeciesMap
     *
     * Returns a gene species map for the _last_ generated tree. The map is
     * returned by value.
     */
    StrStrMap exportGeneSpeciesMap();

    /**
     * getSpeciations
     *
     * Returns the speciation nodes for the _last_ generated tree. The vector
     * is returned by value; the Node pointers it contains are not owned by
     * the caller.
     */
    std::vector<Node *> getSpeciations();

    /**
     * setSeed
     *
     * Sets the seed of the pseudo random generator.
     *
     * seed - The new seed.
     */
    void setSeed(unsigned long seed);

    /**
     * setMaxNumberOfTries
     *
     * Sets the maximum number of tries for generating a gene tree that
     * satisfies the min leaves and max leaves requirements.
     *
     * maxNumberOfTries - Maximum number of tries for generating a tree.
     *
     */
    void setMaxNumberOfTries(unsigned int maxNumberOfTries);

    /**
     * setMinNumberOfLeaves
     *
     * Sets the minimum number of leaves to be generated. I.e. the tree
     * generator will try to generate trees until the generated tree has
     * at least minNumberOfLeaves leaves.
     *
     * minNumberOfLeaves - The minimum number of leaves that the trees are
     *                     allowed to have.
     */
    void setMinNumberOfLeaves(unsigned int minNumberOfLeaves);

    /**
     * setMaxNumberOfLeaves
     *
     * Sets the maximum number of leaves to be generated. I.e. the tree
     * generator will try to generate trees until the generated tree has
     * at most maxNumberOfLeaves leaves.
     *
     * maxNumberOfLeaves - The maximum number of leaves that the trees are
     *                     allowed to have.
     */
    void setMaxNumberOfLeaves(unsigned int maxNumberOfLeaves);

private:
    /**
     * resetState
     *
     * NOTE(review): undocumented in the original and the definition is not
     * visible here. Presumably clears the per-tree state (node times,
     * gene-species map, speciations, gene id counter) before a new tree is
     * generated - confirm against the definition.
     */
    void resetState();
    /**
     * generateTreeTopology
     *
     * Generates a gene tree topology in the given species tree, where the
     * first vertex starts at topTime. If no tree with at least two leaves
     * could be generated in MAX_TRIES tries an empty tree is returned.
     *
     * NOTE(review): the sentence above disagrees with the documentation of
     * generateTree(), which says an error is thrown and that the leaf limits
     * and the number of tries are configurable. One of the two is likely
     * outdated - confirm against the definition.
     *
     * geneTree - The generated gene tree topology will be stored here.
     * speciesTree - The species tree where the topology will be generated.
     * topTime - Time of the top edge in the generated gene tree.
     */
    void generateTreeTopology(Tree &geneTree, Tree &speciesTree, Real topTime);

    /**
     * evolveInEdge
     *
     * Recursively generates a gene tree in the subtree S^x starting from
     * edgeTimeLeft in x. This is done in the following procedure:
     *
     * 1. Generate an edge time t.
     * 2. If the lineage goes extinct in t, return EXTINCT_SUBTREE.
     * 3. If t is larger than edgeTimeLeft, start a new process in the left
     *    and right child of x by calling evolveInEdge for the respective edge.
     * 4. Otherwise start two new processes at edgeTimeLeft - t by calling
     *    evolveInEdge with x and edgeTimeLeft - t.
     *
     * geneTree - The gene tree that is being generated.
     * x - The edge in the species tree where the birth death process is
     *     currently evolving.
     * edgeTimeLeft - Time left on x for the birth-death process to evolve.
     *
     * Returns a node of the generated gene tree, or EXTINCT_SUBTREE if the
     * lineage went extinct (see step 2).
     */
    Node * evolveInEdge(Tree &geneTree, Node *x, Real edgeTimeLeft);

    /**
     * createSubTree
     *
     * Creates a subtree from leftSubTree and rightSubTree depending on
     * whether they are extinct or not. The following cases are handled:
     *
     * 1. Neither leftSubTree nor rightSubTree is extinct: their common root
     *    is created and the node time of the root is set to nodeTime.
     * 2. Exactly one of leftSubTree and rightSubTree is extinct: the
     *    non-extinct subtree is returned and no root is created (nodeTime is
     *    not used).
     * 3. Both subtrees are extinct: EXTINCT_SUBTREE is returned.
     *
     * geneTree - Gene tree in which the subtrees are being generated.
     * leftSubTree - Left part of the generated tree.
     * rightSubTree - Right part of the generated tree.
     * nodeTime - Time of the root node created if the left and right subtrees
     *            are not extinct.
     *
     */
    Node * createSubTree(Tree &geneTree, Node *leftSubTree, Node *rightSubTree, Real nodeTime);

    /**
     * setNodeTime
     *
     * Sets the node time of a _generated_ node.
     *
     * u - Node in the _generated_ gene tree.
     * nodeTime - The node time to associate with u.
     */
    void setNodeTime(Node *u, Real nodeTime);

    /**
     * getNodeTime
     *
     * Returns the node time for a _generated_ node that has been specified
     * earlier.
     *
     * u - Node in the _generated_ gene tree.
     */
    Real getNodeTime(Node *u);

    /**
     * setNodeTimes
     *
     * Assigns the node times stored in this class to the gene tree.
     *
     * geneTree - The gene tree which the node times should be assigned to.
     */
    void setNodeTimes(Tree &geneTree);

    /**
     * getLeafName
     *
     * Generates a new leaf name.
     */
    std::string getLeafName();

    /**
     * sampleTimeToDuplication
     *
     * Returns a new edge time based on the parameters for the birth-death
     * process.
     */
    Real sampleTimeToDuplication();

    /**
     * becomesExtinctIn
     *
     * Returns true if a lineage should go extinct in the specified time.
     *
     * edgeTime - The time in which the process may become extinct.
     */
    bool becomesExtinctIn(Real edgeTime);

    /**
     * overDuplication
     *
     * Returns true if the generated edge time passes a LSD associated with
     * the given edge in S.
     *
     * x - The edge that may contain an LSD.
     * timeLeft - The time left for the birth-death process in x.
     * edgeTime - The generated edge time.
     */
    bool overDuplication(Node *x, Real timeLeft, Real edgeTime);

    /**
     * timeToLSD
     *
     * Returns the distance to the LSD on the given edge x.
     *
     * x - Edge in the species tree where edgeTime has been generated
     * timeLeft - The time left for the birth-death process in x.
     */
    Real timeToLSD(Node *x, Real timeLeft);

    /**
     * generateLSD
     *
     * Returns true if a LSD duplication should occur at x.
     *
     * x - The edge which has an associated LSD probability.
     */
    bool generateLSD(Node *x);

    // Random generator
    PRNG m_randomGenerator;

    // Associated probabilities of the birth-death process
    BirthDeathProbs m_birthDeathModel;

    // Current time of each node, keyed by node pointer
    std::map<Node*, Real> m_nodeTimes;

    // Current gene species map
    StrStrMap m_geneSpeciesMap;

    // Time of LSD duplication on an edge in the species tree.
    // Held by reference: this class does not own the LSDTimeProbs object.
    LSDTimeProbs &m_lsdTimes;

    // The speciations of the last generated tree
    std::vector<Node *> m_speciations;

    // Used for generating unique names for the leaves
    unsigned int m_currentGeneID;

    // Max number of tries before giving up
    unsigned int m_maxTries;

    // Minimum number of leaves
    unsigned int m_minNumberOfLeaves;

    // Maximum number of leaves
    unsigned int m_maxNumberOfLeaves;

    // Default maximum number of tries before giving up on generating a gene
    // tree that satisfies the leaf requirements with the given parameters
    static const unsigned int DEFAULT_MAX_TRIES = 500;

    // The default maximum number of leaves in a generated tree
    static const unsigned int DEFAULT_MAX_NUMBER_OF_LEAVES = 500;

    // The default minimum number of leaves in a generated tree.
    static const unsigned int DEFAULT_MIN_NUMBER_OF_LEAVES = 2;
};

}

#endif	/* _LSDGENETREEGENERATOR_HH */

