#ifndef _repeat_machine_h_
#define _repeat_machine_h_

#include <cstring>
#include <list>
#include <string>

#include "hash_map.h"


// Transition types of a repeat machine, in the same order as the transition
// probabilities documented on RepeatMachine::prob.
// NOTE(review): presumably used as indices into RepeatMachine::prob and into the
// second dimension of RepeatMachine::State::counts - confirm against the .cpp.
#define START 0 
#define END 1
#define CONTINUE 2
#define MATCH 3
#define CHANGE 4
#define INDEL 5

// Type ids of the repeat machines.
// NOTE(review): presumably the values passed as type_id / machine_id - confirm.
#define DIRECT_REPEAT 0
#define INVERTED_REPEAT 1
#define MIRROR_REPEAT 2

// Dimensions of RepeatMachine::State::counts:
// NUM_MACHINES rows by NUM_COUNTS columns.
#define NUM_COUNTS 6
#define NUM_MACHINES 3

// Boolean direction values.
// NOTE(review): presumably the arguments for the `dir` / `direction` parameters
// of the *Dir helper functions of RepeatMachine - confirm.
#define FORWARD true
#define BACKWARD false

// NOTE(review): a using-directive in a header affects every translation unit
// that includes it. The declarations below rely on it (e.g. unqualified `list`),
// so it cannot be removed without qualifying those names and checking includers.
using namespace std;

namespace papi
{
    /**
     * @class RepeatMachine
     * 
     * Base class for a repeat machine in the repeat graph
     */
    /**
     * Abstract base class for a repeat machine in the repeat graph.
     *
     * Derived classes implement the pure virtual row operations; the protected
     * *Dir helpers take a direction flag so that derived classes can share them.
     *
     * NOTE(review): the class stores several raw pointers; which of them it owns
     * cannot be determined from this header - check the constructor/destructor.
     */
    class RepeatMachine
    {
    public:
        struct State;
        struct Region;
        
        /**
         * Constructor
         * 
         * @param type_id Type ID of the repeat machine
         * @param word_pos_map Map of all q-gram positions
         * @param minimum_region_size Size of the region which is activated around relevant states
         * @param minimum_hit_length Minimum length of an exact repeat that can activate a region
         * @param minimum_relative_probability States with probability lower than this become insignificant
         * @param prob_char Character distribution
         * @param prob_markov Conditional character distribution (Markov transition probabilities)
         * @param markov_order Order of the Markov model
         */
        RepeatMachine(
                      int type_id,
                      unsigned_cstring_fixed_length_list_long_long_hash_map *word_pos_map, 
                      long minimum_region_size, 
                      long minimum_hit_length,
                      float minimum_relative_probability, 
                      float *prob_char,
                      unsigned_cstring_fixed_length_ptr_float_hash_map *prob_markov, 
                      int markov_order);
        
        /**
         * Destructor (virtual, since the class is used as a polymorphic base)
         */
        virtual ~RepeatMachine();

        /**
         * Add new repeat start states in the active regions and merge them with the
         * corresponding internal states of the repeat graph
         *
         * @param[in] baseState Current base state of the repeat graph
         * @param[in] pos Current buffer position
         */
        virtual void mergeWithNewStartStates(State& baseState,long long pos) = 0;
        
        /**
         * Finish calculations in the current row and traverse repeat end edges to the base state
         *
         * @param[in,out] baseState Base state of the current row
         */
        virtual void finishRow(State& baseState) = 0;
        
        /**
         * Update the state list with the current active regions
         */
        void updateStateList();
        
        /**
         * Update the age of the active regions
         */
        void updateAge(); 
        
        /**
         * Find exact matches which are at least as long as the minimum hit length
         * and activate the corresponding regions
         *
         * @param[in] buffer Text buffer
         * @param[in] pos Current position in the buffer
         */
        virtual void determineNewRegions(const unsigned char *buffer,long long pos) = 0;
        
        /**
         * Calculate the next row of the repeat graph by traversing the edges and update the base state
         *
         * @param[in,out] baseState Base state of the current row -> base state of the next row
         * @param[in] buffer Text buffer
         * @param[in] pos Position in the buffer
         */
        virtual void calcNextRowAndUpdateBaseState(State & baseState, const unsigned char* buffer, long long pos) = 0;
        
        /**
         * Deactivate insignificant old regions and let the remaining active regions grow in repeat direction
         */
        virtual void updateOldRegions()= 0;
        
        /**
         * Activate all states of the current row. Used for the full algorithm.
         *
         * @param[in] pos Current buffer position
         */
        virtual void addCompleteRegion(long long pos) = 0;
        
        
        /**
         * Normalize state probabilities relative to the base state
         *
         * @param[in] baseValue Probability of the base state
         */
        void normalize(float baseValue);
        
        /**
         * Re-estimate the repeat machine parameters after an iteration of the EM algorithm
         *
         * @param[in] baseState Base state of the last row and therefore the end state of the graph
         */
        void reestimateParameters(State &baseState);
        
        /**
         * Merge new regions and old regions
         */
        void mergeRegions();
        
        /**
         * Finish calculations of the last row in the repeat graph before the iteration ends
         * and calculate the end state
         * 
         * @param[in,out] baseState Base state which, after the final calculations, is the end state of the repeat graph.
         */
        void finishLastRow(State& baseState);
        
        /// Type id of the repeat machine
        int type_id;
        
        /**
         * Transition probabilities within the repeat graph (repeat machine parameters):
         * - Start
         * - End
         * - Continue
         * - Match
         * - Change
         * - Indel (in this model implementation insert and delete are symmetric and have the same probabilities)
         */
        float *prob;
        
    protected:
        /// Current active regions
        list<Region> *active_regions;
        
        /// Saves old active regions temporarily before replacing them with new active regions
        list<Region> *active_regions_save;
        
        /// Regions newly activated by exact matching
        list<Region> new_hit_regions;
        
        /**
         * List of all active states in all active regions.
         * The states are stored in the correct order.
         * Unused states are kept at the end of the list so that they can be reused later,
         * which reduces constructor and destructor costs.
         */
        list<State> states;
        
        /// Delimiting iterator separating active states from unused states
        list<State>::iterator unused_states;
        
        /// Number of unused states
        int num_unused_states;
        
        /// Order of the underlying Markov model
        int markov_order;
        
        /// Conditional distribution (Markov chain transitions)
        unsigned_cstring_fixed_length_ptr_float_hash_map *prob_markov;
        
        /// Character distribution
        float *prob_char;
        
        /// Size of the region which is activated left and right of an activated state
        long minimum_region_size;

        /// Minimum length of an exact repeat to activate a region
        long minimum_hit_length;
        
        /// States with probabilities below this threshold become insignificant
        float minimum_relative_probability;
        
        /// Table of all word positions
        unsigned_cstring_fixed_length_list_long_long_hash_map *word_pos_map;
            
        /**
         * Helper function for derived classes in order to calculate #finishRow
         *
         * @param[in,out] baseState Base state of the current row
         * @param[in] dir Direction of the repeat (forward or backward)
         */
        void finishRowDir(State& baseState, bool dir);
        
        /**
         * Helper function for derived classes in order to calculate #mergeWithNewStartStates
         *
         * @param[in,out] baseState Base state of the current row
         * @param[in] pos Current position in the buffer
         * @param[in] direction Direction of the repeat (forward or backward)
         */
        void mergeWithNewStartStatesDir(State& baseState,long long pos,bool direction);
        
        /**
         * Helper function for derived classes in order to calculate #updateOldRegions
         *
         * @param[in] dir Direction of the repeat (forward or backward)
         */
        void updateOldRegionsDir(bool dir);
        
        /**
         * Calculate the probability of a single character at a certain position if another
         * character is excluded from the alternatives
         *
         * @param[in] buffer The text buffer
         * @param[in] pos Position of the character in the buffer
         * @param[in] excluding_char Excluded character
         * @return Probability of the character at the given position provided excluding_char is excluded
         */
        float getProbCharNormalized(const unsigned char* buffer, long long pos,int excluding_char);
        
        /**
         * Helper function for derived classes in order to calculate #addCompleteRegion
         *
         * @param[in] pos Current position in the buffer
         * @param[in] dir Direction of the repeat (forward or backward)
         */
        void addCompleteRegionDir(long long pos,bool dir);
        
        /**
         * Insert states at a given position in the state list. Unused states are
         * reused first; if there are not enough of them, new states are created.
         *
         * @param[in] pos Position at which to insert the states (iterator passed by value)
         * @param[in] num Number of states to insert
         */
        void insertStates(list<State>::iterator pos,int num);
    
        /**
         * Remove states from the state list.
         *
         * Removed states are put at the end of the list as unused states.
         *
         * @param[in] pos Start position for removal (iterator passed by value)
         * @param[in] num Number of states to remove
         * @return Position after the last removed state
         */
        list<State>::iterator removeStates(list<State>::iterator pos,int num);


    } ;

    /**
     * @struct RepeatMachine::State
     *
     * Represents a node in the repeat graph
     */
    struct RepeatMachine::State
    {
        /// Probability of this state: the sum over all weighted paths that reach it
        float prob;

        /**
         * Weighted counts of the repeat machine transitions on all paths that lead
         * to this state, stored as NUM_MACHINES rows of NUM_COUNTS entries.
         *
         * Transitions:
         * - Start
         * - End
         * - Continue
         * - Match
         * - Change
         * - Indel
         */
        float counts[NUM_MACHINES][NUM_COUNTS];

        /// Weighted counts of Markov chain transitions outside of repeats on all paths that lead to this state
        float count_random;

        /**
         * Default constructor: probability and every count start at zero.
         */
        State()
            : prob(0),
              counts(),        // value-initialization zeroes every element of the array
              count_random(0)
        {
        }

        /**
         * Copy assignment operator.
         *
         * @param[in] rhs The state to assign from
         * @return Reference to a state (presumably *this; the definition is not part of this header)
         */
        State & operator=(const State &rhs);

        /**
         * Follow a transition and update the state at the end of the transition
         *
         * @param[in] s The other state of the transition. NOTE(review): originally documented as
         *              "the state at the end of the transition", but it is passed as const - confirm
         *              which end of the transition this is.
         * @param[in] edge_prob Weight of the transition (probability of the transition)
         * @param[in] edge_type Transition type
         * @param[in] machine_id Type id of the repeat machine whose transition this is
         */
        void merge(const State &s, float edge_prob, int edge_type, int machine_id);

        /**
         * Merge two states and update probabilities and counts
         *
         * @param[in] s The state to merge
         */
        void merge(const State &s);

        /**
         * Reinitialize the state
         */
        void reset();
    } ;

    /**
     * @class RepeatMachine::Region
     *
     * Represents a region in the repeat graph consisting of neighbouring states in the same row
     */
    struct RepeatMachine::Region
    {
        /// Start index of the region in the row (inclusive)
        long long start;
        /// End index of the region in the row (inclusive)
        long long end; 
        /// Age of the region
        int age;
        
        /**
         * Constructor
         *
         * @param[in] start Start index (inclusive)
         * @param[in] end End index (inclusive)
         * @param[in] age Initial age
         */
        Region(long long start,long long end, int age):
        start(start),end(end),age(age)
        {}
        
        /**
         * Orders regions by their start position only; end and age are ignored.
         *
         * Declared const and taking a const reference so that it also works on
         * const regions, temporaries, and comparators such as std::less<Region>.
         *
         * @param[in] r The region to compare to
         * @return true iff this region starts before r
         */
        bool operator<(const Region &r) const
        {
            return start < r.start;
        }
    };
        
	
}

#endif

