#ifndef _approximate_repeats_h_
#define _approximate_repeats_h_

#include <algorithm>
#include <iostream>
#include <iterator>
#include <list>
#include <string>

#include "analyze_setting.h"
#include "csv_out_stream.h"
#include "direct_repeat_machine.h"
#include "format_helper_out.h"
#include "inverted_repeat_machine.h"
#include "mirror_repeat_machine.h"
#include "module.h"
#include "repeat_machine.h"
#include "stack_fixed_size.h"


// String literals "Start", "End", "Continue", "Indel", "Match" and "Change",
// exposed as preprocessor macros to every file that includes this header.
// NOTE(review): presumably the display names stored in
// ApproximateRepeats::string_params — confirm against the implementation file.
#define START_STRING "Start"
#define END_STRING "End"
#define CONTINUE_STRING "Continue"
#define INDEL_STRING "Indel"
#define MATCH_STRING "Match"
#define CHANGE_STRING "Change"


namespace papi
{
    /**
     * @class ApproximateRepeats
     *
     * Module to estimate the parameters of the repeat model according to Allison
     * et al. (1998): "Compression of Strings with Approximate Repeats", in
     * Intelligent Systems in Mol. Biol. (ISMB98), Montreal, pp. 8–16, with speedups applied.
     */
    class ApproximateRepeats : public Module
    {

    protected:
        
        /// Mapping from parameter id to parameter name
        const char *string_params[NUM_COUNTS];
        
        /// module name
        std::string module_id;
        
        /// Minimum exact repeat length to activate a region. Only active regions are considered in calculations.
        long minimum_hit_length;
        
        /// Distribution of characters
        float *prob_char;
        
        /// Conditional distribution of characters (markov chain)
        unsigned_cstring_fixed_length_ptr_float_hash_map *prob_markov;
        
        /// order of the underlying markov chain
        int markov_order;
        
        /// probability that no repeat starts here
        float prob_no_start;
        
        /// Positions of all qgrams
        unsigned_cstring_fixed_length_list_long_long_hash_map *word_pos_map;
        
        /// Index map
        const IndexMap<char> *ind_map;
        
        /// Active repeat machines
        list<RepeatMachine*> repeatMachines;
        
        /// base state. all repeats begin and end in the base state
        RepeatMachine::State baseState;
        
        /// disable inverted repeat
        bool disable_inverted_repeat;
        
        /// disable direct repeat
        bool disable_direct_repeat;
        
        /// disable mirror repeat
        bool disable_mirror_repeat;

        /// disable character distribution
        bool disable_character_distribution;
        
        /// disable qgram distribution
        bool disable_qgram_distribution;
              
        /// Maximum number of iterations for the EM algorithm
        int iterations;

        ///Save the last k characters in reversed order
        StackFixedSizeRandomAccess<unsigned char> *mirrored_last_k;
        
        ///Save the last k characters in reversed order and all characters inverted
        StackFixedSizeRandomAccess<unsigned char> *inverted_last_k;
        
        /// Complement for each character
        unsigned char *complement;
        
        /// Concatenation of last and first word to get irreducible markov chains when estimating conditional probabilities
        unsigned char *tail;
        
        /// Buffer of the text (indices of IndexMap instead of characters)
        unsigned char const *buffer;
        
        /**
         * Calculate probability of single character at certain position if another character is excluded from the alternatives
         *
         * @param[in] buffer The text buffer
         * @param[in] pos Position of the character in the buffer
         * @param[in] excluding_char Excluded character
         * @return Probability of the character at the given position provided excluded_char is excluded
         */
        float getProbCharNormalized(const unsigned char* buffer, long long pos,int excluding_char);
        
        /**
         * Initialize csv stream for repeat iterations files, write headers and start parameters.
         *
         * @param[in] directory Output directory
         * @param[in] file_id File ID
         * @param[in] file_path Path to input file
         * @param[in] file_length File length
         * @param[in] oFile_direct_repeat stream to direct repeat output
         * @param[in] oFile_mirror_repeat stream to mirror repeat output
         * @param[in] oFile_inverted_repeat  stream to inverted repeat output
         * @param[in,out] csv_direct_repeat csv stream to direct repeat output
         * @param[in,out] csv_mirror_repeat csv stream to mirror repeat output
         * @param[in,out] csv_inverted_repeat csv stream to inverted repeat output
         */
        void initOutputIterations(string & directory, long file_id, string & file_path, long long file_length, 
                                  ofstream & oFile_direct_repeat, ofstream & oFile_mirror_repeat, ofstream & oFile_inverted_repeat,
                                  CsvOutStream & csv_direct_repeat, CsvOutStream & csv_mirror_repeat, CsvOutStream & csv_inverted_repeat);
        
        
        /**
         * Write output except repeat iterations files.
         *
         * @param[in] directory Output directory
         * @param[in] write_stdout True, if printing to stdout
         * @param[in] file_id File ID
         * @param[in] file_path Path to input file
         * @param[in] file_length File length
         */
        void writeOutput(string & directory, bool write_stdout, long file_id, string file_path, long long file_length);
        
        /**
         * Write single iteration into repeat iterations files
         *
         * @param[in] n Current iteration
         * @param[in] csv_direct_repeat Csv stream to direct repeat iterations file
         * @param[in] csv_mirror_repeat Csv stream to mirror repeat iterations file
         * @param[in] csv_inverted_repeat Csv stream to inverted repeat iterations file
         */
        void writeIteration(int n,CsvOutStream & csv_direct_repeat, CsvOutStream & csv_mirror_repeat, CsvOutStream & csv_inverted_repeat);

         /**
          * Finishing calculations of an iteration and parameter erstimation
          */
        void finishIteration();
        
        /**
         * Pre-calculations such as character distribution and word position table.
         * Also writes qgram distribution file
         *
         * @param[in] bufSize Size of the buffer
         * @param[in] directory Output directory
         * @param[in] write_stdout True, if printing to stdout
         * @param[in] file_id File ID
         * @param[in] file_path Path to input file
         * @param[in] file_length File length
         */
        void preprocessing(long long bufSize, string directory, bool write_stdout, long file_id, string file_path, long long file_length);
        
        /**
         * Process single character
         *
         * @param[in] pos Position of the character in the buffer
         * @param[in] bufSize Buffer size
         * @param[in] noHit If true, no regions are activated by exact matching. If false exact matching with mindest length activate the region.
         */
        void processCharacter(long long pos, long long bufSize, bool noHit);
        
        /**
         * Initialize the iteration
         */
        void initIteration();

    public:
        /**
         * Constructor
         *
         * @param[in] name Module name
         */
        ApproximateRepeats( char const* name);
        
        /**
         * Destructor
         */
        ~ApproximateRepeats();
        
        /// Overrides Module::init(AnalyzeSetting & settings, long long file_length,const  IndexMap<char> *ind_map)
        void init(AnalyzeSetting & settings, long long file_length,const  IndexMap<char> *ind_map);
        /// Overrides Module::process(long long bufSize,unsigned char const* buffer,std::string directory, bool write_stdout, long file_id,std::string file_path,long long file_length)
        void process(long long bufSize,unsigned char const* buffer,std::string directory, bool write_stdout, long file_id,std::string file_path,long long file_length);
        /// Overrides Module::getId()
        const std::string getId();
    };
	
}

#endif

