#include "format_helper_in.h"

using namespace std;
using namespace boost;

namespace papi {
    
    /**
     * Advances a CSV reader past its header section.
     *
     * Repeatedly reads the first cell of a line and discards the remainder of
     * that line, until the line whose first cell is "content_type" has been
     * consumed.
     *
     * The loop also stops when the reader no longer reports good(), so that an
     * input lacking a "content_type" line does not loop forever (this assumes
     * good() becomes false once the underlying stream is exhausted or failed).
     *
     * @param csv  Reader to advance; it is left positioned just after the last
     *             skipped line.
     */
    void skipCsvHeader(CsvInStream &csv) {
        string s;
        do {
            s = csv.getNextCell();
            csv.skipLine();
        } while(s.compare("content_type") != 0 && csv.good());
    }

    
    void ParameterParser::start() {
        for(unordered_map<string, ParserFunction*>::iterator it=requests.begin();
            it != requests.end();++it) {
            ParserFunction &fct = *(it->second);
            if(fct.file_stream.is_open()) {
                CsvInStream csv(&fct.file_stream);
                skipCsvHeader(csv);
                fct(fct.file_stream);
                delete it->second;
                requests.erase(it);
            }
        }
        CsvInStream csv(&cin);
        while(csv.good() && !requests.empty()) {
            string s = csv.getNextCell();
            if(s.compare("content_type")==0) {
                string content_type = csv.getNextCell();
                for(unordered_map<string, ParserFunction*>::iterator it=requests.begin();
                    it != requests.end();++it) {
                    if(content_type.compare(it->first)==0) {
                        ParserFunction &fct = *(it->second);
                        fct(cin);
                        delete it->second;
                        requests.erase(it);
                        break;
                    }
                }
            }
        }
    }
    
    void ParameterParser::requestCharacterDistribution(const char *file_name,
                                     IndexMap<char> &ind_map,
                                     double *&distribution) {
        requests["character_distribution"] = new ParseCharacterDistribution(file_name, 
                                                                          ind_map, 
                                                                          distribution);        
    }
    void ParameterParser::requestRepeatMachine(const char *file_name,
                                               const char *machine_name,
                                               double &prob_start,
                                               double &prob_end,
                                               double &prob_match,
                                               double &prob_change,
                                               double &prob_insert,
                                               double &prob_delete,
                                               char **complement) {
        requests[machine_name] = new ParseRepeatMachine(file_name, 
                                                        prob_start, 
                                                        prob_end,
                                                        prob_match,
                                                        prob_change,
                                                        prob_insert, 
                                                        prob_delete,
                                                        complement);
        
    }
    void ParameterParser::requestAutocorrelationDar(const char *file_name,
                                                    long double *&autocorrelation_dar,
                                                    int &order) {
        requests["autocorrelation_dar"] = new ParseAutocorrelationDar(file_name, 
                                                                      autocorrelation_dar,
                                                                      order
                                                                      );
    }

    
    void ParameterParser::requestConditionalDistribution(const char *file_name,
                                                         IndexMap<char> &ind_map, 
                                                         int &cp_order, 
                                                         NDimMat<double *> *&conditional_probability,
                                                         char *&start_word) {
        requests["qgram_distribution"] = new ParseConditionalDistribution(file_name, ind_map, 
                                                     cp_order, conditional_probability, start_word);
                        
    }
    
    /**
     * Optionally opens the parameter file backing this parser.
     *
     * When `file_name` is null or empty nothing is opened and `file_stream`
     * stays closed. Otherwise the file is opened and its CSV header is skipped
     * via skipCsvHeader(). If the file cannot be opened, an error is reported
     * on stderr (like every other error in this file) and the process exits
     * with EXIT_FAILURE.
     *
     * @param file_name  Path of the parameter file; may be null or empty.
     */
    ParameterParser::ParserFunction::ParserFunction(const char *file_name){
        // Only the first character is needed to detect an empty name.
        if(!file_name || file_name[0] == '\0') {
            return;
        }

        file_stream.open(file_name);
        if(!file_stream.is_open())
        {
            cerr << "Could not open file " << file_name <<endl;
            exit(EXIT_FAILURE);
        }

        CsvInStream csv(&file_stream);
        skipCsvHeader(csv);
    }
    
    /**
     * Closes the backing file stream if it is still open.
     */
    ParameterParser::ParserFunction::~ParserFunction() {
        if(!file_stream.is_open()) {
            return;
        }
        file_stream.close();
    }
    
    /**
     * Builds a character-distribution parser.
     *
     * `file_name` is handed to the ParserFunction base; `ind_map` and
     * `distribution` are bound to the members of the same name.
     */
    ParameterParser::ParseCharacterDistribution::ParseCharacterDistribution (
            const char *file_name,
            IndexMap<char> &ind_map,
            double *&distribution)
        : ParameterParser::ParserFunction(file_name),
          ind_map(ind_map),
          distribution(distribution)
    {
    }
    
    /**
     * Parses a character distribution from `stream`.
     *
     * Expected input: a cell "alphabet_size" followed by a positive integer,
     * then (after the rest of that line is skipped) `alphabet_size` pairs of
     * <character cell, probability>. Only the first character of each
     * character cell is used.
     *
     * Each character is registered in `ind_map`; its probability is stored in
     * `distribution` at the index returned by the map. `distribution` is
     * (re)allocated here with `alphabet_size` entries.
     *
     * Any format error, an index outside the alphabet, or probabilities that
     * do not sum to 1 (tolerance 1e-5) prints an error on stderr and exits
     * with EXIT_FAILURE.
     *
     * @param stream  Input positioned at the "alphabet_size" cell.
     */
    void ParameterParser::ParseCharacterDistribution::operator() (istream &stream) {
        CsvInStream csv(&stream);
        string t = csv.getNextCell();
        if(t.compare("alphabet_size")!=0) {
            cerr << "ERROR: alphabet_size information missing" << endl;
            exit(EXIT_FAILURE);
        }
        int size_alphabet = csv.getNextInt();
        if(size_alphabet<=0)
        {
            cerr << "ERROR: Alphabet size must be greater than zero." <<endl;
            exit(EXIT_FAILURE);
        }
        
        csv.skipLine();
        
        // Value-initialise: if the file repeats a character, some slots are
        // never written and must read as 0 rather than as indeterminate values.
        distribution = new double[size_alphabet]();
        double checksum = 0;
        for(int i=0;i<size_alphabet;++i)
        {
            char c = csv.getNextCell()[0];
            double p = csv.getNextDouble();    
            int index = static_cast<int>(ind_map.addIndex(c));
            if(index>=size_alphabet)
            {
                cerr<<"ERROR: Incompatible files: Probability and conditional probability have different alphabets."<<endl;
                exit(EXIT_FAILURE);
            }
            distribution[index] = p;
            checksum += p;
        }
        
        if(checksum < 0.99999 || checksum>1.00001)
        {
            cerr << "ERROR: Character probabilities do not sum up to 1: "<< checksum << endl;
            exit(EXIT_FAILURE);
        }

    }
                                                                        
    
    /**
     * Builds a conditional-distribution (q-gram) parser.
     *
     * `file_name` is handed to the ParserFunction base; every other argument
     * is bound to the member of the same name.
     */
    ParameterParser::ParseConditionalDistribution::ParseConditionalDistribution(
            const char *file_name,
            IndexMap<char> &ind_map,
            int &cp_order,
            NDimMat<double *> *&conditional_probability,
            char *&start_word)
        : ParameterParser::ParserFunction(file_name),
          ind_map(ind_map),
          cp_order(cp_order),
          conditional_probability(conditional_probability),
          start_word(start_word)
    {
    }
    
    /**
     * Parses a q-gram distribution from `stream`.
     *
     * Expected input: the two header cells "qgram_length" and "alphabet_size"
     * (in either order), each followed by an integer; then, after the rest of
     * that line is skipped, rows of <q-gram cell, probability> until an empty
     * q-gram cell or the end of the input.
     *
     * Outputs (all written through the references held by this parser):
     *  - `cp_order` is set to qgram_length - 1.
     *  - `conditional_probability` is allocated; for each row, the entry
     *    addressed by the q-gram's character indices holds an array of
     *    `alphabet_size` doubles, in which the slot of the q-gram's last
     *    character receives the row's probability.
     *  - `start_word` is allocated with `cp_order` characters (no terminator
     *    is appended) and filled with the prefix of a row selected using one
     *    random draw, or with the prefix of the last row if none was selected.
     *
     * Missing header values, a q-gram shorter than qgram_length, or more
     * distinct characters than `alphabet_size` print an error on stderr and
     * exit with EXIT_FAILURE.
     *
     * @param stream  Input positioned at the first header cell.
     */
    void ParameterParser::ParseConditionalDistribution::operator() (istream &stream) {
        CsvInStream csv(&stream);
        RandomDouble rand(0,1);
        
        double p_start = rand.generate();
        int alphabet_size = -1;
        cp_order = -1;
        for(int i=0;i<2;++i) {
            string s = csv.getNextCell();
            if(s.compare("qgram_length")==0) {
                cp_order = csv.getNextInt() - 1;
            } else if(s.compare("alphabet_size")==0) {
                alphabet_size = csv.getNextInt();
            }
        }
        
        if(alphabet_size == -1) {
            cerr << "Parameter qgram_distribution: alphabet_size not specified" <<endl;
            exit(EXIT_FAILURE);
        }
        if(cp_order == -1) {
            cerr << "Parameter qgram_distribution: qgram_length not specified" <<endl;
            exit(EXIT_FAILURE);
        }
        // Both values are used as array sizes below; reject anything that
        // would make those allocations invalid.
        if(alphabet_size <= 0) {
            cerr << "Parameter qgram_distribution: alphabet_size must be greater than zero" <<endl;
            exit(EXIT_FAILURE);
        }
        if(cp_order < 0) {
            cerr << "Parameter qgram_distribution: qgram_length must be greater than zero" <<endl;
            exit(EXIT_FAILURE);
        }
        
        const int word_length = cp_order + 1;
        int *word_buf = new int[word_length]();
        conditional_probability = new NDimMat<double *>(cp_order,alphabet_size,0);
        start_word = new char[cp_order]();
        bool start_word_isset = false;
        bool row_seen = false;
        
        csv.skipLine();
        while(csv.good())
        {
            string w = csv.getNextCell();
            if(w.empty()) {
                break; 
            }
            // Every q-gram must supply cp_order + 1 characters; indexing a
            // shorter cell would read past the end of the string.
            if(w.size() < static_cast<string::size_type>(word_length)) {
                cerr << "Parameter qgram_distribution: q-gram shorter than qgram_length: " << w <<endl;
                exit(EXIT_FAILURE);
            }
            for(int i=0;i<word_length;++i)
            {
                word_buf[i] = ind_map.addIndex(w[i]);
                // An index beyond the declared alphabet would overrun the
                // per-prefix probability row allocated below.
                if(word_buf[i] >= alphabet_size) {
                    cerr << "Parameter qgram_distribution: more distinct characters than alphabet_size" <<endl;
                    exit(EXIT_FAILURE);
                }
            }
            row_seen = true;

            double prob = csv.getNextDouble();
            // NOTE(review): this selects the first row whose probability is
            // below the remaining random mass, rather than the row at which
            // the cumulative probability crosses the draw; confirm that this
            // is the intended sampling before changing it.
            if(prob<p_start)
            {
                for(int i=0;i<cp_order;++i) {
                    start_word[i] = ind_map.getValue(word_buf[i]);
                }
                start_word_isset = true;
                // With p_start at 0 no later row can satisfy prob < p_start.
                p_start = 0;
            }
            else {
                p_start-=prob;
            }
            
            double **a = &(conditional_probability->get(word_buf));
            if(!(*a)) {
                // Zero-filled so that successors absent from the file read as
                // probability 0 instead of indeterminate values.
                *a = new double[alphabet_size]();
            }
            
            (*a)[word_buf[cp_order]] = prob;
        }

        // Fall back to the prefix of the last row read; without any row there
        // is nothing meaningful in word_buf to translate.
        if(!start_word_isset && row_seen) {
            for(int i=0;i<cp_order;++i) {
                start_word[i] = ind_map.getValue(word_buf[i]);
            }
        }
        delete [] word_buf;
    }
    
    /**
     * Builds an autocorrelation (DAR) parser.
     *
     * `file_name` is handed to the ParserFunction base; `autocorrelation_dar`
     * and `order` are bound to the members of the same name.
     */
    ParameterParser::ParseAutocorrelationDar::ParseAutocorrelationDar(
            const char *file_name,
            long double *&autocorrelation_dar,
            int &order)
        : ParameterParser::ParserFunction(file_name),
          autocorrelation_dar(autocorrelation_dar),
          order(order)
    {
    }
    
    /**
     * Parses autocorrelation values from `stream`.
     *
     * Expected input: a cell "number_entries" followed by an integer N, then
     * N pairs of <index, value>. `order` is set to N - 1 and
     * `autocorrelation_dar` is allocated with N zero-initialised entries; each
     * value is stored at its index.
     *
     * A missing "number_entries" cell, a negative N, or an index outside
     * [0, N) prints an error on stderr and exits with EXIT_FAILURE.
     *
     * @param stream  Input positioned at the "number_entries" cell.
     */
    void ParameterParser::ParseAutocorrelationDar::operator() (istream &stream) {
        CsvInStream csv(&stream);
        string t = csv.getNextCell();
        if(t.compare("number_entries")!=0) {
            cerr << "ERROR: number_entries missing" << endl;
            exit(EXIT_FAILURE);
        }
        const int num_entries = csv.getNextInt();
        if(num_entries < 0) {
            cerr << "ERROR: number_entries must not be negative" << endl;
            exit(EXIT_FAILURE);
        }
        order = num_entries - 1;
        
        autocorrelation_dar = new long double[num_entries]();
        
        for(int i=0;i<num_entries;++i) {
            // Read the index and the value in separate statements: inside a
            // single assignment expression the right-hand side may be (and
            // since C++17 must be) evaluated first, which would consume the
            // two cells in the wrong order.
            const int index = csv.getNextInt();
            const long double value = csv.getNextDouble();
            if(index < 0 || index >= num_entries) {
                cerr << "ERROR: autocorrelation index out of range: " << index << endl;
                exit(EXIT_FAILURE);
            }
            autocorrelation_dar[index] = value;
        }
    }
    
    /**
     * Builds a repeat-machine parser.
     *
     * `file_name` is handed to the ParserFunction base; every other argument
     * is bound to the member of the same name. All six probabilities are then
     * set to -1, the value operator() checks for to detect parameters that
     * were never supplied.
     */
    ParameterParser::ParseRepeatMachine::ParseRepeatMachine(
            const char *file_name,
            double &prob_start,
            double &prob_end,
            double &prob_match,
            double &prob_change,
            double &prob_insert,
            double &prob_delete,
            char **complement)
        : ParameterParser::ParserFunction(file_name),
          prob_start(prob_start),
          prob_end(prob_end),
          prob_match(prob_match),
          prob_change(prob_change),
          prob_insert(prob_insert),
          prob_delete(prob_delete),
          complement(complement)
    {
        prob_insert = -1;
        prob_delete = -1;
        prob_change = -1;
        prob_match = -1;
        prob_end = -1;
        prob_start = -1;
    }
    
    /**
     * Parses repeat-machine parameters from `stream`.
     *
     * If `complement` is non-null, the input must start with "alphabet_size"
     * and an integer, then "inverted_alphabet" followed by that many
     * <character, complement character> pairs. `*complement` is allocated as a
     * zero-filled table indexed by the character's unsigned code.
     *
     * Next comes "transition_probabilities" with an entry count, followed by
     * that many <keyword, value> entries. Recognised keywords: "Start", "End",
     * "Continue" (stored as prob_end = 1 - value), "Match", "Change" and
     * "Indel" (sets both prob_insert and prob_delete). Empty keywords are
     * ignored. If both "End" and "Continue" appear and do not sum to 1
     * (tolerance 1e-5), an error is printed but parsing continues.
     *
     * A missing section keyword, an unknown entry keyword, or any probability
     * still at -1 afterwards prints an error on stderr and exits with
     * EXIT_FAILURE.
     *
     * @param stream  Input positioned at the first section keyword.
     */
    void ParameterParser::ParseRepeatMachine::operator() (istream &stream) {
        CsvInStream csv(&stream);
        string t;
        if(complement != NULL) {
            t = csv.getNextCell();
            if(t.compare("alphabet_size") != 0) {
                cerr << "ERROR: alphabet_size not specified" << endl;
                exit(EXIT_FAILURE);
            }
            int size = csv.getNextInt();
            // The table is indexed by raw character code below, so it has to
            // cover every unsigned char value; sizing it by the alphabet size
            // alone would be overrun by ordinary letters such as 'A' (65).
            const int char_codes = 256;
            const int table_size = size > char_codes ? size : char_codes;
            *complement = new char[table_size]();
            t = csv.getNextCell();
            if(t.compare("inverted_alphabet") != 0) {
                cerr << "ERROR: inverted_alphabet not specified" << endl;
                exit(EXIT_FAILURE);
            }
            for(int i=0; i<size; ++i) {
                unsigned char c = csv.getNextCell()[0];
                unsigned char c_compl = csv.getNextCell()[0];
                (*complement)[c] = c_compl;
            }
        }
        t = csv.getNextCell();
        if(t.compare("transition_probabilities") != 0) {
            cerr << "ERROR: transition_probabilities not specified" << endl;
            exit(EXIT_FAILURE);
        }
        int num_entries = csv.getNextInt();

        for(int i=0; i < num_entries; ++i){
            t = csv.getNextCell();
            if(t.compare("Start")==0)
            {
                prob_start = csv.getNextDouble();
            }
            else if(t.compare("End")==0)
            {
                if(prob_end == -1) {
                    prob_end = csv.getNextDouble();
                } else {
                    // prob_end was derived from an earlier "Continue" entry;
                    // (1 - prob_end) is that continue probability.
                    double j = 1 - prob_end + csv.getNextDouble();
                    if( j < 0.99999 || j > 1.00001 ) {
                        cerr << "ERROR in repeat machine file: ";
                        cerr << "Continue and End probabilities do not sum up to 1" << endl;
                    }
                }
            }
            else if(t.compare("Continue")==0) {
                if(prob_end == -1) {
                    prob_end = 1 - csv.getNextDouble();
                } else {
                    double j = prob_end + csv.getNextDouble();
                    if( j < 0.99999 || j > 1.00001 ) {
                        cerr << "ERROR in repeat machine file: ";
                        cerr << "Continue and End probabilities do not sum up to 1" << endl;
                    }
                }
            }
            else if(t.compare("Match")==0)
            {
                prob_match = csv.getNextDouble();
            }
            else if(t.compare("Change")==0)
            {
                prob_change = csv.getNextDouble();
            }
            else if(t.compare("Indel")==0)
            {
                prob_insert = prob_delete = csv.getNextDouble();
            }
            else if(!t.empty())
            {
                cerr << "ERROR: Illegal keyword in repeat machine parameter file: "<<t<<endl;
                exit(EXIT_FAILURE);
            }

        }
        
        // -1 is the "never set" marker assigned by the constructor.
        if(prob_start == -1 || prob_end == -1  || prob_match == -1 || prob_change == -1 || prob_insert == -1 || prob_delete == -1)
        {
            cerr << "ERROR: Repeat machine initialization: Parameter(s) missing" <<endl;
            exit(EXIT_FAILURE);
        }
        
    }


    
}
