// ==========================================================================
//                             find_index_approx
// ==========================================================================
// Copyright (c) 2006-2011, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Johannes Krugel <krugel@in.tum.de>
// ==========================================================================

// This file contains helper functions to read text and FASTA files with plain or UTF-8 encoding

#ifndef SANDBOX_TUM_INCLUDE_SEQAN_FIND_INDEX_APPROX_FILE_READ_TEXT_H_
#define SANDBOX_TUM_INCLUDE_SEQAN_FIND_INDEX_APPROX_FILE_READ_TEXT_H_

// TODO(krugel) Switch depending on whether Boost or C++11 or system locale is available
// Configuration macros defined before pulling in the UTF-8 codecvt facet implementation below.
// The namespace macros open and close seqan::boost_utf8; the code further down in this file
// refers to the facet as seqan::boost_utf8::utf8_codecvt_facet.
#define BOOST_UTF8_BEGIN_NAMESPACE namespace seqan { namespace boost_utf8 {
#define BOOST_UTF8_END_NAMESPACE } }
// Defined to nothing. NOTE(review): presumably the linkage/export decoration expected by the
// included facet source -- confirm against that file.
#define BOOST_UTF8_DECL
// The implementation file is included directly, so the facet is compiled as part of this header.
#include "utf8_codecvt_facet.cpp"

// NOTE(review): a using-directive at global scope in a header takes effect in every file that
// includes this header. It is left in place because existing includers may depend on it.
using namespace seqan;

namespace seqan {

// ============================================================================
// Tags, Classes, Enums
// ============================================================================

// Tag structs for the supported encodings. They are only declared, never defined;
// within this file they serve solely as template arguments of Tag<>.
struct EncodingDirect_;
struct EncodingUtf8_;

// Encoding selectors passed as the last argument of the readTextFile() overloads below.
// EncodingDirect selects the overloads that read the file through a memory mapped String<char>.
// EncodingUtf8 selects the overloads for UTF-8 input (the raw variant decodes the file with a
// UTF-8 codecvt facet, the FASTA variant only reports that it is not implemented).
typedef Tag<EncodingDirect_> const              EncodingDirect;
typedef Tag<EncodingUtf8_> const                EncodingUtf8;


// ============================================================================
// Functions
// ============================================================================

// ----------------------------------------------------------------------------
// readTextFile(Raw, EncodingDirect)
// ----------------------------------------------------------------------------

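/*
    Example 1: format and encoding are selected at run time by name.
    Both selectors must be SeqAn strings of the same type; plain string
    literals do not match the String<TChar, TSpec> parameters.

    String<wchar_t> txt;
    readTextFile(argv[1], txt, CharString("raw"), CharString("utf-8"));
    std::wcout << length(txt) << std::endl;

    Example 2: format and encoding are selected at compile time by tag.

    String<Dna5> dna;
    readTextFile(argv[1], dna, Fasta(), EncodingDirect());
    std::wcout << length(dna) << std::endl;
*/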

// Run-time dispatch on the file format name.
//
// Maps the format name to the corresponding file format tag and forwards to the
// readTextFile() overload that takes a format tag together with an encoding name.
//
//   filename  File to read; passed through unchanged.
//   txt       Text object handed to the selected reader.
//   format    Format name; compared with operator== against "raw" and "fasta".
//   encoding  Encoding name; passed through unchanged and resolved by the callee.
//
// Returns false if the format name is neither "raw" nor "fasta", otherwise the
// result of the forwarded call.
//
// The two selector strings are only inspected, so they are taken by const
// reference instead of by value to avoid copying them on every call.
template <typename TFilenameString, typename TText, typename TChar, typename TSpec>
inline bool
readTextFile(TFilenameString const & filename, TText & txt, String<TChar, TSpec> const & format, String<TChar, TSpec> const & encoding) {
    if (format == "raw") {
        return readTextFile(filename, txt, Raw(), encoding);
    }
    if (format == "fasta") {
        return readTextFile(filename, txt, Fasta(), encoding);
    }
    return false;  // Unknown format name.
}

// Run-time dispatch on the encoding name.
//
// Maps the encoding name to the corresponding encoding tag and forwards to the
// readTextFile() overload for the given format tag and that encoding tag.
//
//   filename  File to read; passed through unchanged.
//   txt       Text object handed to the selected reader.
//   format    File format tag object; passed through unchanged.
//   encoding  Encoding name; compared with operator== against "direct" and "utf-8".
//
// Returns false if the encoding name is neither "direct" nor "utf-8", otherwise
// the result of the forwarded call.
//
// The encoding name is only inspected, so it is taken by const reference
// instead of by value to avoid copying it on every call.
template <typename TFilenameString, typename TText, typename TFileFormat, typename TChar, typename TSpec>
inline bool
readTextFile(TFilenameString const & filename, TText & txt, TFileFormat format, String<TChar, TSpec> const & encoding) {
    if (encoding == "direct") {
        return readTextFile(filename, txt, format, EncodingDirect());
    }
    if (encoding == "utf-8") {
        return readTextFile(filename, txt, format, EncodingUtf8());
    }
    return false;  // Unknown encoding name.
}

template <typename TFilenameString, typename TChar, typename TTextSpec>
inline bool
readTextFile(TFilenameString const & filename, String<TChar, TTextSpec> & txt, Raw, EncodingDirect) {
    typedef String<char, MMap<> >                          TFileString;
    
    TFileString mmapString;
    if (!open(mmapString, filename, OPEN_RDONLY)) {
        return false;  // Could not open file.
    }
    txt = mmapString;

    //std::fstream inStream(filename, std::ios::in | std::ios::binary);
    //if (!inStream.good()) {
    //    std::cerr << "Could not open file." << std::endl;
    //    return false;
    //}
    //
    //std::string line;
    //while(std::getline(inStream, line)) {
    //    append(txt, line);
    //    append(txt, "\n");
    //}
    return true;
}

// Additional specialization for String<bool>, because at the moment conversion is not done automatically
template <typename TFilenameString, typename TTextSpec>
inline bool
readTextFile(TFilenameString const & filename, String<bool, TTextSpec> & txt, Raw, EncodingDirect) {
    typedef String<char, MMap<> >                          TFileString;
    typedef String<bool, TTextSpec>                        TText;
    typedef typename Iterator<TFileString>::Type           TFileIter;
    typedef typename Iterator<TText>::Type                 TTextIter;

    TFileString mmapString;
    if (!open(mmapString, filename, OPEN_RDONLY)) {
        return false;  // Could not open file.
    }

    resize(txt, length(mmapString), Exact());
    TTextIter it2 = begin(txt);
    for (TFileIter it1 = begin(mmapString); it1 != end(mmapString); ++it1, ++it2) {
        value(it2) = (value(it1) == '1') ? true : false;
    }

    return true;
}

// ----------------------------------------------------------------------------
// readTextFile(Raw, EncodingUtf8)
// ----------------------------------------------------------------------------

// Reads a raw UTF-8 encoded file as wide characters.
//
// The file is opened as a binary wide-character stream and imbued with a locale
// that carries the UTF-8 codecvt facet from seqan::boost_utf8. The content is
// read line by line and appended to txt; existing content of txt is kept.
//
//   filename  File to read; handed to the std::wfstream constructor.
//   txt       Text that the decoded characters are appended to.
//
// Returns false if the stream could not be opened or if the stream reports an
// unrecoverable error (badbit) while reading; in the latter case txt holds the
// part that was read before the error. Returns true otherwise.
template <typename TFilenameString, typename TText>
inline bool
readTextFile(TFilenameString const & filename, TText & txt, Raw, EncodingUtf8) {
    std::wfstream inStream(filename, std::ios::in | std::ios::binary);
    if (!inStream.good()) {
        return false;  // Could not open file.
    }

    // TODO(krugel) Switch depending on whether Boost or C++11 or system locale is available

    // Set UTF-8 locale...
    // ... using Boost
    std::locale loc(std::locale(), new seqan::boost_utf8::utf8_codecvt_facet);
    // ... using an operating system locale
    //std::locale loc("en_US.UTF-8");
    // ... using C++11
    //std::locale loc(std::locale(), new std::codecvt_utf8<wchar_t>);
    //std::locale loc(std::locale(), new std::codecvt_byname(".utf8"));
    //std::locale::global(loc);

    inStream.imbue(loc);
    //std::wcout.imbue(loc);

    // TODO(krugel) Don't use getline() but rather read() ?
    std::wstring line;
    while (std::getline(inStream, line)) {
        append(txt, line);
        // getline() consumes the line terminator without storing it, so it is put
        // back here. When getline() stopped at the end of the file instead (eofbit
        // set), the last line had no terminator and none must be invented.
        if (!inStream.eof()) {
            append(txt, L"\n");
        }
    }

    // Reaching the end of the file sets eofbit/failbit only. badbit signals a
    // real stream error; NOTE(review): whether an invalid byte sequence is
    // reported this way depends on the standard library implementation.
    return !inStream.bad();
}

// ----------------------------------------------------------------------------
// readTextFile(Fasta, EncodingDirect)
// ----------------------------------------------------------------------------

template <typename TFilenameString, typename TText>
inline bool
readTextFile(TFilenameString const & filename, TText & txt, Fasta, EncodingDirect) {
    // Open memory mapped string.
    typedef String<char, MMap<> >                           TFileString;
    
    TFileString mmapString;
    if (!open(mmapString, filename, OPEN_RDONLY)) {
        return false;  // Could not open file.
    }

    // Create RecordReader.
    RecordReader<TFileString, DoublePass<StringReader> > reader(mmapString);
    // Read file in one pass.
    StringSet<CharString, Owner<ConcatDirect<> > > ids;
    StringSet<TText, Owner<ConcatDirect<> > > seqs;
    if (read2(ids, seqs, reader, Fasta()) != 0u) {
        return false;  // Could not read file.
    }
    txt = concat(seqs);

    return true;
}


// ----------------------------------------------------------------------------
// readTextFile(Fasta, EncodingUtf8)
// ----------------------------------------------------------------------------

// Placeholder for reading FASTA files with UTF-8 encoding.
//
// Nothing is read: neither filename nor txt is used. A notice is written to
// std::cerr and the function always returns false.
template <typename TFilenameString, typename TText>
inline bool
readTextFile(TFilenameString const & filename, TText & txt, Fasta, EncodingUtf8) {
    // Both parameters are intentionally unused.
    (void) filename;
    (void) txt;

    std::cerr << "Reading FASTA with UTF-8 is not implemented yet." << std::endl;
    return false;
}

}  // namespace seqan

#endif  // SANDBOX_TUM_INCLUDE_SEQAN_FIND_INDEX_APPROX_FILE_READ_TEXT_H_
