// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <cstdio>
#include <cstdint>
#include <cstdarg>
#include "preproc.h"
#include "charmap.h"
#include "char_util.h"
#include "utf8.h"

// Kind of left-hand side that CharmapReader::ReadLhs() found on a charmap line.
//
// Declared as a scoped enum so the very generic enumerator names (Char, None,
// ...) do not leak into the enclosing scope; every use in this file is
// already written as LhsType::X.
enum class LhsType
{
    Char,     // quoted character literal
    Escape,   // quoted backslash escape
    Constant, // identifier naming a constant
    None      // end of input reached, there is no further entry
};

// Parsed left-hand side of one charmap entry.
//
// Only the field that matches `type` is meaningful: `code` for Char and
// Escape entries, `name` for Constant entries. The default member
// initializers guarantee that the unused fields never hold indeterminate
// values when an Lhs is returned or copied.
struct Lhs
{
    LhsType type = LhsType::None; // which kind of entry was read
    std::string name;             // constant name (Constant entries)
    std::int32_t code = 0;        // decoded code point (Char / Escape entries)
};

// Tokenizer for a charmap file.
//
// The whole file is loaded into a NUL-terminated heap buffer by the
// constructor; the Read*/Expect* members then advance a cursor (m_pos)
// through that buffer while tracking the current line number for error
// messages.
//
// The object owns m_buffer (allocated with new[]), so both copy operations
// are deleted: an implicitly generated copy would make two objects delete
// the same buffer.
class CharmapReader
{
public:
    // Loads `filename` into memory and blanks out its comments.
    CharmapReader(std::string filename);
    CharmapReader(const CharmapReader&) = delete;
    CharmapReader& operator=(const CharmapReader&) = delete;
    ~CharmapReader();

    // Skips blank lines, then reads the left-hand side of the next entry.
    // The returned type is LhsType::None once the end of the input is reached.
    Lhs ReadLhs();

    // Skips spaces/tabs and consumes a '='; raises an error if it is missing.
    void ExpectEqualsSign();

    // Reads one or more two-digit hex bytes and returns them as raw bytes.
    std::string ReadSequence();

    // Requires that only spaces/tabs remain before the newline or end of input.
    void ExpectEmptyRestOfLine();

    // Prints "<file>:<line>: error: <message>" to stderr and exits with
    // status 1. Takes a printf-style format string; never returns.
    [[noreturn]] void RaiseError(const char* format, ...);

private:
    char* m_buffer;          // file contents followed by a terminating NUL
    long m_pos;              // index of the next unread character in m_buffer
    long m_size;             // size of the file in bytes (terminator excluded)
    long m_lineNum;          // 1-based line number reported by RaiseError
    std::string m_filename;  // file name reported by RaiseError

    // Overwrites every '@' comment in the buffer with spaces.
    void RemoveComments();

    // Consumes a run of identifier characters and returns it.
    std::string ReadConstant();

    // Advances the cursor past spaces and tabs (newlines are not skipped).
    void SkipWhitespace();
};

// Loads the whole of `filename` into a NUL-terminated buffer, resets the
// cursor to the start of line 1 and blanks out the file's comments.
//
// Every failure is reported through FATAL_ERROR (declared outside this file;
// presumably it terminates the program - confirm in preproc.h).
//
// An empty file is accepted and yields an empty buffer.
CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == nullptr)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    // Without this check a failed seek would leave the position at 0 and the
    // file would silently be treated as empty.
    if (std::fseek(fp, 0, SEEK_END) != 0)
        FATAL_ERROR("Failed to seek in \"%s\".\n", filename.c_str());

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    // One extra byte for the terminating NUL the parser relies on.
    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() returns 0 when the element size is 0, so an empty file must not
    // be passed through the "!= 1" check or it would be reported as a read
    // failure.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;

    RemoveComments();
}

// Releases the file contents buffer that the constructor allocated with new[].
CharmapReader::~CharmapReader()
{
    delete[] m_buffer;
}

// Reads the left-hand side of the next charmap entry.
//
// Blank lines (lines holding only spaces/tabs) are skipped first. What
// follows must be one of:
//   - a quoted UTF-8 character literal, optionally backslash-escaped
//       'x'           -> LhsType::Char,   code = code point of x
//       '\'' or '\\'  -> LhsType::Char,   code = the quote / the backslash
//       '\x' (other)  -> LhsType::Escape, code = code point of x (ASCII only)
//   - an identifier   -> LhsType::Constant, name = the identifier
//   - end of input    -> LhsType::None
// Anything else is reported through RaiseError(), which does not return.
Lhs CharmapReader::ReadLhs()
{
    Lhs lhs;
    // Give the fields that a particular branch does not fill a defined value.
    lhs.type = LhsType::None;
    lhs.code = 0;

    // Skip blank lines, keeping the line counter in sync.
    for (;;)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] == '\n')
        {
            m_pos++;
            m_lineNum++;
        }
        else
        {
            break;
        }
    }

    if (m_buffer[m_pos] == '\'')
    {
        m_pos++;

        bool isEscape = (m_buffer[m_pos] == '\\');

        if (isEscape)
        {
            m_pos++;
        }

        unsigned char c = m_buffer[m_pos];

        if (c == 0)
        {
            // A NUL at m_size is the terminator; anywhere else it is file data.
            if (m_pos >= m_size)
                RaiseError("unexpected EOF in UTF-8 character literal");
            else
                RaiseError("unexpected null character in UTF-8 character literal");
        }

        // An unescaped quote right after the opening quote means ''. Check it
        // here, before the closing-quote test below can misreport it as an
        // unterminated literal.
        if (!isEscape && c == '\'')
            RaiseError("empty character literal");

        if (IsAscii(c) && !IsAsciiPrintable(c))
            RaiseError("unexpected character U+%X in UTF-8 character literal", static_cast<unsigned int>(c));

        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
        std::int32_t code = unicodeChar.code;

        if (code == -1)
            RaiseError("invalid encoding in UTF-8 character literal");

        m_pos += unicodeChar.encodingLength;

        if (m_buffer[m_pos] != '\'')
            RaiseError("unterminated character literal");

        m_pos++;

        lhs.code = code;

        if (isEscape)
        {
            if (code >= 128)
                RaiseError("escapes using non-ASCII characters are invalid");

            switch (code)
            {
            case '\'':
            case '\\':
                // An escaped quote or backslash stands for that character
                // itself, so it is stored as an ordinary character.
                lhs.type = LhsType::Char;
                break;
            case '"':
                RaiseError("cannot escape double quote");
                break;
            default:
                lhs.type = LhsType::Escape;
                break;
            }
        }
        else
        {
            lhs.type = LhsType::Char;
        }
    }
    else if (IsIdentifierStartingChar(m_buffer[m_pos]))
    {
        lhs.type = LhsType::Constant;
        lhs.name = ReadConstant();
    }
    else if (m_buffer[m_pos] == '\r')
    {
        RaiseError("only Unix-style LF newlines are supported");
    }
    else if (m_buffer[m_pos] == 0)
    {
        // A NUL before m_size is embedded file data, not the terminator.
        if (m_pos < m_size)
            RaiseError("unexpected null character");
        lhs.type = LhsType::None;
    }
    else
    {
        RaiseError("junk at start of line");
    }

    return lhs;
}

// Skips leading spaces/tabs and consumes the '=' that separates the two
// sides of an entry. Any other character is reported through RaiseError().
void CharmapReader::ExpectEqualsSign()
{
    SkipWhitespace();

    const bool foundEquals = (m_buffer[m_pos] == '=');

    if (!foundEquals)
        RaiseError("expected equals sign");

    ++m_pos;
}

// Returns the value (0-15) of an ASCII hexadecimal digit, accepting both
// upper- and lower-case letters. Any other character yields 0.
static unsigned int ConvertHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';

    if ('A' <= c && c <= 'F')
        return c - 'A' + 10;

    if ('a' <= c && c <= 'f')
        return c - 'a' + 10;

    return 0;
}

// Reads the right-hand side of an entry: one or more bytes, each written as
// exactly two hex digits, optionally separated by spaces/tabs. Returns the
// decoded bytes and leaves the cursor after the whitespace that follows the
// last byte.
//
// Errors (reported through RaiseError()): more than
// kMaxCharmapSequenceLength bytes, a byte with a single hex digit, or no
// bytes at all.
std::string CharmapReader::ReadSequence()
{
    SkipWhitespace();

    std::string bytes;
    unsigned int count = 0;

    for (;;)
    {
        // A byte is only accepted when both of its digits are present.
        if (!IsAsciiHexDigit(m_buffer[m_pos]))
            break;
        if (!IsAsciiHexDigit(m_buffer[m_pos + 1]))
            break;

        const unsigned int high = ConvertHexDigit(m_buffer[m_pos]);
        const unsigned int low = ConvertHexDigit(m_buffer[m_pos + 1]);

        m_pos += 2;
        count++;

        if (count > kMaxCharmapSequenceLength)
            RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);

        const unsigned char value = high * 16 + low;
        bytes += value;

        SkipWhitespace();
    }

    // A hex digit left over here is the first half of an incomplete byte.
    if (IsAsciiHexDigit(m_buffer[m_pos]))
        RaiseError("each byte must have 2 hex digits");

    if (count == 0)
        RaiseError("expected byte sequence");

    return bytes;
}

// Verifies that nothing but spaces/tabs remains on the current line.
// A newline is consumed and bumps the line counter; the end of the input is
// accepted without being consumed. Everything else is reported through
// RaiseError().
void CharmapReader::ExpectEmptyRestOfLine()
{
    SkipWhitespace();

    switch (m_buffer[m_pos])
    {
    case 0:
        // A NUL before m_size is embedded file data, not the terminator.
        if (m_pos < m_size)
            RaiseError("unexpected null character");
        break;

    case '\n':
        ++m_pos;
        ++m_lineNum;
        break;

    case '\r':
        RaiseError("only Unix-style LF newlines are supported");
        break;

    default:
        RaiseError("junk at end of line");
        break;
    }
}

void CharmapReader::RaiseError(const char* format, ...)
{
    const int bufferSize = 1024;
    char buffer[bufferSize];

    std::va_list args;
    va_start(args, format);
    std::vsnprintf(buffer, bufferSize, format, args);
    va_end(args);

    std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);

    std::exit(1);
}

// Blanks out comments in the loaded buffer.
//
// A comment starts at an '@' outside of a single-quoted literal and runs up
// to (not including) the next newline or the end of the buffer; every
// character of it is overwritten with a space, so positions and line numbers
// are unchanged.
//
// Inside a literal a backslash escapes the character after it, whatever that
// character is. Treating only \' as an escape would misread '\\': the second
// backslash and the closing quote would be taken as an escaped quote, the
// literal would never be closed, and the in/out-of-literal state would be
// inverted for the rest of the file.
void CharmapReader::RemoveComments()
{
    long pos = 0;
    bool inString = false;

    // Scanning stops at the first NUL byte in the buffer.
    while (m_buffer[pos] != 0)
    {
        if (inString)
        {
            if (m_buffer[pos] == '\\' && m_buffer[pos + 1] != 0)
            {
                // Skip the backslash together with the escaped character.
                pos += 2;
            }
            else
            {
                if (m_buffer[pos] == '\'')
                    inString = false;
                pos++;
            }
        }
        else if (m_buffer[pos] == '@')
        {
            while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
                m_buffer[pos++] = ' ';
        }
        else
        {
            if (m_buffer[pos] == '\'')
                inString = true;
            pos++;
        }
    }
}

// Consumes the run of identifier characters that starts at the cursor and
// returns it as a string. The cursor is left on the first character that is
// not an identifier character.
std::string CharmapReader::ReadConstant()
{
    const long begin = m_pos;

    for (; IsIdentifierChar(m_buffer[m_pos]); ++m_pos)
    {
    }

    const long count = m_pos - begin;

    return std::string(m_buffer + begin, count);
}

// Moves the cursor past any spaces and tabs. Newlines are left in place so
// that callers can count lines themselves.
void CharmapReader::SkipWhitespace()
{
    for (;;)
    {
        const char current = m_buffer[m_pos];

        if (current != ' ' && current != '\t')
            return;

        ++m_pos;
    }
}

// Builds the charmap from `filename`.
//
// Each entry has the form "<lhs> = <hex bytes>" on a line of its own. The
// byte sequence is stored in m_chars, m_escapes or m_constants depending on
// the kind of left-hand side. Defining the same key twice is reported
// through the reader's RaiseError().
Charmap::Charmap(std::string filename)
{
    CharmapReader reader(filename);

    Lhs lhs = reader.ReadLhs();

    // ReadLhs() yields LhsType::None once the input is exhausted.
    while (lhs.type != LhsType::None)
    {
        reader.ExpectEqualsSign();

        const std::string sequence = reader.ReadSequence();

        if (lhs.type == LhsType::Char)
        {
            const bool alreadyDefined = m_chars.find(lhs.code) != m_chars.end();
            if (alreadyDefined)
                reader.RaiseError("redefining char");
            m_chars[lhs.code] = sequence;
        }
        else if (lhs.type == LhsType::Escape)
        {
            // An escape counts as defined once its sequence is non-empty.
            if (m_escapes[lhs.code].length() != 0)
                reader.RaiseError("redefining escape");
            m_escapes[lhs.code] = sequence;
        }
        else if (lhs.type == LhsType::Constant)
        {
            const bool alreadyDefined = m_constants.find(lhs.name) != m_constants.end();
            if (alreadyDefined)
                reader.RaiseError("redefining constant");
            m_constants[lhs.name] = sequence;
        }

        reader.ExpectEmptyRestOfLine();

        lhs = reader.ReadLhs();
    }
}