/*
 * Copyright (c) 2001
 * Pavel "EvilOne" Minayev
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Author makes no representations about
 * the suitability of this software for any purpose. It is provided
 * "as is" without express or implied warranty.
 */

import std.c.stdio;

import std.conv;
import std.string;
import std.stream;    //   don't forget to link with stream.obj!


// colors for syntax highlighting, default values are

// my preferences in Microsoft Visual Studio editor

// Palette used for syntax highlighting. Every entry is a six-digit
// hexadecimal RGB value written without the leading '#'; main() adds
// the '#' when it builds the <font color='#...'> tags.
class Colors
{
static:
    string keyword = "0000FF";   // reserved words
    string number  = "008000";   // numeric literals
    string astring = "000080";   // string and character literals
    string comment = "808080";   // line and block comments
}

// width of a tab stop in columns; main() uses it when expanding
// tab characters into spaces
const int tabsize = 4;  // number of spaces in tab

// single-character punctuation accepted by issymbol()
const char[24] symbols = "()[]{}.,;:=<>+-*/%&|^!~?";
// keyword table; main() fills it from d2html.kwd (one entry per line)
// and iskeyword() searches it
string[] keywords;

// true if c is whitespace, false otherwise

byte isspace(char c)
{
    // indexOf yields -1 when c is absent from the whitespace set
    immutable where = indexOf(whitespace, c);
    return where >= 0;
}

// true if c is a letter or an underscore, false otherwise

byte isalpha(char c)
{
    // an underscore is accepted wherever a letter is, so handle it first
    if (c == '_')
        return true;

    return indexOf(letters, c) >= 0;
}

// true if c is a decimal digit, false otherwise

byte isdigit(char c)
{
    // indexOf yields -1 when c is not one of the decimal digits
    immutable where = indexOf(digits, c);
    return where >= 0;
}

// true if c is a hexadecimal digit, false otherwise

byte ishexdigit(char c)
{
    // indexOf yields -1 when c is not in the hexdigits set
    immutable where = indexOf(hexdigits, c);
    return where >= 0;
}

// true if c is an octal digit, false otherwise

byte isoctdigit(char c)
{
    // indexOf yields -1 when c is not in the octdigits set
    immutable where = indexOf(octdigits, c);
    return where >= 0;
}

// true if c is legal D symbol other than above, false otherwise

byte issymbol(char c)
{
    // indexOf yields -1 when c is not listed in the symbols table
    immutable where = indexOf(symbols, c);
    return where >= 0;
}

// true if token is a D keyword, false otherwise

byte iskeyword(string token)
{
    // linear scan of the keyword table; cmp() yields 0 on an exact match
    foreach (key; keywords)
        if (cmp(key, token) == 0)
            return true;

    return false;
}

// Writes one source character to the HTML output, replacing the
// characters that are special in HTML markup ('<', '>' and '&') with
// the corresponding entity references.
private void writeHtmlChar(Stream dst, char c)
{
    switch (c)
    {
        case '<':
            dst.writeString("&lt;");
            break;

        case '>':
            dst.writeString("&gt;");
            break;

        case '&':
            dst.writeString("&amp;");
            break;

        default:
            dst.write(c);
            break;
    }
}

// Emits the spaces that stand for a tab character just read from src
// and then adjusts linestart so that later tabs on the same line are
// measured against the expanded text. The arithmetic is the one the
// converter has always used; it is only gathered in one place here.
private void writeExpandedTab(Stream src, Stream dst, ref ulong linestart)
{
    auto spaces = tabsize - (src.position() - linestart) % tabsize;

    for (int i = 0; i < spaces; i++)
        dst.writeString(" ");

    linestart = src.position() - tabsize + 1;
}

// Program entry point: converts a D source file to syntax-highlighted
// HTML.
//
//   args[1]  input file (required)
//   args[2]  output file (optional, defaults to args[1] ~ ".htm")
//
// The keyword list is read from "d2html.kwd" in the current directory.
// Returns 0 both after printing the usage text and after a completed
// conversion. A character that cannot start any recognised token
// raises an Error; failures to open or write a file propagate as
// stream exceptions.
int main(string[] args)
{
    // need help?

    if (args.length < 2 || args.length > 3)
    {
        printf("D to HTML converter\n"
               "Usage: D2HTML <program>.d [<file>.htm]\n");
        return 0;
    }

    // auto-name output file

    if (args.length == 2)
        args ~= args[1] ~ ".htm";

    // load keywords, one per line

    File kwd = new File("d2html.kwd");

    while (!kwd.eof())
        keywords ~= to!string(kwd.readLine());

    kwd.close();

    // open input and output files; the scope guards close them on
    // every exit path so that the output is never left unflushed

    File src = new File(args[1]);
    scope(exit) src.close();

    File dst = new File;
    scope(exit) dst.close();
    dst.create(args[2]);

    // write HTML header; the file name goes through the escaper because
    // it may contain characters that are special in HTML

    dst.writeString("<html><head><title>");

    foreach (char ch; args[1])
        writeHtmlChar(dst, ch);

    dst.writeLine("</title></head>");
    dst.writeLine("<body color='#000000' bgcolor='#FFFFFF'><pre><code>");

    // tracks whether a <font> element is currently open, so that it can
    // be closed if the input ends in the middle of a token

    bool fontOpen = false;

    void openFont(string color)
    {
        dst.writeString("<font color='#" ~ color ~ "'>");
        fontOpen = true;
    }

    void closeFont()
    {
        dst.writeString("</font>");
        fontOpen = false;
    }

    // the main part is wrapped into try..catch block because
    // when end of file is reached, an exception is raised;
    // so we can omit any checks for EOF inside this block...

    try
    {
        ulong linestart = 0;             // for tabs

        char c;
        src.read(c);

        while (true)
        {
            if (isspace(c))                     // whitespace
            {
                do
                {
                    if (c == 9)
                    {
                        // expand tabs to spaces
                        writeExpandedTab(src, dst, linestart);
                    }
                    else
                    {
                        // reset line start on newline
                        if (c == 10 || c == 13)
                            linestart = src.position() + 1;

                        dst.write(c);
                    }

                    src.read(c);
                } while (isspace(c));
            }
            else if (isalpha(c))                // keyword or identifier
            {
                string token;

                try
                {
                    do
                    {
                        token ~= c;
                        src.read(c);
                    } while (isalpha(c) || isdigit(c));
                }
                finally
                {
                    // emitted in a finally block so that an identifier
                    // ending exactly at end of file is not lost when
                    // the read above throws
                    if (iskeyword(token))               // keyword
                    {
                        openFont(Colors.keyword);
                        dst.writeString(token);
                        closeFont();
                    }
                    else                                // simple identifier
                        dst.writeString(token);
                }
            }
            else if (isdigit(c))                // numeric literal
            {
                openFont(Colors.number);

                // set when a 0x / 0b prefix was consumed; such literals
                // have no fractional part or exponent
                bool radixPrefixed = false;

                if (c == '0')
                {
                    dst.write(c);
                    src.read(c);

                    if (c == 'X' || c == 'x')           // hexadecimal
                    {
                        radixPrefixed = true;
                        dst.write(c);
                        src.read(c);

                        while (ishexdigit(c))
                        {
                            dst.write(c);
                            src.read(c);
                        }

                        // TODO: add support for hexadecimal floats
                    }
                    else if (c == 'B' || c == 'b')      // binary
                    {
                        radixPrefixed = true;
                        dst.write(c);
                        src.read(c);

                        while (c == '0' || c == '1')
                        {
                            dst.write(c);
                            src.read(c);
                        }
                    }

                    // otherwise: octal, a plain zero or something like
                    // "0.5"; the decimal scanner below handles all of
                    // these without swallowing the character that
                    // follows the zero
                }

                if (!radixPrefixed)
                {
                    // integral part

                    while (isdigit(c))
                    {
                        dst.write(c);
                        src.read(c);
                    }

                    // fractional part

                    if (c == '.')
                    {
                        dst.write(c);
                        src.read(c);

                        while (isdigit(c))
                        {
                            dst.write(c);
                            src.read(c);
                        }
                    }

                    // scientific notation

                    if (c == 'E' || c == 'e')
                    {
                        dst.write(c);
                        src.read(c);

                        if (c == '+' || c == '-')
                        {
                            dst.write(c);
                            src.read(c);
                        }

                        while (isdigit(c))
                        {
                            dst.write(c);
                            src.read(c);
                        }
                    }
                }

                // suffices

                while (c == 'U' || c == 'u' || c == 'L' ||
                       c == 'l' || c == 'F' || c == 'f')
                {
                    dst.write(c);
                    src.read(c);
                }

                closeFont();
            }
            else if (c == '#' || c == '\\')     // hash or backward slash
            {
                dst.write(c);
                src.read(c);
            }
            else if (c == '\'' || c == '"')     // character or string literal
            {
                immutable char quote = c;

                openFont(Colors.astring);
                dst.write(c);                   // opening quote
                src.read(c);

                while (c != quote)
                {
                    if (c == '\\')
                    {
                        // escape sequence: copy the backslash and let
                        // the escaped character be written below, so
                        // that \" \' and \\ cannot end the literal
                        dst.write(c);
                        src.read(c);
                    }

                    writeHtmlChar(dst, c);
                    src.read(c);
                }

                dst.write(c);                   // closing quote
                src.read(c);
                closeFont();
            }
            else if (issymbol(c))               // either operator or comment
            {
                if (c == '/')                   // could be a comment...
                {
                    src.read(c);

                    if (c == '/')               // single-line one
                    {
                        openFont(Colors.comment);
                        dst.writeString("//");
                        src.read(c);

                        // the terminating newline is left in c for the
                        // whitespace branch, which resets linestart
                        while (c != 10)
                        {
                            if (c == 9)
                                writeExpandedTab(src, dst, linestart);
                            else
                                writeHtmlChar(dst, c);

                            src.read(c);
                        }

                        closeFont();
                    }
                    else if (c == '*')          // multi-line one
                    {
                        openFont(Colors.comment);
                        dst.writeString("/*");

                        // prev starts out empty so that the '*' of the
                        // opening "/*" cannot pair with a '/' right
                        // after it ("/*/" does not close the comment)
                        char prev = '\0';
                        src.read(c);

                        while (!(c == '/' && prev == '*'))
                        {
                            if (c == 9)
                                writeExpandedTab(src, dst, linestart);
                            else
                            {
                                // reset line start on newline
                                if (c == 10 || c == 13)
                                    linestart = src.position() + 1;

                                writeHtmlChar(dst, c);
                            }

                            prev = c;
                            src.read(c);
                        }

                        dst.write(c);           // the closing '/'
                        closeFont();
                        src.read(c);
                    }
                    else                        // just an operator
                    {
                        // c already holds the character after the
                        // slash, so nothing is read here
                        dst.write(cast(char) '/');
                    }
                }
                else                            // just an operator
                {
                    writeHtmlChar(dst, c);
                    src.read(c);
                }
            }
            else
                // whatever it is, it's not a valid D token
                throw new Error("unrecognized token");
        }
    }

    // if end of file is reached and we try to read something
    // with typed read(), a ReadException is thrown; in our case,
    // this means that job is successfully done. Other exceptions
    // are real failures and are deliberately not caught here.

    catch (ReadException e)
    {
        // close a tag left open by a token cut short at end of file

        if (fontOpen)
            closeFont();

        // write HTML footer

        dst.writeLine("</code></pre></body></html>");
    }

    return 0;
}