/* --------------------------------------------------------------------------
 *
 * Copyright (C) 2007 Leif Erik Larsen, Kjerringvik, Norway.
 *
 * This file is part of the Open Source Edition of Larsen Commander, as
 * available from http://home.online.no/~leifel/lcmd/.  This code is free 
 * software; you can redistribute it and/or modify it under the terms of 
 * the GNU General Public License version 3 only, as published by the 
 * Free Software Foundation.  
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 3 at http://www.gnu.org/licenses/gpl-3.0.txt for more details 
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * ------------------------------------------------------------------------ */

#include "glib/util/GLog.h"
#include "glib/util/GTokenizer.h"
#include "glib/exceptions/GSyntaxErrorException.h"
#include "glib/primitives/GInteger.h"
#include "glib/io/GStringStream.h"
#include "glib/io/GInputStream.h"

// The set of characters that each form a token by themselves. Used as the
// initial "special tokens" by the constructors that don't take an
// explicit special tokens argument.
const GString GTokenizer::DefaultSpecialTokens = ",;:/-+*=?!()[]{}<>%&|\\";

// The token text that queryArgValueString() expects in front of a value.
const GString GTokenizer::Token_eq("=");

/**
 * Create a tokenizer that reads the bytes to tokenize from the
 * specified stream, using the default set of special tokens.
 *
 * Only the address of the stream is stored, and the stream is not deleted
 * by the destructor (autoDeleteStream is false). The caller must therefore
 * keep the stream alive for as long as the tokenizer is used.
 *
 * @param stream     The stream of which to read the bytes to tokenize.
 * @param whiteSpace True if <i>getNextToken()</i> should return
 *                   whitespace as tokens, or else false to skip it.
 */
GTokenizer::GTokenizer ( GInputStream& stream, bool whiteSpace )
           :nextByte(0), // Zero means "no pushed back byte".
            tokenizeWhiteSpace(whiteSpace),
            inputstream(&stream),
            autoDeleteStream(false), // The stream belongs to the caller.
            specialTokens(GTokenizer::DefaultSpecialTokens),
            currentLineNum(0),
            currentColumnPos(0),
            specialSymbols(16), // Presumably the initial capacity; confirm against GArray.
            ignoreForcingCharacter(false),
            forcingCharacter('\\')
{
}

/**
 * Create a tokenizer that tokenizes the specified string, using the
 * default set of special tokens.
 *
 * The string is wrapped in a new GStringStream, which is deleted by
 * the destructor (autoDeleteStream is true).
 *
 * @param str        The text to tokenize.
 * @param whiteSpace True if <i>getNextToken()</i> should return
 *                   whitespace as tokens, or else false to skip it.
 */
GTokenizer::GTokenizer ( const GString& str, bool whiteSpace )
           :nextByte(0), // Zero means "no pushed back byte".
            tokenizeWhiteSpace(whiteSpace),
            inputstream(new GStringStream(str)),
            autoDeleteStream(true), // We created the stream, so we delete it.
            specialTokens(GTokenizer::DefaultSpecialTokens),
            currentLineNum(0),
            currentColumnPos(0),
            specialSymbols(16), // Presumably the initial capacity; confirm against GArray.
            ignoreForcingCharacter(false),
            forcingCharacter('\\')
{
}

/**
 * Create a tokenizer that tokenizes the specified string, using a
 * caller defined set of special tokens.
 *
 * The string is wrapped in a new GStringStream, which is deleted by
 * the destructor (autoDeleteStream is true).
 *
 * @param str           The text to tokenize.
 * @param specialTokens The characters that each form a token by
 *                      themselves.
 * @param whiteSpace    True if <i>getNextToken()</i> should return
 *                      whitespace as tokens, or else false to skip it.
 */
GTokenizer::GTokenizer ( const GString& str, 
                         const GString& specialTokens, 
                         bool whiteSpace )
           :nextByte(0), // Zero means "no pushed back byte".
            tokenizeWhiteSpace(whiteSpace),
            inputstream(new GStringStream(str)),
            autoDeleteStream(true), // We created the stream, so we delete it.
            specialTokens(specialTokens),
            currentLineNum(0),
            currentColumnPos(0),
            specialSymbols(16), // Presumably the initial capacity; confirm against GArray.
            ignoreForcingCharacter(false),
            forcingCharacter('\\')
{
}

/**
 * Create a tokenizer that tokenizes the specified string, using caller
 * defined sets of deliminators and special tokens. Whitespace is never
 * returned as tokens by <i>getNextToken()</i> of a tokenizer created
 * with this constructor (tokenizeWhiteSpace is false).
 *
 * The string is wrapped in a new GStringStream, which is deleted by
 * the destructor (autoDeleteStream is true).
 *
 * @param str            The text to tokenize.
 * @param deliminators   The characters to treat as whitespace. If this
 *                       string is empty then every character code <= 32
 *                       is treated as whitespace (see isWhiteSpace()).
 * @param specialTokens  The characters that each form a token by
 *                       themselves.
 * @param ignoreForcingCharacter True to treat the forcing character
 *                       (backslash) inside quoted strings as any other
 *                       character, or else false to let it introduce
 *                       an escape sequence.
 */
GTokenizer::GTokenizer ( const GString& str, 
                         const GString& deliminators, 
                         const GString& specialTokens, 
                         bool ignoreForcingCharacter )
           :nextByte(0), // Zero means "no pushed back byte".
            tokenizeWhiteSpace(false),
            inputstream(null), // Assigned in the constructor body below.
            autoDeleteStream(true), // We create the stream, so we delete it.
            deliminators(deliminators),
            specialTokens(specialTokens),
            currentLineNum(0),
            currentColumnPos(0),
            specialSymbols(16), // Presumably the initial capacity; confirm against GArray.
            ignoreForcingCharacter(ignoreForcingCharacter),
            forcingCharacter('\\')
{
   // The stream is allocated here, after all members have been initialized.
   inputstream = new GStringStream(str);
}

/**
 * Destroy the tokenizer. The input stream is deleted only if the
 * tokenizer was flagged as responsible for it (autoDeleteStream).
 */
GTokenizer::~GTokenizer ()
{
   if (!autoDeleteStream)
      return; // The stream belongs to someone else.
   delete inputstream;
}

/**
 * Test if the specified byte value is to be treated as whitespace.
 *
 * When no deliminators are defined, every value less than or equal to 32
 * is whitespace (this includes the negative end-of-stream value).
 * Otherwise only negative values and the characters contained in the
 * deliminators string are whitespace.
 *
 * @param chr The byte value to test, or a negative value for
 *            end-of-stream.
 * @return    True if the value is whitespace, or else false.
 */
bool GTokenizer::isWhiteSpace ( int chr ) const 
{ 
   const bool noCustomDeliminators = (deliminators.length() == 0);
   if (noCustomDeliminators)
      return chr <= 32;

   if (chr < 0) // End-of-stream always counts as whitespace.
      return true;

   return deliminators.indexOf(char(chr)) >= 0;
}

void GTokenizer::setDeliminators ( const GString& delm ) 
{ 
   deliminators = delm; 
}

void GTokenizer::setSpecialTokens ( const GString& stok ) 
{ 
   specialTokens = stok; 
}

void GTokenizer::setSpecialSymbols ( const GArray<GString>& ssym )
{
   specialSymbols.removeAll();
   const int num = ssym.getCount();
   for (int i=0; i<num; i++)
   {
      GString* s = new GString(ssym[i]);
      specialSymbols.add(s);
   }
}

bool GTokenizer::isTokenizeWhiteSpace () const 
{ 
   return tokenizeWhiteSpace; 
}

/**
 * Get the next byte to tokenize, and keep the current line number
 * and column position up to date.
 *
 * A byte that has been pushed back (stored in nextByte) is returned
 * before anything more is read from the stream. A pushed back byte does
 * not touch the line and column counters again.
 *
 * @return The next byte, or -1 upon end of the stream.
 */
int GTokenizer::getNextByte ()
{
   // Serve the pushed back byte first, if any. Zero means "none".
   if (nextByte != 0)
   {
      const int pushedBack = nextByte;
      nextByte = 0;
      return pushedBack;
   }

   // Read from the stream. This will possibly throw GIOException, but
   // not upon end-of-stream.
   const int byteRead = inputstream->readByte();
   if (byteRead < 0) // If end of stream.
      return -1;

   if (byteRead != '\n')
   {
      currentColumnPos++;
      return byteRead;
   }

   // A linefeed starts a new line at column zero.
   currentLineNum++;
   currentColumnPos = 0;
   return byteRead;
}

/**
 * Get the next token, skipping or returning whitespace according to
 * the whitespace mode of the tokenizer.
 *
 * This method does not let GIOException escape. If reading the input
 * stream fails, the error is logged and an empty token is returned.
 *
 * @return A pointer to the internal token object, which is reused by
 *         every call. <i>isEmpty()</i> of the returned token is true
 *         upon end of stream as well as upon a read error.
 */
const GToken* GTokenizer::getNextToken ()
{
   const bool skipWhiteSpace = !tokenizeWhiteSpace;
   try {
      return getNextToken(skipWhiteSpace);
   } catch (GIOException& ioErr) {
      // The input stream could not be read. Report this to the caller
      // by returning a cleared token.
      GLog::Log(this, ioErr.getStackTrace(ioErr.toString()));
      currentToken.clear();
   }
   return &currentToken;
}

/**
 * Read and return the next token from the input stream.
 *
 * A token is one of:
 * <ul>
 * <li>A quoted string, started by a single or a double quote and
 *     terminated by the same quote character (or by end of stream).
 *     The quotes are not part of the token text. Unless
 *     ignoreForcingCharacter is set, the forcing character introduces
 *     an escape sequence: \b, \n, \r, \t, \\, \", one to three octal
 *     digits, or any other character, which is taken literally.
 * <li>A single special token character, possibly combined with the
 *     byte that follows it if the pair is a registered special symbol.
 * <li>A run of whitespace (only if whitespace is not ignored).
 * <li>A run of any other characters.
 * </ul>
 *
 * @param  ignoreWhiteSpace True to skip whitespace between tokens, or
 *         false to return each run of whitespace as a token of its own.
 * @return A pointer to the internal token object, which is cleared and
 *         refilled by every call. The token is empty upon end of stream.
 * @throws GIOException if reading the input stream fails (as noted on
 *         the read in getNextByte()).
 */
const GToken* GTokenizer::getNextToken ( bool ignoreWhiteSpace )
{
   int initialQuoteChar = 0; // The char that initiated the quoted string
   bool isWithinQuotes = false;

   currentToken.clear();

   for (;;)
   {
      if (isWithinQuotes) // If we are currently within a quoted string
      {
         currentToken.quoteChar = char(initialQuoteChar);
         currentToken.quoted = true;

         for (;;) // Find the terminating quote
         {
            int chr = getNextByte();

            // If we have reached the forcing character we must skip and
            // get the next character which is the one to force.
            if (chr == forcingCharacter && !ignoreForcingCharacter)
            {
               chr = getNextByte();
               switch (chr)
               {
                  case 'b':
                     chr = '\b';
                     break;

                  case 'n':
                     chr = '\n';
                     break;

                  case 'r':
                     chr = '\r';
                     break;

                  case 't':
                     chr = '\t';
                     break;

                  case '\\':
                     chr = '\\';
                     break;

                  case '\"':
                     chr = '\"';
                     break;

                  default:
                  {
                     // End of stream directly after the forcing character.
                     // There is nothing to force, so the token ends here
                     // rather than getting the EOF marker appended to it.
                     if (chr <= -1)
                        return &currentToken;

                     // Collect up to three octal digits ('0'..'7').
                     GString octalStr(3);
                     while (chr >= '0' && chr <= '7')
                     {
                        octalStr += char(chr);
                        if (octalStr.length() >= 3)
                           break; // Full sequence; no lookahead was read.
                        chr = getNextByte();
                     }

                     if (octalStr.length() > 0)
                     {
                        // With fewer than three digits the loop was ended
                        // by a lookahead byte that is not part of the
                        // escape sequence (possibly the terminating quote
                        // or EOF). Push it back so that it is handled by
                        // the next iteration instead of being lost. (A
                        // zero byte can't be pushed back, since zero
                        // means "nothing pushed back".)
                        if (octalStr.length() < 3)
                           this->nextByte = chr;
                        chr = GInteger::ParseInt(octalStr, 8);
                     }
                     // Else: Not an octal digit, so the forced character
                     // is taken literally (e.g. an escaped single quote).
                     break;
                  }
               }
            }
            else
            if (chr <= -1 || // If EOF
                chr == initialQuoteChar)
            {
               return &currentToken;
            }

            currentToken.append(char(chr));
         }
      }

      else
      {
         int curByte = getNextByte();
         if (isWhiteSpace(curByte)) // Note that EOF counts as whitespace.
         {
            if (ignoreWhiteSpace)
            {
               // Ignore all whitespace.
               do {
                  if (currentToken.getLength() > 0)
                     return &currentToken; // We have reached the end of the token.
                  if (curByte <= -1) // If EOF
                     return &currentToken; // Token is cleared, so isEmpty() will return true.
                  curByte = getNextByte();
               } while (isWhiteSpace(curByte));
               // Here curByte is the first byte of the next token.
            }
            else
            {
               // Whitespace is to be returned as tokens just as
               // normal tokens.
               if (currentToken.getLength() <= 0)
               {
                  // Collect the whole run of whitespace into the token.
                  do
                  {
                     if (curByte <= -1) // If EOF
                     {
                        if (currentToken.getLength() > 0)
                           break;
                        else
                           return &currentToken; // Token is cleared, so isEmpty() will return true.
                     }
                     currentToken.append((char) curByte);
                     curByte = getNextByte();
                  } while (isWhiteSpace(curByte));
               }
               this->nextByte = curByte; // Push the character back to the byte stream
               return &currentToken; // We have reached the end of the token
            }
         }

         // If current character is the very first character of the
         // next token and that character is one of the characters
         // that are defined as "special single character tokens" then
         // break the loop as is about to fetch next token since this
         // single character is a token by it self.

         if (specialTokens.indexOf(char(curByte)) >= 0)
         {
            if (currentToken.getLength() == 0)
            {
               currentToken.append((char) curByte);
               if (specialSymbols.getCount() > 0)
               {
                  // Look one byte ahead to see if the two bytes together
                  // form one of the registered special symbols.
                  curByte = getNextByte();
                  if (curByte >= 0) // If not EOF
                  {
                     GString tok = currentToken.toString() + char(curByte);
                     const int num = specialSymbols.getCount();
                     for (int i=0; i<num; i++)
                     {
                        if (specialSymbols[i] == tok)
                        {
                           currentToken.append(char(curByte));
                           curByte = 0; // Consumed, so nothing to push back.
                           break;
                        }
                     }
                  }
                  this->nextByte = curByte; // Push the character back to the byte stream
               }
            }
            else
               this->nextByte = curByte; // Push the character back to the byte stream
            return &currentToken;
         }

         else
         if (curByte == '\"' || curByte == '\'') // If start of new quoted string
         {
            if (currentToken.getLength() > 0)
            {
               this->nextByte = curByte; // Push the character back to the byte stream
               return &currentToken;
            }
            else
            {
               initialQuoteChar = curByte;
               isWithinQuotes = true;
               continue;
            }
         }

         else
         {
            currentToken.append(char(curByte));
         }
      }
   }
}

/**
 * Get the next token as an abstract token. This is the same as
 * calling <i>getNextToken()</i> without arguments.
 *
 * @return The next token, which is empty upon end of stream.
 */
const GAbstractToken* GTokenizer::getNextAbstractToken () 
{ 
   const GToken* tok = getNextToken();
   return tok;
}

int GTokenizer::getCurModuleColumn () const 
{ 
   return currentColumnPos; 
}

int GTokenizer::getCurModuleLineNr () const 
{ 
   return currentLineNum; 
}

/**
 * Read an "= value" pair from the stream and return the value.
 * Whitespace is always ignored while reading the two tokens.
 *
 * @return The text of the token that follows the equal sign.
 * @throws GSyntaxErrorException if the stream ends before both tokens
 *         have been read, or if the first token is not an equal sign.
 */
GString GTokenizer::queryArgValueString ()
{
   // The first token must be the equal sign.
   const GToken* tok = getNextToken(true);
   if (tok->isEmpty())
   {
      gthrow_(GSyntaxErrorException("Unexpected end of stream."));
   }
   if (*tok != Token_eq)
   {
      gthrow_(GSyntaxErrorException("Expected '=' but found '" + tok->toString() + "'"));
   }

   // The second token is the value it self.
   tok = getNextToken(true);
   if (tok->isEmpty())
   {
      gthrow_(GSyntaxErrorException("Unexpected end of stream."));
   }
   return tok->toString();
}

