#drinc:util.g
#toy.g
#externs.g

/*
 * lex.d: lexical scanner - break input stream into tokens.
 */

uint
    DEFAULT_STRING_BUFFER_SIZE = 256,
    SOURCE_BUFFER_SIZE = 512;

/* a buffer to hold an input string constant or identifier */

*char StringBuffer;
ulong StringBufferSize, StringBufferPos;

/* a buffer to hold input text from the source file */

[SOURCE_BUFFER_SIZE]char SourceBuffer;
uint SourcePos, SourceMax;

/* our current state during the lexical scan */

uint CurrentLine, PreviousLine, CurrentColumn, PreviousColumn;
char CurrentChar, NextChar;
enum {Eof_none, Eof_one, Eof_two} EofState;
*SymbolEntry_t CurrentSymbol;
*char CurrentString;
ulong CurrentNumber;
TokenKind_t CurrentToken;


/*
 * errorHere - signal an error occurring at the current input position.
 */

proc errorHere(*char message)void:

    errorPosition(PreviousLine, PreviousColumn);
    printString(message);
    printString("\n");
corp;

/*
 * errorHereThree - a three-part error message.
 */

proc errorHereThree(*char m1, m2, m3)void:

    errorPosition(PreviousLine, PreviousColumn);
    printString(m1);
    printString(m2);
    printString(m3);
    printString("\n");
corp;

/*
 * nextChar - get the next input character.
 */

proc nextChar()void:

    CurrentChar := NextChar;
    PreviousLine := CurrentLine;
    PreviousColumn := CurrentColumn;
    if CurrentChar = '\n' then
	CurrentLine := CurrentLine + 1;
	CurrentColumn := 0;
    elif CurrentChar = '\t' then
	CurrentColumn := CurrentColumn / 8 * 8 + 8;
    else
	CurrentColumn := CurrentColumn + 1;
    fi;
    if EofState = Eof_one then
	EofState := Eof_two;
	NextChar := ' ';
    else
	if SourcePos = SourceMax then
	    SourceMax := readSource(&SourceBuffer[0], SOURCE_BUFFER_SIZE);
	    if SourceMax = 0 then
		SourcePos := 0;
		EofState := Eof_one;
		NextChar := ' ';
	    else
		NextChar := SourceBuffer[0];
		SourcePos := 1;
	    fi;
	else
	    NextChar := SourceBuffer[SourcePos];
	    SourcePos := SourcePos + 1;
	fi;
    fi;
corp;

/*
 * addChar - add a character to the current string buffer.
 */

proc addChar(char ch)void:
    *char newBuffer;
    ulong newSize;

    if StringBufferPos = StringBufferSize then
	newSize := StringBufferSize * 2;
	newBuffer := memAlloc(newSize * sizeof(char));
	BlockCopy(newBuffer, StringBuffer, StringBufferSize * sizeof(char));
	memFree(StringBuffer, StringBufferSize * sizeof(char));
	StringBufferSize := newSize;
	StringBuffer := newBuffer;
    fi;
    (StringBuffer + StringBufferPos * sizeof(char))* := ch;
    StringBufferPos := StringBufferPos + 1;
corp;

/*
 * whiteSpace - skip past spaces, tabs, newlines, comments.
 */

proc whiteSpace()void:
    register uint level;

    level := 0;
    while EofState ~= Eof_two and
	(CurrentChar = ' ' or CurrentChar = '\t' or CurrentChar = '\n' or
	 CurrentChar = '/' and NextChar = '*' or
	 level ~= 0)
    do
	if CurrentChar = '/' and NextChar = '*' then
	    level := level + 1;
	    nextChar();
	elif CurrentChar = '*' and NextChar = '/' then
	    level := level - 1;
	    nextChar();
	fi;
	nextChar();
    od;
corp;

/*
 * nextToken - get the next input token.
 */

proc nextToken()void:
    register TokenKind_t tk;
    register int n;

    whiteSpace();

    if EofState = Eof_two then
	tk := tk_eof;
    else
	tk := tk_illegal;
	case CurrentChar
	incase 'a'..'z':
	incase 'A'..'Z':
	incase '_':
	    StringBufferPos := 0;
	    while (CurrentChar >= 'a' and CurrentChar <= 'z' or
		CurrentChar >= 'A' and CurrentChar <= 'Z' or
		CurrentChar >= '0' and CurrentChar <= '9' or
		CurrentChar = '_') and EofState ~= Eof_two
	    do
		addChar(CurrentChar);
		nextChar();
	    od;
	    addChar('\e');
	    CurrentSymbol := enter(StringBuffer, StringBufferPos);
	    if CurrentSymbol*.se_kind = sk_reservedWord then
		tk := CurrentSymbol*.se_value + tk_eof;
	    else
		tk := tk_id;
	    fi;
	incase '0'..'9':
	    tk := tk_number;
	    CurrentNumber := 0;
	    while CurrentChar >= '0' and CurrentChar <= '9' and
		EofState ~= Eof_two
	    do
		CurrentNumber := CurrentNumber * 10 + (CurrentChar - '0');
		nextChar();
	    od;
	incase '\"':
	    tk := tk_string;
	    StringBufferPos := 0;
	    while
		nextChar();
		while CurrentChar ~= '\"' and EofState ~= Eof_two do
		    if CurrentChar = '\\' then
			nextChar();
			if CurrentChar = 'n' then
			    CurrentChar := '\n';
			elif CurrentChar = 't' then
			    CurrentChar := '\t';
			elif CurrentChar = '(' then
			    nextChar();
			    if CurrentChar >= '0' and CurrentChar <= '9' then
				n := 0;
				while CurrentChar >= '0' and CurrentChar <= '9'
				do
				    n := n * 10 + (CurrentChar - '0');
				    nextChar();
				od;
				if CurrentChar = ')' then
				    CurrentChar := n + '\e';
				else
				    errorHere(
					"missing ')' in escaped character");
				fi;
			    else
				errorHere(
				    "expecting digits for escaped character");
			    fi;
			fi;
		    fi;
		    addChar(CurrentChar);
		    nextChar();
		od;
		nextChar();
		whiteSpace();
		EofState ~= Eof_two and CurrentChar = '\"'
	    do
	    od;
	    addChar('\e');
	    CurrentString := memAlloc(StringBufferPos * sizeof(char));
	    BlockCopy(CurrentString, StringBuffer,
		      StringBufferPos * sizeof(char));
	    CurrentNumber := StringBufferPos;
	incase '=':
	    tk := tk_equal;
	    nextChar();
	incase '~':
	    if NextChar = '=' then
		tk := tk_notEqual;
		nextChar();
		nextChar();
	    else
		nextChar();
	    fi;
	incase '<':
	    if NextChar = '=' then
		tk := tk_lessEqual;
		nextChar();
	    else
		tk := tk_less;
	    fi;
	    nextChar();
	incase '>':
	    if NextChar = '=' then
		tk := tk_greaterEqual;
		nextChar();
	    else
		tk := tk_greater;
	    fi;
	    nextChar();
	incase ':':
	    if NextChar = '=' then
		tk := tk_assign;
		nextChar();
		nextChar();
	    else
		tk := tk_colon;
		nextChar();
	    fi;
	incase '+':
	    tk := tk_plus;
	    nextChar();
	incase '-':
	    tk := tk_minus;
	    nextChar();
	incase '*':
	    tk := tk_star;
	    nextChar();
	incase '/':
	    tk := tk_slash;
	    nextChar();
	incase '(':
	    tk := tk_leftParen;
	    nextChar();
	incase ')':
	    tk := tk_rightParen;
	    nextChar();
	incase ',':
	    tk := tk_comma;
	    nextChar();
	incase ';':
	    tk := tk_semicolon;
	    nextChar();
	default:
	    nextChar();
	esac;
    fi;
    CurrentToken := tk;
corp;

/*
 * getToken - return the current token.
 */

proc getToken(**SymbolEntry_t pSymbol; **char pBuffer;
	      *ulong pNumber)TokenKind_t:

    if pSymbol ~= nil then
	pSymbol* := CurrentSymbol;
    fi;
    if pBuffer ~= nil then
	pBuffer* := CurrentString;
    fi;
    if pNumber ~= nil then
	pNumber* := CurrentNumber;
    fi;
    CurrentToken
corp;

/*
 * getSimpleToken - get a token, ignoring anything except the token kind.
 */

proc getSimpleToken()TokenKind_t:

    CurrentToken
corp;

/*
 * skipToken - totally skip over the current token.
 */

proc skipToken()void:

    if CurrentToken = tk_string then
	memFree(CurrentString, CurrentNumber);
    fi;
    nextToken();
corp;

/*
 * lexInit - initialize for lexical scanning.
 */

proc lexInit()void:
    uint
	MAX_RESERVED_WORD_LENGTH = 6,
	RESERVED_WORD_COUNT = 17;
    type reservedWord_t = struct {
	[MAX_RESERVED_WORD_LENGTH + 1]char rw_name;
	ulong rw_length;
	TokenKind_t rw_token;
    };
    [RESERVED_WORD_COUNT] reservedWord_t RESERVED_WORDS = (
	("proc"  , 4 + 1, tk_proc  ),
	("corp"  , 4 + 1, tk_corp  ),
	("void"  , 4 + 1, tk_void  ),
	("int"	 , 3 + 1, tk_int   ),
	("bool"  , 4 + 1, tk_bool  ),
	("true"  , 4 + 1, tk_true  ),
	("false" , 5 + 1, tk_false ),
	("if"	 , 2 + 1, tk_if    ),
	("then"  , 4 + 1, tk_then  ),
	("elif"  , 4 + 1, tk_elif  ),
	("else"  , 4 + 1, tk_else  ),
	("fi"	 , 2 + 1, tk_fi    ),
	("while" , 5 + 1, tk_while ),
	("do"	 , 2 + 1, tk_do    ),
	("od"	 , 2 + 1, tk_od    ),
	("readln", 6 + 1, tk_readln),
	("write" , 5 + 1, tk_write )
    );
    register *reservedWord_t rw;
    register *SymbolEntry_t se;
    register uint i;

    rw := &RESERVED_WORDS[0];
    for i from 0 upto RESERVED_WORD_COUNT - 1 do
	se := enter(&rw*.rw_name[0], rw*.rw_length);
	se*.se_kind := sk_reservedWord;
	se*.se_value := rw*.rw_token - tk_eof;
	rw := rw + sizeof(reservedWord_t);
    od;
    StringBuffer := memAlloc(DEFAULT_STRING_BUFFER_SIZE);
    StringBufferSize := DEFAULT_STRING_BUFFER_SIZE;
    SourcePos := 0;
    SourceMax := 0;
    EofState := Eof_none;
    nextChar();
    CurrentLine := 1;
    CurrentColumn := 1;
    nextChar();
    nextToken();
corp;

/*
 * lexTerm - terminate the lexical scanner stuff.
 */

proc lexTerm()void:

    if StringBuffer ~= nil then
	memFree(StringBuffer, StringBufferSize * sizeof(char));
    fi;
corp;
