#drinc:util.g #toy.g #externs.g /* * lex.d: lexical scanner - break input stream into tokens. */ uint DEFAULT_STRING_BUFFER_SIZE = 256, SOURCE_BUFFER_SIZE = 512; /* a buffer to hold an input string constant or identifier */ *char StringBuffer; ulong StringBufferSize, StringBufferPos; /* a buffer to hold input text from the source file */ [SOURCE_BUFFER_SIZE]char SourceBuffer; uint SourcePos, SourceMax; /* our current state during the lexical scan */ uint CurrentLine, PreviousLine, CurrentColumn, PreviousColumn; char CurrentChar, NextChar; enum {Eof_none, Eof_one, Eof_two} EofState; *SymbolEntry_t CurrentSymbol; *char CurrentString; ulong CurrentNumber; TokenKind_t CurrentToken; /* * errorHere - signal an error occurring at the current input position. */ proc errorHere(*char message)void: errorPosition(PreviousLine, PreviousColumn); printString(message); printString("\n"); corp; /* * errorHereThree - a three-part error message. */ proc errorHereThree(*char m1, m2, m3)void: errorPosition(PreviousLine, PreviousColumn); printString(m1); printString(m2); printString(m3); printString("\n"); corp; /* * nextChar - get the next input character. */ proc nextChar()void: CurrentChar := NextChar; PreviousLine := CurrentLine; PreviousColumn := CurrentColumn; if CurrentChar = '\n' then CurrentLine := CurrentLine + 1; CurrentColumn := 0; elif CurrentChar = '\t' then CurrentColumn := CurrentColumn / 8 * 8 + 8; else CurrentColumn := CurrentColumn + 1; fi; if EofState = Eof_one then EofState := Eof_two; NextChar := ' '; else if SourcePos = SourceMax then SourceMax := readSource(&SourceBuffer[0], SOURCE_BUFFER_SIZE); if SourceMax = 0 then SourcePos := 0; EofState := Eof_one; NextChar := ' '; else NextChar := SourceBuffer[0]; SourcePos := 1; fi; else NextChar := SourceBuffer[SourcePos]; SourcePos := SourcePos + 1; fi; fi; corp; /* * addChar - add a character to the current string buffer. */ proc addChar(char ch)void: *char newBuffer; ulong newSize; if StringBufferPos = StringBufferSize then newSize := StringBufferSize * 2; newBuffer := memAlloc(newSize * sizeof(char)); BlockCopy(newBuffer, StringBuffer, StringBufferSize * sizeof(char)); memFree(StringBuffer, StringBufferSize * sizeof(char)); StringBufferSize := newSize; StringBuffer := newBuffer; fi; (StringBuffer + StringBufferPos * sizeof(char))* := ch; StringBufferPos := StringBufferPos + 1; corp; /* * whiteSpace - skip past spaces, tabs, newlines, comments. */ proc whiteSpace()void: register uint level; level := 0; while EofState ~= Eof_two and (CurrentChar = ' ' or CurrentChar = '\t' or CurrentChar = '\n' or CurrentChar = '/' and NextChar = '*' or level ~= 0) do if CurrentChar = '/' and NextChar = '*' then level := level + 1; nextChar(); elif CurrentChar = '*' and NextChar = '/' then level := level - 1; nextChar(); fi; nextChar(); od; corp; /* * nextToken - get the next input token. */ proc nextToken()void: register TokenKind_t tk; register int n; whiteSpace(); if EofState = Eof_two then tk := tk_eof; else tk := tk_illegal; case CurrentChar incase 'a'..'z': incase 'A'..'Z': incase '_': StringBufferPos := 0; while (CurrentChar >= 'a' and CurrentChar <= 'z' or CurrentChar >= 'A' and CurrentChar <= 'Z' or CurrentChar >= '0' and CurrentChar <= '9' or CurrentChar = '_') and EofState ~= Eof_two do addChar(CurrentChar); nextChar(); od; addChar('\e'); CurrentSymbol := enter(StringBuffer, StringBufferPos); if CurrentSymbol*.se_kind = sk_reservedWord then tk := CurrentSymbol*.se_value + tk_eof; else tk := tk_id; fi; incase '0'..'9': tk := tk_number; CurrentNumber := 0; while CurrentChar >= '0' and CurrentChar <= '9' and EofState ~= Eof_two do CurrentNumber := CurrentNumber * 10 + (CurrentChar - '0'); nextChar(); od; incase '\"': tk := tk_string; StringBufferPos := 0; while nextChar(); while CurrentChar ~= '\"' and EofState ~= Eof_two do if CurrentChar = '\\' then nextChar(); if CurrentChar = 'n' then CurrentChar := '\n'; elif CurrentChar = 't' then CurrentChar := '\t'; elif CurrentChar = '(' then nextChar(); if CurrentChar >= '0' and CurrentChar <= '9' then n := 0; while CurrentChar >= '0' and CurrentChar <= '9' do n := n * 10 + (CurrentChar - '0'); nextChar(); od; if CurrentChar = ')' then CurrentChar := n + '\e'; else errorHere( "missing ')' in escaped character"); fi; else errorHere( "expecting digits for escaped character"); fi; fi; fi; addChar(CurrentChar); nextChar(); od; nextChar(); whiteSpace(); EofState ~= Eof_two and CurrentChar = '\"' do od; addChar('\e'); CurrentString := memAlloc(StringBufferPos * sizeof(char)); BlockCopy(CurrentString, StringBuffer, StringBufferPos * sizeof(char)); CurrentNumber := StringBufferPos; incase '=': tk := tk_equal; nextChar(); incase '~': if NextChar = '=' then tk := tk_notEqual; nextChar(); nextChar(); else nextChar(); fi; incase '<': if NextChar = '=' then tk := tk_lessEqual; nextChar(); else tk := tk_less; fi; nextChar(); incase '>': if NextChar = '=' then tk := tk_greaterEqual; nextChar(); else tk := tk_greater; fi; nextChar(); incase ':': if NextChar = '=' then tk := tk_assign; nextChar(); nextChar(); else tk := tk_colon; nextChar(); fi; incase '+': tk := tk_plus; nextChar(); incase '-': tk := tk_minus; nextChar(); incase '*': tk := tk_star; nextChar(); incase '/': tk := tk_slash; nextChar(); incase '(': tk := tk_leftParen; nextChar(); incase ')': tk := tk_rightParen; nextChar(); incase ',': tk := tk_comma; nextChar(); incase ';': tk := tk_semicolon; nextChar(); default: nextChar(); esac; fi; CurrentToken := tk; corp; /* * getToken - return the current token. */ proc getToken(**SymbolEntry_t pSymbol; **char pBuffer; *ulong pNumber)TokenKind_t: if pSymbol ~= nil then pSymbol* := CurrentSymbol; fi; if pBuffer ~= nil then pBuffer* := CurrentString; fi; if pNumber ~= nil then pNumber* := CurrentNumber; fi; CurrentToken corp; /* * getSimpleToken - get a token, ignoring anything except the token kind. */ proc getSimpleToken()TokenKind_t: CurrentToken corp; /* * skipToken - totally skip over the current token. */ proc skipToken()void: if CurrentToken = tk_string then memFree(CurrentString, CurrentNumber); fi; nextToken(); corp; /* * lexInit - initialize for lexical scanning. */ proc lexInit()void: uint MAX_RESERVED_WORD_LENGTH = 6, RESERVED_WORD_COUNT = 17; type reservedWord_t = struct { [MAX_RESERVED_WORD_LENGTH + 1]char rw_name; ulong rw_length; TokenKind_t rw_token; }; [RESERVED_WORD_COUNT] reservedWord_t RESERVED_WORDS = ( ("proc" , 4 + 1, tk_proc ), ("corp" , 4 + 1, tk_corp ), ("void" , 4 + 1, tk_void ), ("int" , 3 + 1, tk_int ), ("bool" , 4 + 1, tk_bool ), ("true" , 4 + 1, tk_true ), ("false" , 5 + 1, tk_false ), ("if" , 2 + 1, tk_if ), ("then" , 4 + 1, tk_then ), ("elif" , 4 + 1, tk_elif ), ("else" , 4 + 1, tk_else ), ("fi" , 2 + 1, tk_fi ), ("while" , 5 + 1, tk_while ), ("do" , 2 + 1, tk_do ), ("od" , 2 + 1, tk_od ), ("readln", 6 + 1, tk_readln), ("write" , 5 + 1, tk_write ) ); register *reservedWord_t rw; register *SymbolEntry_t se; register uint i; rw := &RESERVED_WORDS[0]; for i from 0 upto RESERVED_WORD_COUNT - 1 do se := enter(&rw*.rw_name[0], rw*.rw_length); se*.se_kind := sk_reservedWord; se*.se_value := rw*.rw_token - tk_eof; rw := rw + sizeof(reservedWord_t); od; StringBuffer := memAlloc(DEFAULT_STRING_BUFFER_SIZE); StringBufferSize := DEFAULT_STRING_BUFFER_SIZE; SourcePos := 0; SourceMax := 0; EofState := Eof_none; nextChar(); CurrentLine := 1; CurrentColumn := 1; nextChar(); nextToken(); corp; /* * lexTerm - terminate the lexical scanner stuff. */ proc lexTerm()void: if StringBuffer ~= nil then memFree(StringBuffer, StringBufferSize * sizeof(char)); fi; corp;