%{ /* ********************************************* * * * Author: Donald Yessick * * C-- Recursive Descent Parser * * November 11, 2008 * * Credit Keneth Louden For C-- Specification. * Credit Dragon Book (Aho, Sethi, Ullman, et al) On General Principle * * c++ * flex * g++ * * ********************************************* */ #include #include #include #include using namespace std; #define PRINTTOKEN {if (token==T_WORD) cout<<"T_WORD" << flush; \ else if (token==T_NUMBER) cout<<"T_NUMBER" << flush; \ else cout<<(int)token<<(char)token; } #define PRINTEXPECTED {if (expected==T_WORD) cout<<"T_WORD" << flush; \ else if (expected==T_NUMBER) cout<<"T_NUMBER" << flush; \ else cout<<(int)expected<<(char)expected; } int bug(int token); /* ********************************************* * T_WORD keywords and calls and other vars * * T_NUMBER 9999999999 (TEN DIGITS) is max num * * although whitespace is stripped, most recent occurances * can be found in global strings whitespace and newline * * all other characters use ascii values as tokens. * ********************************************* */ #define T_NUMBER 999 #define T_WORD 1001 int pos =0; int token =0; int scanline =0; int linebreak =0; int lastpos =0; int prevpos =0; int lookaheadtoken =0; string newline =""; string whitespace =""; string *hash[255]={0}; %} LETTER [a-zA-Z] DIGIT [0-9] D01 {DIGIT}? D03 {D01}{D01}{D01} D09 {D03}{D03}{D03} IDCHAR ({LETTER}|{DIGIT}|[_]) WHITESPACE [ \t\f\n\b\a] ID ({LETTER}|[_]){IDCHAR}* NUMBER {DIGIT}{D09} COMMENT [/][*]([^*]*|[*]+[^/*])*[*]+[/] %% [\f] ; {ID} {return T_WORD;} {NUMBER} {cout << T_NUMBER << yytext << endl; return T_NUMBER;} {COMMENT} {\ for(char*cp=yytext;*cp;cp++) {\ pos++; if (*cp=='\n') {\ linebreak++;\ }\ }\ } [\n][ \t]* {\ pos+=yyleng;\ whitespace+=yytext;\ if (!linebreak) \ newline = yytext; \ else \ newline += yytext; \ cout << yytext;\ linebreak++;\ } {WHITESPACE} {pos+=yyleng;whitespace+=yytext; } . {\ cout << "\'" << yytext << "\'" << ((int)(yytext[0])) ;\ return yytext[0];\ } %% //unusual stuff (allows next file via yyin and return 0}; int yywrap(){return 1;} int bug(int token){ cout << yytext << "\t"; if (token == ';') cout << endl; } //prototypes; int main(int argc, char * argv[]); void nextToken(); void lookAhead(); bool accept(int expected); bool acceptError(int expected); bool acceptWarning(int expected); bool acceptCritical(int expected); bool in(char*expected); bool match(char*expected); bool accept(char*expected); bool acceptError(char*expected); bool acceptWarning(char*expected); bool acceptCritical(char*expected); bool acceptWarning(int expected){ if (token != expected){ cout << "\nline: " << scanline; cout << " WARNING found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected : " << flush; PRINTEXPECTED cout << "<" << expected << ">" << endl; return false; } nextToken(); return true; } bool acceptWarning(char*expected){ if (token != T_WORD) { cout << "\nline: " << scanline; cout << " WARNING found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; return false; } if (strcmp(yytext, expected)!=0){ cout << "\nline: " << scanline; cout << " WARNING found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; return false; } nextToken(); return true; } bool acceptError(int expected){ if (token != expected){ cout << "\nline: " << scanline; cout << " ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; PRINTEXPECTED cout << "<" << expected << ">" << endl; return false; } nextToken(); return true; } bool acceptError(char*expected){ if (token != T_WORD) { cout << "\nline: " << scanline; cout << " ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; return false; } if (strcmp(yytext, expected)!=0){ cout << "\nline: " << scanline; cout << " ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; return false; } nextToken(); return true; } bool acceptCritical(int expected){ if (token != expected){ cout << "\nline: " << scanline; cout << " CRITCAL ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; PRINTEXPECTED cout << "<" << expected << ">" << endl; exit(token); return false; } nextToken(); return true; } bool acceptCritical(char*expected){ if (token != T_WORD) { cout << "\nline: " << scanline; cout << " CRITCAL ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; exit(token); return false; } if (strcmp(yytext, expected)!=0){ cout << "\nline: " << scanline; cout << " CRITCAL ERROR found: \"" << yytext << "\"<" << token << ">" << flush; cout << " expected: " << flush; cout << "\"" << expected << "\"" << endl; exit(2); return false; } nextToken(); return true; } bool accept(int expected){ if (token != expected) return false; nextToken(); return true; } bool accept(char*expected){ if (token != T_WORD) return false; if (strcmp(yytext, expected)!=0) return false; nextToken(); return true; } void nextToken(){ if (token < 256){//yytext is next token cout << "<'"<<((char) token)<<"'>" << flush; scanline+=linebreak; if (linebreak) {cout << newline;linebreak=0;} token = lookaheadtoken; lookaheadtoken = 0; } else { cout << "<"<" << flush; token = yylex(); } lookAhead(); } void lookAhead(){ if (token && token < 256) lookaheadtoken = yylex(); } bool in(char *testset){ while (*testset){ if (token == *testset) return true; testset++; } return false; } bool match(char * expected){ if (token < 256) return false; //another hard bug return (strcmp(yytext, expected) == 0); } //main and cmm prototypes //int main(char * argv[], int argc); int parseCMM(); int firstVarDeclaration(); bool firstProgram(); bool firstDeclarationList(); bool firstDeclaration(); bool firstVarFunDeclaration(); bool firstTypeSpecifier(); bool firstFunDeclaration(); bool firstParams(); bool firstParamList(); bool firstParam(); bool firstCompoundStmt(); bool firstLocalDeclarations(); bool firstStatementList(); bool firstStatement(); bool firstExpressionStmt(); bool firstSelectionStmt(); bool firstIterationStmt(); bool firstReturnStmt(); bool firstExpression(); bool firstVar(); bool firstSimpleExpression(); bool firstAdditiveExpression(); bool firstTerm(); bool firstFactor(); bool firstCall(); bool firstArgs(); bool firstArgList(); bool firstRelop(); bool firstAddop(); bool firstMulop(); int parseProgram(); int parseDeclarationList(); int parseDeclaration(); int parseVarFunDeclaration(); int parseVarDeclaration(); int parseFunDeclaration(); int parseParams(); int parseParamList(); int parseParam(); int parseCompoundStmt(); int parseLocalDeclarations(); int parseStatementList(); int parseStatement(); int parseExpressionStmt(); int parseSelectionStmt(); int parseIterationStmt(); int parseReturnStmt(); int parseExpression(); int parseVar(); //int parseCall(); int parseArgs(); int parseArgList(); int parseSimpleExpression(int&); int parseAdditiveExpression(int&); int parseTerm(int&); int parseFactor(int&); int parseVarCall(int&); //see OPCODES and TYPES below int parseRelop(); int parseAddop(); int parseMulop(); int parseTypeSpecifier(); #define OPCODE_PLUS 1 #define OPCODE_MINUS 2 #define OPCODE_MULT 3 #define OPCODE_DIV 4 #define OPCODE_EQ 5 #define OPCODE_NE 6 #define OPCODE_LT 7 #define OPCODE_LE 8 #define OPCODE_GT 9 #define OPCODE_GE 10 #define TYPEVOID 11 #define TYPEINT 12 #define TYPEARRAY 13 #define TYPEBOOL 14 #define TYPEADDRESS 15 //code generation ofstream gencode("out/gen.smc"); //functions int fid=0; int fidcnt=0; int paramcnt[100] = {0}; string *fidnames = new string[100]; //environment, runtime stack string * names[1024]={0}; int * loc[1024]={0}; int * vtype[1024]={0}; int topenv = -1; int envsize[1024]={0}; int cntnames[1024]={0}; //helpers void pushEnv(); void pushEnv(int fid); void popEnv(); void makeVar(string name, int size, int vartype); bool getVar(string name, int &address,int &vartype, int &local); void initfids(); int createfid(string funname); int findFun(string name, int &numparams); void emit(int n); void emit(string s); int makeLabel(); string funLabel(int labelnum); string label(int labelnum); void emitReturn(); void emitReturnValue(int); void emitLabel(int labelnum); void emitFunLabel(int labelnum); void emitMakeVar(string name, int size); void emitBranchZero(int branchlabel); void emitJump(int branchlabel); //cleanup void emitPreamble(); void emitCallSwitch(int numfids); void emitPrologue(); void dumpVars(); /* ********************************************* * * * * ********************************************* */ #define TRACE if (trace) int trace(0); int main(int argc, char * argv[]) { trace = (argc > 1); initfids(); token = yylex(); emitPreamble(); cout << "CMM Parser" << endl; parseDeclarationList(); if (token) { cout << endl << "CMM Syntax Error" << endl; cout << endl << "bailing out" << endl; cout << pos << linebreak << scanline ; PRINTTOKEN; exit(token); } emitPrologue(); cout << endl << "build successful" << endl; return 0; } /* ********************************************* * * RECURSIVE DESCENT PARSER FOR CMM * WITH CODE GENERATION * ********************************************* */ int parseDeclarationList(){ //TRACE cout << "\tparseDeclarationList" << flush; TRACE emit("\n/*Declarations Section*/"); TRACE emit("\n/*Function Declarations Section*/"); if (!firstVarFunDeclaration()) { //TRACE cout << "\tparseDeclarationList Error " << yytext << flush; exit(token); } while(firstVarFunDeclaration() ){ parseVarFunDeclaration(); } emitCallSwitch(fidcnt); TRACE emit("\n/*Var Declarations Section*/"); emit("\nINIT:"); for (int i = 0; i < envsize[0]; i++) emit("\n\tpush(0)"); emit("\n\tstart"); return 0; } /* ********************************************* * * * * ********************************************* */ int parseVarFunDeclaration() { //TRACE cout << "\tparseVarFunDeclaration" << flush; int typespecifier; if (!(typespecifier=parseTypeSpecifier()) ) return 0; string name = yytext; if ( !acceptError(T_WORD) ) { cout << "Fatal error: expected id, found: " << yytext << endl; exit(1); } //parseFunDeclaration if ( accept('(') ) { fid = createfid(name); emitFunLabel(fid); pushEnv(fid); int pcnt = parseParams(); accept(')'); TRACE emit("\t/*function: "+name+ " label: "+ funLabel(fid) + " param cnt: "); TRACE emit(pcnt); TRACE emit(" */"); parseCompoundStmt(); TRACE emit("\n\t/* automatic return */"); if (typespecifier == TYPEVOID) emitReturn();//in case we fall through else emitReturnValue(typespecifier);//in case we fall through popEnv(); return 0; } //type checking if (typespecifier != TYPEINT){ cout << "invalid type found" << endl; exit(1); } //parseVarDeclaraton if ( accept('[') ) { if ( token == T_NUMBER ) { int n = atoi(yytext); acceptError(T_NUMBER); makeVar(name,n,TYPEARRAY); } acceptError(']'); TRACE emit("\n/*array "+name+" */"); } else { makeVar(name,1, TYPEINT); TRACE emit("\n/*int "+name+" */"); } accept(';'); return 0; } int parseVarDeclaration() { //TRACE cout << "\tparseVarDeclaration" << flush; if (parseTypeSpecifier() != TYPEINT ) return 0; string name = yytext; if ( !acceptError(T_WORD) ) { cout << "Fatal error: expected id, found: " << yytext << endl; exit(1); } if ( accept('[') ) { if ( token == T_NUMBER ) { int n = atoi(yytext); acceptError(T_NUMBER); makeVar(name,n,TYPEARRAY); } acceptError(']'); } else { makeVar(name,1,TYPEINT); } accept(';'); return 1; } int parseParams() { //TRACE cout << "\tparseParams" << flush; if ( accept("void") ){ paramcnt[fid] = 0; return 0; } //or return parseParamList(); } int parseParamList() { //TRACE cout << "\tparseParamList" << flush; parseParam(); paramcnt[fid] = 1; while (accept(',')) { parseParam(); paramcnt[fid]++; } return paramcnt[fid]; } int parseParam() { //TRACE cout << "\tparseParam" << flush; if ( parseTypeSpecifier() != TYPEINT) { cout << "invalid type found " << endl; exit(1); } string name = yytext; acceptError(T_WORD); if (accept('[')){ acceptError(']'); makeVar(name,1, TYPEARRAY); //TRACE cout << "ARRAY PARAM: ENV="<') ) { accept('='); return OPCODE_GE; } if ( accept('<') ) { accept('='); return OPCODE_LE; } } else { if ( accept('>') ) return OPCODE_GT; if ( accept('<') ) //hard to find bug was here!!! return OPCODE_LT; } return 0; } /* FIRST SETS */ int firstVarDeclaration() { //TRACE cout << "\tfirstVarDeclaration" << flush; if (token != T_WORD) return false; return (match("int") || match("void")); } bool firstVarFunDeclaration() { if (token != T_WORD) return false; return (match("int") || match("void")); } bool firstTypeSpecifier() { if (token != T_WORD) return false; return (match("int") || match("void")); } bool firstParams() { if (firstParamList()) return true; if (token != T_WORD) return false; return match("void"); } bool firstParamList() { return firstParam(); } bool firstParam() { return firstTypeSpecifier(); } bool firstCompoundStmt() { return ( token == '{' ); } bool firstLocalDeclarations() { if (token != T_WORD) return false; return (match("int") || match("void")); return firstVarDeclaration(); } bool firstStatementList() { return firstStatement(); } bool firstStatement() { return ( (token==';') || (token == T_WORD) //implies || match("if") || match("while") || match("return") || (token == T_NUMBER) || (token == '(') ); } bool firstExpression() { return ( token == T_WORD || (token == T_NUMBER) || (token == '(') ); } bool firstVar() { return token == T_WORD; } bool firstSimpleExpression() { if (token == T_NUMBER) return true; if (token == '(') return true; return (token == T_WORD); return firstAdditiveExpression(); } bool firstRelop() { if (lookaheadtoken == '=') return in("<>=!"); else return in("<>!"); } bool firstAdditiveExpression() { if (token == T_NUMBER) return true; if (token == '(') return true; return (token == T_WORD); //return firstTerm(); } bool firstAddop() { return in("+-"); } bool firstTerm() { if (token == T_NUMBER) return true; if (token == '(') return true; return (token == T_WORD); //return firstFactor(); } bool firstMulop() { return in("*/"); } bool firstFactor() { if (token == T_NUMBER) return true; if (token == '(') return true; return (token == T_WORD); } bool firstCall() { return (token == T_WORD); } bool firstArgs() { return firstArgList(); } bool firstArgList() { return firstExpression(); } bool firstIterationStmt() { return match("while"); } bool firstSelectionStmt() { return match("if"); } bool firstReturnStmt() { return match("return"); } bool firstExpressionStmt() { return ( firstExpression() || (token==';')); } /* CODE GEN UTILITIES */ void pushEnv(){ topenv++; envsize[topenv] = envsize[topenv-1]; //extends a function env cntnames[topenv] = 0; names[topenv] = new string[100]; loc[topenv] = new int[100]; vtype[topenv] = new int[100]; } void pushEnv(int dummy){ // a new global env topenv++; envsize[topenv] = 0; cntnames[topenv] = 0; names[topenv] = new string[100]; loc[topenv] = new int[100]; vtype[topenv] = new int[100]; } void popEnv(){ delete[] names[topenv]; delete[] loc[topenv]; topenv--; } void makeVar(string name, int size, int vartype){ //TRACE cout << "\nname \t size \t vartype \t topenv \t cntnames[topenv] \t envsize[topenv];" << endl; //TRACE cout << name <<"\t"<< size <<"\t"<< vartype <<"\t\t"<< topenv <<"\t\t"<< cntnames[topenv] <<"\t\ti\t"<< envsize[topenv] << endl; //name occupies size cells on the stack //registerVar(fidname[fid], name, size, envsize[fid]); for (int i=0; i < cntnames[topenv];i++){ if (name == names[topenv][i]){ cout << name << " has already been declared " << endl; exit(1); } } loc[topenv][cntnames[topenv]]=envsize[topenv]; vtype[topenv][cntnames[topenv]]=vartype; names[topenv][cntnames[topenv]]=name; cntnames[topenv]++; envsize[topenv] += size; } void dumpVars(){ for (int env = topenv; env >= 0; env--){ for (int item = 0; item < cntnames[env]; item++){ TRACE { cout << endl; cout << "env:"<= 0; env--){ for (int item = 0; item < cntnames[env]; item++){ //TRACE cout << "*"<