%option yylineno
%option noyywrap

%{
/*
 * Lexical analyzer for C source text.
 *
 * Every rule that yields a token calls token(), which records the lexeme,
 * line number and file name in the global yytoken and hands the token
 * category back to the parser.
 *
 * NOTE(review): the four standard header names below were reconstructed
 * from the library functions this file uses (fprintf, strtoul, strdup,
 * isdigit/isxdigit/tolower) -- confirm against the project's original list.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "y.tab.h"

struct token yytoken;
char *yyfilename;

/* Declared up front because the rule actions call it before its definition. */
int token(int category);

/* the following are macros used in various literal constant token types */
%}

O       [0-7]
D       [0-9]
L       [a-zA-Z_]
H       [a-fA-F0-9]
E       [Ee][+-]?{D}+
FS      (f|F|l|L)
IS      (u|U|l|L)
W       [ \t\f]*
LIT     \"(\\.|[^\\"])*\"

%%

[ \t\f\n]+                      { /* skip whitespace */ }

"/*"([^*]|"*"+[^*/])*"*"+"/"    { /* comment */ }

"/*"([^*]|"*"+[^*/])*           { fprintf(stderr, "%s:%d: unterminated comment\n", yyfilename, yylineno); }

"auto"                  { return token(AUTO); }
"break"                 { return token(BREAK); }
"case"                  { return token(CASE); }
"char"                  { return token(CHAR); }
"const"                 { return token(CONST); }
"continue"              { return token(CONTINUE); }
"default"               { return token(DEFAULT); }
"do"                    { return token(DO); }
"double"                { return token(DOUBLE); }
"else"                  { return token(ELSE); }
"enum"                  { return token(ENUM); }
"extern"                { return token(EXTERN); }
"float"                 { return token(FLOAT); }
"for"                   { return token(FOR); }
"goto"                  { return token(GOTO); }
"if"                    { return token(IF); }
"int"                   { return token(INT); }
"long"                  { return token(LONG); }
"register"              { return token(REGISTER); }
"return"                { return token(RETURN); }
"short"                 { return token(SHORT); }
"signed"                { return token(SIGNED); }
"sizeof"                { return token(SIZEOF); }
"static"                { return token(STATIC); }
"struct"                { return token(STRUCT); }
"switch"                { return token(SWITCH); }
"typedef"               { return token(TYPEDEF); }
"union"                 { return token(UNION); }
"unsigned"              { return token(UNSIGNED); }
"void"                  { return token(VOID); }
"volatile"              { return token(VOLATILE); }
"while"                 { return token(WHILE); }

">>="                   { return token(SREQ); }
"<<="                   { return token(SLEQ); }
"+="                    { return token(PLUSEQ); }
"-="                    { return token(MINUSEQ); }
"*="                    { return token(MULEQ); }
"/="                    { return token(DIVEQ); }
"%="                    { return token(MODEQ); }
"&="                    { return token(ANDEQ); }
"^="                    { return token(XOREQ); }
"|="                    { return token(OREQ); }
">>"                    { return token(SHIFT_RIGHT); }
"<<"                    { return token(SHIFT_LEFT); }
"++"                    { return token(INCOP); }
"--"                    { return token(DECOP); }
"->"                    { return token(FOLLOW); }
"&&"                    { return token(ANDAND); }
"||"                    { return token(OROR); }
"<="                    { return token(LE); }
">="                    { return token(GE); }
"=="                    { return token(EQEQ); }
"!="                    { return token(NOTEQ); }
";"                     { return token(SEMICOL); }
"{"                     { return token(LC); }
"}"                     { return token(RC); }
","                     { return token(COMMA); }
":"                     { return token(COLON); }
"="                     { return token(EQ); }
"("                     { return token(LP); }
")"                     { return token(RP); }
"["                     { return token(LB); }
"]"                     { return token(RB); }
"."                     { return token(DOT); }
"&"                     { return token(AND); }
"!"                     { return token(NOT); }
"~"                     { return token(TILDE); }
"-"                     { return token(MINUS); }
"+"                     { return token(PLUS); }
"*"                     { return token(MUL); }
"/"                     { return token(DIV); }
"%"                     { return token(MOD); }
"<"                     { return token(LT); }
">"                     { return token(GT); }
"^"                     { return token(XOR); }
"|"                     { return token(OR); }
"?"                     { return token(QUEST); }
\.\.\.                  { return token(ELLIPSIS); }

{L}({L}|{D})*           { return token(IDENTIFIER); }

0[xX]{H}+{IS}*          { return token(INT_LITERAL); }
0{O}+{IS}*              { return token(INT_LITERAL); }
{D}+{IS}*               { return token(INT_LITERAL); }

'(\\.|[^\\'])+'         { return token(CHAR_LITERAL); }

{D}+{E}{FS}?            { return token(FLOAT_LITERAL); }
{D}*"."{D}+({E})?{FS}?  { return token(FLOAT_LITERAL); }
{D}+"."{D}*({E})?{FS}?  { return token(FLOAT_LITERAL); }

{LIT}                   { return token(STRING_LITERAL); }

%%

/* True when c is an octal digit. */
#define isodigit(c) (('0' <= (c)) && ((c) <= '7'))

/* Numeric value (0..15) of the hexadecimal digit c. */
#define hexval(c) \
    (isdigit((unsigned char)(c)) ? ((c) - '0') \
                                 : (tolower((unsigned char)(c)) - 'a' + 10))

/*
 * Decode one escape sequence.
 *
 * s     text being decoded
 * pos   in:  index of the backslash that starts the escape
 *       out: index of the last character consumed by the escape
 *
 * Returns the value of the escaped character.  Hex escapes take every hex
 * digit that follows; octal escapes take at most three digits.  Values that
 * do not fit in one byte keep only their low-order bits.  A "\x" with no
 * hex digit after it, and any escape not listed here, yields the character
 * that follows the backslash.
 */
static int decode_escape(const char *s, int *pos)
{
    int i = *pos + 1;          /* character right after the backslash */
    int value;

    switch (s[i]) {
    case 'n':  value = '\n'; break;
    case 't':  value = '\t'; break;
    case 'v':  value = '\v'; break;
    case 'b':  value = '\b'; break;
    case 'r':  value = '\r'; break;
    case 'f':  value = '\f'; break;
    case 'a':  value = '\a'; break;
    case '\\': value = '\\'; break;
    case '?':  value = '\?'; break;
    case '\'': value = '\''; break;
    case '\"': value = '\"'; break;
    case 'x':
        if (isxdigit((unsigned char)s[i + 1])) {
            unsigned int acc = 0;

            i++;
            while (isxdigit((unsigned char)s[i])) {
                acc = acc * 16 + (unsigned int)hexval(s[i]);
                i++;
            }
            i--;               /* leave i on the last hex digit */
            value = (unsigned char)acc;
        } else {
            value = 'x';
        }
        break;
    default:
        if (isodigit(s[i])) {
            unsigned int acc = 0;
            int ndigits = 0;

            while (ndigits < 3 && isodigit(s[i])) {
                acc = acc * 8 + (unsigned int)(s[i] - '0');
                i++;
                ndigits++;
            }
            i--;               /* leave i on the last octal digit */
            value = (unsigned char)acc;
        } else {
            /* escaped something we don't treat specially */
            value = s[i];
        }
        break;
    }

    *pos = i;
    return value;
}

/*
 * Fill in the global yytoken for the lexeme currently held in yytext.
 *
 * category  token code matched by the rule that called us
 *
 * Always stores a strdup() copy of the lexeme, the current line number and
 * the current file name.  For INT_LITERAL the numeric value goes in ival;
 * for STRING_LITERAL the text between the quotes, with escape sequences
 * decoded, goes in sval as a NUL-terminated string.
 *
 * Returns category unchanged so rules can write "return token(X);".
 *
 * The strdup() result is not checked, so text is NULL if allocation fails.
 */
int token(int category)
{
    yytoken.text = strdup(yytext); /* replace with lexeme table entry */
    yytoken.lineno = yylineno;
    yytoken.filename = yyfilename;

    if (category == INT_LITERAL) {
        /*
         * Base 0 honours the 0x / 0 prefixes, which atoi() ignored, and
         * conversion stops at the first suffix letter (u, U, l, L).
         */
        yytoken.ival = strtoul(yytext, NULL, 0);
    } else if (category == STRING_LITERAL) {
        /*
         * NOTE(review): sval's storage is declared outside this file; this
         * assumes it has room for at least yyleng - 1 elements -- confirm.
         */
        int i;
        int j = 0;

        /* Start at 1 and stop before yyleng - 1 to skip the two quotes. */
        for (i = 1; i < yyleng - 1; i++) {
            if (yytext[i] == '\\') {
                yytoken.sval[j++] = decode_escape(yytext, &i);
            } else {
                yytoken.sval[j++] = yytext[i];
            }
        }
        yytoken.sval[j] = 0;
    }

    return category;
}