Commit 066f2d5d authored by 李晓奇's avatar 李晓奇

change parser

parent 60b6c163
lexical_analyzer_stu.l
syntax_analyzer_stu.y
......@@ -7,76 +7,61 @@
#include "syntax_tree.h"
#include "syntax_analyzer.h"
/* #define __DEBUG_COMMENT__ */
int lines = 1;
int lines;
int pos_start;
int pos_end;
/*
 * Build a syntax-tree node from `text` with new_syntax_tree_node() and store
 * it in yylval.node so the parser receives it as the token's semantic value.
 */
void pass_node(char *text)
{
    yylval.node = new_syntax_tree_node(text);
}
/*
 * Debug hook for comment text seen by the scanner.
 *
 * When __DEBUG_COMMENT__ is defined at compile time, prints the comment
 * fragment together with the current value of `lines`; otherwise it does
 * nothing. `len` is accepted but never read.
 */
void comment_helper(char *comment, unsigned int len)
{
    (void)len;
#ifndef __DEBUG_COMMENT__
    (void)comment;
#else
    printf("Get COMMENT in line<%d>: \"%s\"\n", lines, comment);
#endif
}
/*****************声明和选项设置 end*****************/
%}
/* use exclusive state */
%x COMMENT
/* One ASCII letter, upper or lower case. */
letter [a-zA-Z]
/* One decimal digit. */
digit [0-9]
/* Identifier: one or more letters (digits and underscores are not accepted). */
ID {letter}+
/* Integer literal: one or more digits. */
INTEGER {digit}+
/* Float literal: digits followed by a dot, or optional digits, a dot and at least one digit. */
FLOAT {digit}+\.|{digit}*\.{digit}+
/* Line terminator: CRLF, a lone CR, or a lone LF. */
NEWLINE \r\n|\r|\n
/* Horizontal whitespace: a single space or tab. */
WHITESPACE [ \t]
/*
%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID
%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP
%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET
%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT
*/
/* <COMMENT>.* { pos_end += yyleng; comment_helper(yytext, yyleng); } */
%%
<INITIAL>"/*" { BEGIN(COMMENT); pos_end += 2; }
<COMMENT>[^*\n]*|"*"+[^*/\n]* { pos_end += yyleng; comment_helper(yytext, yyleng); }
<COMMENT>"*/" { BEGIN(0); pos_end += 2; }
\+ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ADD;}
\- {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SUB;}
\* {pos_start = pos_end; pos_end += 1; pass_node(yytext); return MUL;}
\/ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return DIV;}
\< {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LT;}
\<= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return LTE;}
\> {pos_start = pos_end; pos_end += 1; pass_node(yytext); return GT;}
\>= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return GTE;}
== {pos_start = pos_end; pos_end += 2; pass_node(yytext); return EQ;}
!= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return NEQ;}
= {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ASSIN;}
; {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SEMICOLON;}
, {pos_start = pos_end; pos_end += 1; pass_node(yytext); return COMMA;}
\( {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LPARENTHESE;}
\) {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RPARENTHESE;}
\[ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LBRACKET;}
\] {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RBRACKET;}
\{ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LBRACE;}
\} {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RBRACE;}
else {pos_start = pos_end; pos_end += 4; pass_node(yytext); return ELSE;}
if {pos_start = pos_end; pos_end += 2; pass_node(yytext); return IF;}
int {pos_start = pos_end; pos_end += 3; pass_node(yytext); return INT;}
float {pos_start = pos_end; pos_end += 5; pass_node(yytext); return FLOAT;}
return {pos_start = pos_end; pos_end += 6; pass_node(yytext); return RETURN;}
void {pos_start = pos_end; pos_end += 4; pass_node(yytext); return VOID;}
while {pos_start = pos_end; pos_end += 5; pass_node(yytext); return WHILE;}
[a-zA-Z]+ {
    /* Identifier: the column range grows by the matched length. */
    pos_start = pos_end;
    pos_end += yyleng;
    pass_node(yytext);
    return IDENTIFIER;
}
[0-9]+ {
    /* Integer literal. */
    pos_start = pos_end;
    pos_end += yyleng;
    pass_node(yytext);
    return INTEGER;
}
[0-9]+\.[0-9]*|[0-9]*\.[0-9]+ {
    /* Floating-point literal: digits on at least one side of the dot. */
    pos_start = pos_end;
    pos_end += yyleng;
    pass_node(yytext);
    return FLOATPOINT;
}
\n {
    /* Newline in normal code: count the line and reset both columns to 1. */
    lines++;
    pos_start = 1;
    pos_end = 1;
}
[ \t] {
    /* A blank occupies one column and produces no token. */
    pos_start = pos_end;
    pos_end += 1;
}
"/*" {
    /* Comment opener: consume two columns and enter the COMMENT start condition. */
    pos_start = pos_end;
    pos_end += 2;
    BEGIN(COMMENT);
}
<COMMENT>"*/" {
    /* Comment closer: consume two columns and return to normal scanning. */
    pos_start = pos_end;
    pos_end += 2;
    BEGIN(INITIAL);
}
<COMMENT>. {
    /* Any other character inside a comment occupies one column.
       pos_end (not pos_start) must advance here; otherwise every token that
       follows a comment on the same line reports a column that is too small. */
    pos_start = pos_end;
    pos_end += 1;
}
<COMMENT>\n {
    /* Newline inside a comment: reset both columns to 1 and count the line. */
    pos_start = 1;
    pos_end = 1;
    lines++;
}
. { pos_start = pos_end; pos_end++; return ERROR; }
if { pos_start = pos_end; pos_end += 2; pass_node("if"); return _IF;}
else { pos_start = pos_end; pos_end += 4; pass_node("else"); return _ELSE;}
while { pos_start = pos_end; pos_end += 5; pass_node("while"); return _WHILE;}
return { pos_start = pos_end; pos_end += 6; pass_node("return"); return _RETURN;}
int { pos_start = pos_end; pos_end += 3; pass_node("int"); return _INT;}
float { pos_start = pos_end; pos_end += 5; pass_node("float"); return _FLOAT;}
void { pos_start = pos_end; pos_end += 4; pass_node("void"); return _VOID;}
{ID} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _ID;}
{INTEGER} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _INTEGER;}
{FLOAT} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _FLOATPOINT;}
"=" { pos_start = pos_end; pos_end += 1; pass_node("="); return _ASSIGN;}
"<="|">="|"<"|">"|"=="|"!=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _RELOP;}
"+"|"-" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _ADD_OP;}
"*"|"/" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _MUL_OP;}
"["|"]" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '[' ? _L_SQUARE : _R_SQUARE;}
"("|")" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '(' ? _L_PARE : _R_PARE;}
"{"|"}" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '{' ? _L_BRACKET : _R_BRACKET;}
","|";" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == ',' ? _COMMA : _SEMI;}
{WHITESPACE} { pos_end++; }
<*>{NEWLINE} { lines++; pos_end = 0;}
%%
......@@ -14,7 +14,7 @@ extern FILE * yyin;
// external variables from lexical_analyzer module
extern int lines;
extern char *yytext;
extern char * yytext;
extern int pos_end;
extern int pos_start;
......@@ -28,179 +28,181 @@ void yyerror(const char *s);
syntax_tree_node *node(const char *node_name, int children_num, ...);
%}
/* TODO: Complete this definition.
Hint: See pass_node(), node(), and syntax_tree.h.
Use forward declaring. */
/* TODO: Complete this definition. */
%union {
struct _syntax_tree_node *node;
struct _syntax_tree_node * node;
char * name;
}
/* TODO: Your tokens here. */
/*
alias:
- SPEC: SPECIFIER
- DEC:DECLARATION
- COM: COMPOUND
- STMT: STATEMENT
- EXPR: EXPRESSION
- ITER: ITERATION
- SELC: SELECTION
- RET: RETURN
- Tokens starting with '_' are terminals
*/
%token <node> ERROR
%type <node> TYPE_SPEC RELOP ADDOP MULOP
%type <node> DEC_LIST DEC VAR_DEC FUN_DEC LOCAL_DEC
%type <node> COM_STMT STMT_LIST STMT EXPR_STMT ITER_STMT SELC_STMT RET_STMT
%type <node> EXPR SIMPLE_EXPR VAR ADD_EXPR TERM FACTOR INTEGER FLOAT CALL
%type <node> PARAM PARAMS PARAM_LIST ARGS ARG_LIST
/* These are for flex to return
NOTE: Though combining _LE _LT _GT _GE _EQ _NEQ into _RELOP makes the program simpler,
it may not satisfy the subsequent requirements.
*/
%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID
%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP
%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET
%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT
%type <node> program
%token <node> ADD
%token <node> SUB
%token <node> MUL
%token <node> DIV
%token <node> LT
%token <node> LTE
%token <node> GT
%token <node> GTE
%token <node> EQ
%token <node> NEQ
%token <node> ASSIN
%token <node> SEMICOLON
%token <node> COMMA
%token <node> LPARENTHESE
%token <node> RPARENTHESE
%token <node> LBRACKET
%token <node> RBRACKET
%token <node> LBRACE
%token <node> RBRACE
%token <node> ELSE
%token <node> IF
%token <node> INT
%token <node> RETURN
%token <node> VOID
%token <node> WHILE
%token <node> IDENTIFIER
%token <node> INTEGER
%token <node> FLOAT
%token <node> FLOATPOINT
//%token <node> EOL
//%token <node> BLANK
//%token <node> COMMENT
%type <node> program declaration-list declaration var-declaration type-specifier fun-declaration params param-list param compound-stmt local-declarations statement-list statement expression-stmt selection-stmt iteration-stmt return-stmt expression var simple-expression relop additive-expression addop term mulop factor integer float call args arg-list
/* compulsory starting symbol */
%start program
/* TODO: Your rules here. */
%%
/* TODO: Your rules here. */
program: DEC_LIST {$$ = node("program", 1, $1); gt->root = $$;}
;
program : declaration-list {$$ = node( "program", 1, $1); gt->root = $$;}
;
DEC_LIST: DEC_LIST DEC {$$ = node("declaration-list", 2, $1, $2); }
| DEC {$$ = node("declaration-list", 1, $1);}
;
declaration-list : declaration-list declaration {$$ = node( "declaration-list", 2, $1, $2);}
| declaration {$$ = node( "declaration-list", 1, $1);}
;
DEC: VAR_DEC {$$ = node("declaration", 1, $1); }
| FUN_DEC {$$ = node("declaration", 1, $1); }
;
declaration : var-declaration {$$ = node( "declaration", 1, $1);}
| fun-declaration {$$ = node( "declaration", 1, $1);}
;
VAR_DEC: TYPE_SPEC _ID _SEMI {$$ = node("var-declaration", 3, $1, $2, $3); }
| TYPE_SPEC _ID _L_SQUARE _INTEGER _R_SQUARE _SEMI {$$ = node("var-declaration", 6, $1, $2, $3, $4, $5, $6); }
;
var-declaration : type-specifier IDENTIFIER SEMICOLON {$$ = node( "var-declaration", 3, $1, $2, $3);}
| type-specifier IDENTIFIER LBRACKET INTEGER RBRACKET SEMICOLON {$$ = node( "var-declaration", 6, $1, $2, $3, $4, $5, $6);}
;
TYPE_SPEC: _INT {$$ = node("type-specifier", 1, $1); }
| _FLOAT {$$ = node("type-specifier", 1, $1); }
| _VOID {$$ = node("type-specifier", 1, $1); }
;
type-specifier : INT {$$ = node( "type-specifier", 1, $1);}
| FLOAT { $$ = node( "type-specifier", 1, $1); }
| VOID {$$ = node( "type-specifier", 1, $1);}
;
FUN_DEC: TYPE_SPEC _ID _L_PARE PARAMS _R_PARE COM_STMT {$$ = node("fun-declaration", 6, $1, $2, $3, $4, $5, $6); }
;
fun-declaration : type-specifier IDENTIFIER LPARENTHESE params RPARENTHESE compound-stmt {$$ = node( "fun-declaration", 6, $1, $2, $3, $4, $5, $6);}
;
PARAMS: PARAM_LIST {$$ = node("params", 1, $1); }
| _VOID {$$ = node("params", 1, $1); }
;
params : param-list {$$ = node( "params", 1, $1);}
| VOID {$$ = node( "params", 1, $1);}
;
PARAM_LIST: PARAM_LIST _COMMA PARAM {$$ = node("param-list", 3, $1, $2, $3); }
| PARAM {$$ = node("param-list", 1, $1); }
;
param-list : param-list COMMA param {$$ = node( "param-list", 3, $1, $2, $3);}
| param {$$ = node( "param-list", 1, $1);}
;
param : type-specifier IDENTIFIER {$$ = node( "param", 2, $1, $2);}
| type-specifier IDENTIFIER LBRACKET RBRACKET {$$ = node( "param", 4, $1, $2, $3, $4);}
;
PARAM: TYPE_SPEC _ID {$$ = node("param", 2, $1, $2); }
| TYPE_SPEC _ID _L_SQUARE _R_SQUARE {$$ = node("param", 4, $1, $2, $3, $4);}
;
compound-stmt : LBRACE local-declarations statement-list RBRACE {$$ = node( "compound-stmt", 4, $1, $2, $3, $4);}
;
COM_STMT: _L_BRACKET LOCAL_DEC STMT_LIST _R_BRACKET {$$ = node("compound-stmt", 4, $1, $2, $3, $4);}
;
local-declarations : local-declarations var-declaration {$$ = node( "local-declarations", 2, $1, $2);}
| {$$ = node( "local-declarations",0);}
;
LOCAL_DEC: LOCAL_DEC VAR_DEC {$$ = node("local-declarations", 2, $1, $2);}
| {$$ = node("local-declarations", 0);}
;
statement-list : statement-list statement {$$ = node( "statement-list", 2, $1, $2);}
| {$$ = node( "statement-list",0);}
;
STMT_LIST: STMT_LIST STMT {$$ = node("statement-list", 2, $1, $2);}
| {$$ = node("statement-list", 0);}
;
statement : expression-stmt {$$ = node( "statement", 1, $1);}
| compound-stmt {$$ = node( "statement", 1, $1);}
| selection-stmt {$$ = node( "statement", 1, $1);}
| iteration-stmt {$$ = node( "statement", 1, $1);}
| return-stmt {$$ = node( "statement", 1, $1);}
;
STMT: EXPR_STMT {$$ = node("statement", 1, $1);}
| COM_STMT {$$ = node("statement", 1, $1);}
| SELC_STMT {$$ = node("statement", 1, $1);}
| ITER_STMT {$$ = node("statement", 1, $1);}
| RET_STMT {$$ = node("statement", 1, $1);}
;
expression-stmt : expression SEMICOLON {$$ = node( "expression-stmt", 2, $1, $2);}
| SEMICOLON {$$ = node( "expression-stmt", 1, $1);}
;
EXPR_STMT: EXPR _SEMI {$$ = node("expression-stmt", 2, $1, $2);}
| _SEMI {$$ = node("expression-stmt", 1, $1);}
;
selection-stmt : IF LPARENTHESE expression RPARENTHESE statement {$$ = node( "selection-stmt", 5, $1, $2, $3, $4, $5);}
| IF LPARENTHESE expression RPARENTHESE statement ELSE statement {$$ = node( "selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);}
;
SELC_STMT: _IF _L_PARE EXPR _R_PARE STMT {$$ = node("selection-stmt", 5, $1, $2, $3, $4, $5);}
| _IF _L_PARE EXPR _R_PARE STMT _ELSE STMT {$$ = node("selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);}
;
iteration-stmt : WHILE LPARENTHESE expression RPARENTHESE statement {$$ = node( "iteration-stmt", 5, $1, $2, $3, $4, $5);}
;
ITER_STMT: _WHILE _L_PARE EXPR _R_PARE STMT {$$ = node("iteration-stmt", 5, $1, $2, $3, $4, $5);}
;
/* return-stmt: either a bare `return;` or `return expression;`.
   Every matched symbol, including the terminating SEMICOLON, is attached as a
   child so that the tree contains the same tokens as the source text. */
return-stmt : RETURN SEMICOLON {$$ = node( "return-stmt", 2, $1, $2);}
| RETURN expression SEMICOLON {$$ = node( "return-stmt", 3, $1, $2, $3);}
;
RET_STMT: _RETURN _SEMI {$$ = node("return-stmt", 2, $1, $2);}
| _RETURN EXPR _SEMI {$$ = node("return-stmt", 3, $1, $2, $3);}
;
expression : var ASSIN expression {$$ = node( "expression", 3, $1, $2, $3);}
| simple-expression {$$ = node( "expression", 1, $1);}
;
EXPR: VAR _ASSIGN EXPR {$$ = node("expression", 3, $1, $2, $3);}
| SIMPLE_EXPR {$$ = node("expression", 1, $1);}
var : IDENTIFIER {$$ = node( "var", 1, $1);}
| IDENTIFIER LBRACKET expression RBRACKET {$$ = node( "var", 4, $1, $2, $3, $4);}
;
VAR: _ID {$$ = node("var", 1, $1);}
| _ID _L_SQUARE EXPR _R_SQUARE {$$ = node("var", 4, $1, $2, $3, $4);}
;
SIMPLE_EXPR: ADD_EXPR RELOP ADD_EXPR {$$ = node("simple-expression", 3, $1, $2, $3);}
| ADD_EXPR {$$ = node("simple-expression", 1, $1);}
;
RELOP: _RELOP {$$ = node("relop", 1, $1);}
;
/*
RELOP: _LE {$$ = node("relop", 1, $1);}
| _LT {$$ = node("relop", 1, $1);}
| _GT {$$ = node("relop", 1, $1);}
| _GE {$$ = node("relop", 1, $1);}
| _EQ {$$ = node("relop", 1, $1);}
| _NEQ {$$ = node("relop", 1, $1);}
;
*/
ADD_EXPR: ADD_EXPR ADDOP TERM {$$ = node("additive-expression", 3, $1, $2, $3);}
| TERM {$$ = node("additive-expression", 1, $1);}
;
ADDOP: _ADD_OP {$$ = node("addop", 1, $1);}
;
TERM: TERM MULOP FACTOR {$$ = node("term", 3, $1, $2, $3);}
| FACTOR {$$ = node("term", 1, $1);}
;
simple-expression : additive-expression relop additive-expression {$$ = node( "simple-expression", 3, $1, $2, $3);}
| additive-expression {$$ = node( "simple-expression", 1, $1);}
;
MULOP: _MUL_OP {$$ = node("mulop", 1, $1);}
;
relop : LT {$$ = node( "relop", 1, $1);}
| LTE {$$ = node( "relop", 1, $1);}
| GT {$$ = node( "relop", 1, $1);}
| GTE {$$ = node( "relop", 1, $1);}
| EQ {$$ = node( "relop", 1, $1);}
| NEQ {$$ = node( "relop", 1, $1);}
;
FACTOR: _L_PARE EXPR _R_PARE {$$ = node("factor", 3, $1, $2, $3);}
| VAR {$$ = node("factor", 1, $1);}
| CALL {$$ = node("factor", 1, $1);}
| INTEGER {$$ = node("factor", 1, $1);}
| FLOAT {$$ = node("factor", 1, $1);}
;
additive-expression : additive-expression addop term {$$ = node( "additive-expression", 3, $1, $2, $3);}
| term {$$ = node( "additive-expression", 1, $1);}
;
INTEGER: _INTEGER {$$ = node("integer", 1, $1);}
;
addop : ADD {$$ = node( "addop", 1, $1);}
| SUB {$$ = node( "addop", 1, $1);}
;
FLOAT: _FLOATPOINT {$$ = node("float", 1, $1);}
;
term : term mulop factor {$$ = node( "term", 3, $1, $2, $3);}
| factor {$$ = node( "term", 1, $1);}
;
CALL: _ID _L_PARE ARGS _R_PARE {$$ = node("call", 4, $1, $2, $3, $4);}
;
mulop : MUL {$$ = node( "mulop", 1, $1);}
| DIV {$$ = node( "mulop", 1, $1);}
;
ARGS: ARG_LIST {$$ = node("args", 1, $1);}
| {$$ = node("args", 0);}
;
factor : LPARENTHESE expression RPARENTHESE {$$ = node( "factor", 3, $1, $2, $3);}
| var {$$ = node( "factor", 1, $1);}
| call {$$ = node( "factor", 1, $1);}
| integer {$$ = node( "factor", 1, $1);}
| float {$$ = node( "factor", 1, $1);}
;
integer : INTEGER {$$ = node( "integer", 1, $1);}
;
float : FLOATPOINT {$$ = node( "float", 1, $1);}
;
ARG_LIST: ARG_LIST _COMMA EXPR {$$ = node("arg-list", 3, $1, $2, $3);}
| EXPR {$$ = node("arg-list", 1, $1);}
;
call : IDENTIFIER LPARENTHESE args RPARENTHESE {$$ = node( "call", 4, $1, $2, $3, $4);}
;
args : arg-list {$$ = node( "args", 1, $1);}
| {$$ = node( "args", 0);}
;
arg-list : arg-list COMMA expression {$$ = node( "arg-list", 3, $1, $2, $3);}
| expression {$$ = node( "arg-list", 1, $1);}
;
%%
......@@ -227,7 +229,7 @@ syntax_tree *parse(const char *input_path)
yyin = stdin;
}
lines = pos_start = pos_end = 1;
lines = pos_start = pos_end = 1;
gt = new_syntax_tree();
yyrestart(yyin);
yyparse();
......@@ -255,3 +257,4 @@ syntax_tree_node *node(const char *name, int children_num, ...)
}
return p;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment