Commit 066f2d5d authored by 李晓奇's avatar 李晓奇

change parser

parent 60b6c163
lexical_analyzer_stu.l
syntax_analyzer_stu.y
...@@ -7,76 +7,61 @@ ...@@ -7,76 +7,61 @@
#include "syntax_tree.h" #include "syntax_tree.h"
#include "syntax_analyzer.h" #include "syntax_analyzer.h"
/* #define __DEBUG_COMMENT__ */ int lines;
int lines = 1;
int pos_start; int pos_start;
int pos_end; int pos_end;
void pass_node(char *text){ void pass_node(char *text){
yylval.node = new_syntax_tree_node(text); yylval.node = new_syntax_tree_node(text);
} }
void comment_helper(char *comment, unsigned int len)
{
#ifdef __DEBUG_COMMENT__
printf("Get COMMENT in line<%d>: \"%s\"\n", lines, comment);
#endif
}
/*****************声明和选项设置 end*****************/ /*****************声明和选项设置 end*****************/
%} %}
/* use exclusive state */
%x COMMENT %x COMMENT
letter [a-zA-Z]
digit [0-9]
ID {letter}+
INTEGER {digit}+
FLOAT {digit}+\.|{digit}*\.{digit}+
NEWLINE \r\n|\r|\n
WHITESPACE [ \t]
/*
%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID
%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP
%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET
%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT
*/
/* <COMMENT>.* { pos_end += yyleng; comment_helper(yytext, yyleng); } */
%% %%
<INITIAL>"/*" { BEGIN(COMMENT); pos_end += 2; } \+ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ADD;}
<COMMENT>[^*\n]*|"*"+[^*/\n]* { pos_end += yyleng; comment_helper(yytext, yyleng); } \- {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SUB;}
<COMMENT>"*/" { BEGIN(0); pos_end += 2; } \* {pos_start = pos_end; pos_end += 1; pass_node(yytext); return MUL;}
"/" { /* remaining arithmetic operator */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return DIV; }
"<" { /* relational operators */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return LT; }
"<=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return LTE; }
">" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return GT; }
">=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return GTE; }
"==" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return EQ; }
"!=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return NEQ; }
"=" { /* assignment */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return ASSIN; }
";" { /* punctuation and brackets */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return SEMICOLON; }
"," { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return COMMA; }
"(" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return LPARENTHESE; }
")" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return RPARENTHESE; }
"[" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return LBRACKET; }
"]" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return RBRACKET; }
"{" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return LBRACE; }
"}" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return RBRACE; }
"else" { /* keywords: listed before the identifier rule so they win equal-length matches */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return ELSE; }
"if" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return IF; }
"int" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return INT; }
"float" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return FLOAT; }
"return" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return RETURN; }
"void" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return VOID; }
"while" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return WHILE; }
[a-zA-Z]+ { /* identifiers: letters only */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return IDENTIFIER; }
[0-9]+ { /* integer literal */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return INTEGER; }
[0-9]+\.[0-9]*|[0-9]*\.[0-9]+ { /* float literal: digits on at least one side of the dot */ pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return FLOATPOINT; }
\n { /* new line: bump the line counter and reset both column markers */ lines++; pos_start = 1; pos_end = 1; }
[ \t] { /* blanks only advance the column; no token is returned */ pos_start = pos_end; pos_end += 1; }
"/*" { /* enter the exclusive COMMENT state */ pos_start = pos_end; pos_end += 2; BEGIN(COMMENT); }
<COMMENT>"*/" { /* leave the comment and resume normal scanning */ pos_start = pos_end; pos_end += 2; BEGIN(INITIAL); }
<COMMENT>. { /* skip one comment character; pos_end (not pos_start) must advance so later columns stay correct */ pos_start = pos_end; pos_end += 1; }
<COMMENT>\n { /* newline inside a comment still counts as a line */ pos_start = 1; pos_end = 1; lines++; }
. { /* any other character is reported to the parser as ERROR */ pos_start = pos_end; pos_end++; return ERROR; }
if { pos_start = pos_end; pos_end += 2; pass_node("if"); return _IF;}
else { pos_start = pos_end; pos_end += 4; pass_node("else"); return _ELSE;}
while { pos_start = pos_end; pos_end += 5; pass_node("while"); return _WHILE;}
return { pos_start = pos_end; pos_end += 6; pass_node("return"); return _RETURN;}
int { pos_start = pos_end; pos_end += 3; pass_node("int"); return _INT;}
float { pos_start = pos_end; pos_end += 5; pass_node("float"); return _FLOAT;}
void { pos_start = pos_end; pos_end += 4; pass_node("void"); return _VOID;}
{ID} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _ID;}
{INTEGER} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _INTEGER;}
{FLOAT} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _FLOATPOINT;}
"=" { pos_start = pos_end; pos_end += 1; pass_node("="); return _ASSIGN;}
"<="|">="|"<"|">"|"=="|"!=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _RELOP;}
"+"|"-" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _ADD_OP;}
"*"|"/" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _MUL_OP;}
"["|"]" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '[' ? _L_SQUARE : _R_SQUARE;}
"("|")" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '(' ? _L_PARE : _R_PARE;}
"{"|"}" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '{' ? _L_BRACKET : _R_BRACKET;}
","|";" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == ',' ? _COMMA : _SEMI;}
{WHITESPACE} { pos_end++; }
<*>{NEWLINE} { lines++; pos_end = 0;}
%% %%
...@@ -14,7 +14,7 @@ extern FILE * yyin; ...@@ -14,7 +14,7 @@ extern FILE * yyin;
// external variables from lexical_analyzer module // external variables from lexical_analyzer module
extern int lines; extern int lines;
extern char *yytext; extern char * yytext;
extern int pos_end; extern int pos_end;
extern int pos_start; extern int pos_start;
...@@ -28,179 +28,181 @@ void yyerror(const char *s); ...@@ -28,179 +28,181 @@ void yyerror(const char *s);
syntax_tree_node *node(const char *node_name, int children_num, ...); syntax_tree_node *node(const char *node_name, int children_num, ...);
%} %}
/* TODO: Complete this definition. /* TODO: Complete this definition. */
Hint: See pass_node(), node(), and syntax_tree.h.
Use forward declaring. */
%union { %union {
struct _syntax_tree_node *node; struct _syntax_tree_node * node;
char * name;
} }
/* TODO: Your tokens here. */ /* TODO: Your tokens here. */
/*
alias:
- SPEC: SPECIFIER
- DEC: DECLARATION
- COM: COMPOUND
- STMT: STATEMENT
- EXPR: EXPRESSION
- ITER: ITERATION
- SELC: SELECTION
- RET: RETURN
- Tokens starting with '_' are terminals
*/
%token <node> ERROR %token <node> ERROR
%type <node> TYPE_SPEC RELOP ADDOP MULOP %token <node> ADD
%type <node> DEC_LIST DEC VAR_DEC FUN_DEC LOCAL_DEC %token <node> SUB
%type <node> COM_STMT STMT_LIST STMT EXPR_STMT ITER_STMT SELC_STMT RET_STMT %token <node> MUL
%type <node> EXPR SIMPLE_EXPR VAR ADD_EXPR TERM FACTOR INTEGER FLOAT CALL %token <node> DIV
%type <node> PARAM PARAMS PARAM_LIST ARGS ARG_LIST %token <node> LT
/* These are for flex to return %token <node> LTE
NOTE: Though combining _LE _LT _BT _BE _EQ _NEQ to _RELOP makes the program simpler, %token <node> GT
it may not satisfy the subsequent requirements. %token <node> GTE
*/ %token <node> EQ
%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID %token <node> NEQ
%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP %token <node> ASSIN
%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET %token <node> SEMICOLON
%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT %token <node> COMMA
%token <node> LPARENTHESE
%type <node> program %token <node> RPARENTHESE
%token <node> LBRACKET
%token <node> RBRACKET
%token <node> LBRACE
%token <node> RBRACE
%token <node> ELSE
%token <node> IF
%token <node> INT
%token <node> RETURN
%token <node> VOID
%token <node> WHILE
%token <node> IDENTIFIER
%token <node> INTEGER
%token <node> FLOAT
%token <node> FLOATPOINT
//%token <node> EOL
//%token <node> BLANK
//%token <node> COMMENT
%type <node> program declaration-list declaration var-declaration type-specifier fun-declaration params param-list param compound-stmt local-declarations statement-list statement expression-stmt selection-stmt iteration-stmt return-stmt expression var simple-expression relop additive-expression addop term mulop factor integer float call args arg-list
/* compulsory starting symbol */
%start program %start program
/* TODO: Your rules here. */
%% %%
/* TODO: Your rules here. */
program: DEC_LIST {$$ = node("program", 1, $1); gt->root = $$;} program : declaration-list {$$ = node( "program", 1, $1); gt->root = $$;}
; ;
DEC_LIST: DEC_LIST DEC {$$ = node("declaration-list", 2, $1, $2); } declaration-list : declaration-list declaration {$$ = node( "declaration-list", 2, $1, $2);}
| DEC {$$ = node("declaration-list", 1, $1);} | declaration {$$ = node( "declaration-list", 1, $1);}
; ;
DEC: VAR_DEC {$$ = node("declaration", 1, $1); } declaration : var-declaration {$$ = node( "declaration", 1, $1);}
| FUN_DEC {$$ = node("declaration", 1, $1); } | fun-declaration {$$ = node( "declaration", 1, $1);}
; ;
VAR_DEC: TYPE_SPEC _ID _SEMI {$$ = node("var-declaration", 3, $1, $2, $3); } var-declaration : type-specifier IDENTIFIER SEMICOLON {$$ = node( "var-declaration", 3, $1, $2, $3);}
| TYPE_SPEC _ID _L_SQUARE _INTEGER _R_SQUARE _SEMI {$$ = node("var-declaration", 6, $1, $2, $3, $4, $5, $6); } | type-specifier IDENTIFIER LBRACKET INTEGER RBRACKET SEMICOLON {$$ = node( "var-declaration", 6, $1, $2, $3, $4, $5, $6);}
; ;
TYPE_SPEC: _INT {$$ = node("type-specifier", 1, $1); } type-specifier : INT {$$ = node( "type-specifier", 1, $1);}
| _FLOAT {$$ = node("type-specifier", 1, $1); } | FLOAT { $$ = node( "type-specifier", 1, $1); }
| _VOID {$$ = node("type-specifier", 1, $1); } | VOID {$$ = node( "type-specifier", 1, $1);}
; ;
FUN_DEC: TYPE_SPEC _ID _L_PARE PARAMS _R_PARE COM_STMT {$$ = node("fun-declaration", 6, $1, $2, $3, $4, $5, $6); } fun-declaration : type-specifier IDENTIFIER LPARENTHESE params RPARENTHESE compound-stmt {$$ = node( "fun-declaration", 6, $1, $2, $3, $4, $5, $6);}
; ;
PARAMS: PARAM_LIST {$$ = node("params", 1, $1); } params : param-list {$$ = node( "params", 1, $1);}
| _VOID {$$ = node("params", 1, $1); } | VOID {$$ = node( "params", 1, $1);}
; ;
PARAM_LIST: PARAM_LIST _COMMA PARAM {$$ = node("param-list", 3, $1, $2, $3); } param-list : param-list COMMA param {$$ = node( "param-list", 3, $1, $2, $3);}
| PARAM {$$ = node("param-list", 1, $1); } | param {$$ = node( "param-list", 1, $1);}
; ;
/* param: a typed identifier, optionally followed by an empty pair of square brackets. */
param
    : type-specifier IDENTIFIER
        { $$ = node( "param", 2, $1, $2); }
    | type-specifier IDENTIFIER LBRACKET RBRACKET
        { $$ = node( "param", 4, $1, $2, $3, $4); }
    ;
PARAM: TYPE_SPEC _ID {$$ = node("param", 2, $1, $2); } compound-stmt : LBRACE local-declarations statement-list RBRACE {$$ = node( "compound-stmt", 4, $1, $2, $3, $4);}
| TYPE_SPEC _ID _L_SQUARE _R_SQUARE {$$ = node("param", 4, $1, $2, $3, $4);} ;
;
COM_STMT: _L_BRACKET LOCAL_DEC STMT_LIST _R_BRACKET {$$ = node("compound-stmt", 4, $1, $2, $3, $4);} local-declarations : local-declarations var-declaration {$$ = node( "local-declarations", 2, $1, $2);}
; | {$$ = node( "local-declarations",0);}
;
LOCAL_DEC: LOCAL_DEC VAR_DEC {$$ = node("local-declarations", 2, $1, $2);} statement-list : statement-list statement {$$ = node( "statement-list", 2, $1, $2);}
| {$$ = node("local-declarations", 0);} | {$$ = node( "statement-list",0);}
; ;
STMT_LIST: STMT_LIST STMT {$$ = node("statement-list", 2, $1, $2);} statement : expression-stmt {$$ = node( "statement", 1, $1);}
| {$$ = node("statement-list", 0);} | compound-stmt {$$ = node( "statement", 1, $1);}
; | selection-stmt {$$ = node( "statement", 1, $1);}
| iteration-stmt {$$ = node( "statement", 1, $1);}
| return-stmt {$$ = node( "statement", 1, $1);}
;
STMT: EXPR_STMT {$$ = node("statement", 1, $1);} expression-stmt : expression SEMICOLON {$$ = node( "expression-stmt", 2, $1, $2);}
| COM_STMT {$$ = node("statement", 1, $1);} | SEMICOLON {$$ = node( "expression-stmt", 1, $1);}
| SELC_STMT {$$ = node("statement", 1, $1);} ;
| ITER_STMT {$$ = node("statement", 1, $1);}
| RET_STMT {$$ = node("statement", 1, $1);}
;
EXPR_STMT: EXPR _SEMI {$$ = node("expression-stmt", 2, $1, $2);} selection-stmt : IF LPARENTHESE expression RPARENTHESE statement {$$ = node( "selection-stmt", 5, $1, $2, $3, $4, $5);}
| _SEMI {$$ = node("expression-stmt", 1, $1);} | IF LPARENTHESE expression RPARENTHESE statement ELSE statement {$$ = node( "selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);}
; ;
SELC_STMT: _IF _L_PARE EXPR _R_PARE STMT {$$ = node("selection-stmt", 5, $1, $2, $3, $4, $5);} iteration-stmt : WHILE LPARENTHESE expression RPARENTHESE statement {$$ = node( "iteration-stmt", 5, $1, $2, $3, $4, $5);}
| _IF _L_PARE EXPR _R_PARE STMT _ELSE STMT {$$ = node("selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);} ;
;
ITER_STMT: _WHILE _L_PARE EXPR _R_PARE STMT {$$ = node("iteration-stmt", 5, $1, $2, $3, $4, $5);} return-stmt : RETURN SEMICOLON { /* keep both RETURN and SEMICOLON as children, like the three-child alternative below */ $$ = node( "return-stmt", 2, $1, $2);}
; | RETURN expression SEMICOLON {$$ = node( "return-stmt", 3, $1, $2, $3);}
;
RET_STMT: _RETURN _SEMI {$$ = node("return-stmt", 2, $1, $2);} expression : var ASSIN expression {$$ = node( "expression", 3, $1, $2, $3);}
| _RETURN EXPR _SEMI {$$ = node("return-stmt", 3, $1, $2, $3);} | simple-expression {$$ = node( "expression", 1, $1);}
; ;
EXPR: VAR _ASSIGN EXPR {$$ = node("expression", 3, $1, $2, $3);} var : IDENTIFIER {$$ = node( "var", 1, $1);}
| SIMPLE_EXPR {$$ = node("expression", 1, $1);} | IDENTIFIER LBRACKET expression RBRACKET {$$ = node( "var", 4, $1, $2, $3, $4);}
; ;
VAR: _ID {$$ = node("var", 1, $1);} simple-expression : additive-expression relop additive-expression {$$ = node( "simple-expression", 3, $1, $2, $3);}
| _ID _L_SQUARE EXPR _R_SQUARE {$$ = node("var", 4, $1, $2, $3, $4);} | additive-expression {$$ = node( "simple-expression", 1, $1);}
; ;
SIMPLE_EXPR: ADD_EXPR RELOP ADD_EXPR {$$ = node("simple-expression", 3, $1, $2, $3);}
| ADD_EXPR {$$ = node("simple-expression", 1, $1);}
;
RELOP: _RELOP {$$ = node("relop", 1, $1);}
;
/*
RELOP: _LE {$$ = node("relop", 1, $1);}
| _LT {$$ = node("relop", 1, $1);}
| _GT {$$ = node("relop", 1, $1);}
| _GE {$$ = node("relop", 1, $1);}
| _EQ {$$ = node("relop", 1, $1);}
| _NEQ {$$ = node("relop", 1, $1);}
;
*/
ADD_EXPR: ADD_EXPR ADDOP TERM {$$ = node("additive-expression", 3, $1, $2, $3);}
| TERM {$$ = node("additive-expression", 1, $1);}
;
ADDOP: _ADD_OP {$$ = node("addop", 1, $1);}
;
TERM: TERM MULOP FACTOR {$$ = node("term", 3, $1, $2, $3);}
| FACTOR {$$ = node("term", 1, $1);}
;
MULOP: _MUL_OP {$$ = node("mulop", 1, $1);} relop : LT {$$ = node( "relop", 1, $1);}
; | LTE {$$ = node( "relop", 1, $1);}
| GT {$$ = node( "relop", 1, $1);}
| GTE {$$ = node( "relop", 1, $1);}
| EQ {$$ = node( "relop", 1, $1);}
| NEQ {$$ = node( "relop", 1, $1);}
;
FACTOR: _L_PARE EXPR _R_PARE {$$ = node("factor", 3, $1, $2, $3);} additive-expression : additive-expression addop term {$$ = node( "additive-expression", 3, $1, $2, $3);}
| VAR {$$ = node("factor", 1, $1);} | term {$$ = node( "additive-expression", 1, $1);}
| CALL {$$ = node("factor", 1, $1);} ;
| INTEGER {$$ = node("factor", 1, $1);}
| FLOAT {$$ = node("factor", 1, $1);}
;
INTEGER: _INTEGER {$$ = node("integer", 1, $1);} addop : ADD {$$ = node( "addop", 1, $1);}
; | SUB {$$ = node( "addop", 1, $1);}
;
FLOAT: _FLOATPOINT {$$ = node("float", 1, $1);} term : term mulop factor {$$ = node( "term", 3, $1, $2, $3);}
; | factor {$$ = node( "term", 1, $1);}
;
CALL: _ID _L_PARE ARGS _R_PARE {$$ = node("call", 4, $1, $2, $3, $4);} mulop : MUL {$$ = node( "mulop", 1, $1);}
; | DIV {$$ = node( "mulop", 1, $1);}
;
ARGS: ARG_LIST {$$ = node("args", 1, $1);} factor : LPARENTHESE expression RPARENTHESE {$$ = node( "factor", 3, $1, $2, $3);}
| {$$ = node("args", 0);} | var {$$ = node( "factor", 1, $1);}
; | call {$$ = node( "factor", 1, $1);}
| integer {$$ = node( "factor", 1, $1);}
| float {$$ = node( "factor", 1, $1);}
;
/* integer: wraps a single INTEGER token in an "integer" node. */
integer
    : INTEGER
        { $$ = node( "integer", 1, $1); }
    ;
/* float: wraps a single FLOATPOINT token in a "float" node. */
float
    : FLOATPOINT
        { $$ = node( "float", 1, $1); }
    ;
ARG_LIST: ARG_LIST _COMMA EXPR {$$ = node("arg-list", 3, $1, $2, $3);} call : IDENTIFIER LPARENTHESE args RPARENTHESE {$$ = node( "call", 4, $1, $2, $3, $4);}
| EXPR {$$ = node("arg-list", 1, $1);} ;
;
/* args: either an arg-list or nothing; the empty case yields an "args" node with no children. */
args
    : arg-list
        { $$ = node( "args", 1, $1); }
    |
        { $$ = node( "args", 0); }
    ;
/* arg-list: left-recursive, comma-separated list of expressions. */
arg-list
    : arg-list COMMA expression
        { $$ = node( "arg-list", 3, $1, $2, $3); }
    | expression
        { $$ = node( "arg-list", 1, $1); }
    ;
%% %%
...@@ -227,7 +229,7 @@ syntax_tree *parse(const char *input_path) ...@@ -227,7 +229,7 @@ syntax_tree *parse(const char *input_path)
yyin = stdin; yyin = stdin;
} }
lines = pos_start = pos_end = 1; lines = pos_start = pos_end = 1;
gt = new_syntax_tree(); gt = new_syntax_tree();
yyrestart(yyin); yyrestart(yyin);
yyparse(); yyparse();
...@@ -255,3 +257,4 @@ syntax_tree_node *node(const char *name, int children_num, ...) ...@@ -255,3 +257,4 @@ syntax_tree_node *node(const char *name, int children_num, ...)
} }
return p; return p;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment