From 066f2d5dc24e5a3d7930a21ed001b642477ab552 Mon Sep 17 00:00:00 2001
From: LiXiaoQi
Date: Wed, 14 Dec 2022 22:41:20 +0800
Subject: [PATCH] change parser

Lexer (lexical_analyzer.l):
  * Remove the named pattern definitions and the comment_helper() debug
    hook; every rule is now written out literally.
  * Return one token per operator/delimiter (ADD, SUB, MUL, DIV, LT, LTE,
    GT, GTE, EQ, NEQ, ASSIN, ...) instead of the grouped _RELOP, _ADD_OP
    and _MUL_OP tokens.
  * Keep pos_start/pos_end up to date for every token, including text
    inside comments, and reset both to 1 on each line break.
  * Accept LF, CR and CRLF line breaks in both start conditions.

Grammar (syntax_analyzer.y):
  * Rename the nonterminals to the hyphenated names that are used as
    syntax-tree node labels.
  * Expand relop/addop/mulop into one production per token.
  * return-stmt keeps its ';' child in both alternatives.

.gitignore: ignore lexical_analyzer_stu.l and syntax_analyzer_stu.y.
---
 src/parser/.gitignore         |   2 +
 src/parser/lexical_analyzer.l |  97 +++++-------
 src/parser/syntax_analyzer.y  | 279 +++++++++++++++++-----------------
 3 files changed, 184 insertions(+), 194 deletions(-)
 create mode 100644 src/parser/.gitignore

diff --git a/src/parser/.gitignore b/src/parser/.gitignore
new file mode 100644
index 0000000..02f2cac
--- /dev/null
+++ b/src/parser/.gitignore
@@ -0,0 +1,2 @@
+lexical_analyzer_stu.l
+syntax_analyzer_stu.y
diff --git a/src/parser/lexical_analyzer.l b/src/parser/lexical_analyzer.l
index 52ab572..e638270 100644
--- a/src/parser/lexical_analyzer.l
+++ b/src/parser/lexical_analyzer.l
@@ -7,76 +7,61 @@
 #include "syntax_tree.h"
 #include "syntax_analyzer.h"
 
-/* #define __DEBUG_COMMENT__ */
-
-int lines = 1;
+int lines;
 int pos_start;
 int pos_end;
 
 void pass_node(char *text){
      yylval.node = new_syntax_tree_node(text);
 }
-void comment_helper(char *comment, unsigned int len)
-{
-#ifdef __DEBUG_COMMENT__
-    printf("Get COMMENT in line<%d>: \"%s\"\n", lines, comment);
-#endif
-}
 
 /*****************声明和选项设置 end*****************/
 
 %}
 
-/* use exclusive state */
 %x COMMENT
 
-letter [a-zA-Z]
-digit [0-9]
-ID {letter}+
-INTEGER {digit}+
-FLOAT {digit}+\.|{digit}*\.{digit}+
-
-NEWLINE \r\n|\r|\n
-WHITESPACE [ \t]
-/*
-
-%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID
-%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP
-%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET
-%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT
-
-*/
-/* <COMMENT>.* { pos_end += yyleng; comment_helper(yytext, yyleng); } */
-
 %%
-"/*" { BEGIN(COMMENT); pos_end += 2; }
-<COMMENT>[^*\n]*|"*"+[^*/\n]* { pos_end += yyleng; comment_helper(yytext, yyleng); }
-<COMMENT>"*/" { BEGIN(0); pos_end += 2; }
+\+ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ADD;}
+\- {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SUB;}
+\* {pos_start = pos_end; pos_end += 1; pass_node(yytext); return MUL;}
+\/ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return DIV;}
+\< {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LT;}
+\<= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return LTE;}
+\> {pos_start = pos_end; pos_end += 1; pass_node(yytext); return GT;}
+\>= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return GTE;}
+== {pos_start = pos_end; pos_end += 2; pass_node(yytext); return EQ;}
+!= {pos_start = pos_end; pos_end += 2; pass_node(yytext); return NEQ;}
+= {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ASSIN;}
+; {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SEMICOLON;}
+, {pos_start = pos_end; pos_end += 1; pass_node(yytext); return COMMA;}
+\( {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LPARENTHESE;}
+\) {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RPARENTHESE;}
+\[ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LBRACKET;}
+\] {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RBRACKET;}
+\{ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LBRACE;}
+\} {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RBRACE;}
+else {pos_start = pos_end; pos_end += 4; pass_node(yytext); return ELSE;}
+if {pos_start = pos_end; pos_end += 2; pass_node(yytext); return IF;}
+int {pos_start = pos_end; pos_end += 3; pass_node(yytext); return INT;}
+float {pos_start = pos_end; pos_end += 5; pass_node(yytext); return FLOAT;}
+return {pos_start = pos_end; pos_end += 6; pass_node(yytext); return RETURN;}
+void {pos_start = pos_end; pos_end += 4; pass_node(yytext); return VOID;}
+while {pos_start = pos_end; pos_end += 5; pass_node(yytext); return WHILE;}
+[a-zA-Z]+ {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return IDENTIFIER;}
+[0-9]+ {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return INTEGER;}
+[0-9]+\.[0-9]*|[0-9]*\.[0-9]+ { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return FLOATPOINT; }
+
+\r\n|\r|\n {lines++; pos_start = 1; pos_end = 1;}
+[ \t] {pos_start = pos_end; pos_end += 1;}
+
+"/*" { pos_start = pos_end; pos_end += 2; BEGIN(COMMENT); }
+<COMMENT>"*/" { pos_start = pos_end; pos_end += 2; BEGIN(INITIAL); }
+<COMMENT>. { pos_start = pos_end; pos_end += 1; }
+<COMMENT>\r\n|\r|\n { pos_start = 1; pos_end = 1; lines++; }
+
+. { pos_start = pos_end; pos_end++; return ERROR; }
 
 
-if { pos_start = pos_end; pos_end += 2; pass_node("if"); return _IF;}
-else { pos_start = pos_end; pos_end += 4; pass_node("else"); return _ELSE;}
-while { pos_start = pos_end; pos_end += 5; pass_node("while"); return _WHILE;}
-return { pos_start = pos_end; pos_end += 6; pass_node("return"); return _RETURN;}
-int { pos_start = pos_end; pos_end += 3; pass_node("int"); return _INT;}
-float { pos_start = pos_end; pos_end += 5; pass_node("float"); return _FLOAT;}
-void { pos_start = pos_end; pos_end += 4; pass_node("void"); return _VOID;}
-
-{ID} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _ID;}
-{INTEGER} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _INTEGER;}
-{FLOAT} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _FLOATPOINT;}
-
-"=" { pos_start = pos_end; pos_end += 1; pass_node("="); return _ASSIGN;}
-"<="|">="|"<"|">"|"=="|"!=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _RELOP;}
-"+"|"-" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _ADD_OP;}
-"*"|"/" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _MUL_OP;}
-
-"["|"]" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '[' ? _L_SQUARE : _R_SQUARE;}
-"("|")" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '(' ? _L_PARE : _R_PARE;}
-"{"|"}" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '{' ? _L_BRACKET : _R_BRACKET;}
-
-","|";" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == ',' ? _COMMA : _SEMI;}
-
-{WHITESPACE} { pos_end++; }
-<*>{NEWLINE} { lines++; pos_end = 0;}
 %%
+
diff --git a/src/parser/syntax_analyzer.y b/src/parser/syntax_analyzer.y
index 9842706..b94930c 100644
--- a/src/parser/syntax_analyzer.y
+++ b/src/parser/syntax_analyzer.y
@@ -14,7 +14,7 @@ extern FILE * yyin;
 
 // external variables from lexical_analyzer module
 extern int lines;
-extern char *yytext;
+extern char * yytext;
 extern int pos_end;
 extern int pos_start;
 
@@ -28,179 +28,181 @@ void yyerror(const char *s);
 syntax_tree_node *node(const char *node_name, int children_num, ...);
 %}
 
-/* TODO: Complete this definition.
-   Hint: See pass_node(), node(), and syntax_tree.h.
-         Use forward declaring. */
+/* TODO: Complete this definition. */
 %union {
-    struct _syntax_tree_node *node;
+    struct _syntax_tree_node * node;
+    char * name;
 }
 
 /* TODO: Your tokens here. */
-/*
-alias:
-- SPEC: SPECIFIER
-- DEC:DECLARATION
-- COM: COMPOUND
-- STMT: STATEMENT
-- EXPR: EXPRESSION
-- ITER: ITERATION
-- SELC: SELCTION
-- RET: RETURN
-- Tokens starting with '_' is the terminator
-*/
 %token <node> ERROR
-%type <node> TYPE_SPEC RELOP ADDOP MULOP
-%type <node> DEC_LIST DEC VAR_DEC FUN_DEC LOCAL_DEC
-%type <node> COM_STMT STMT_LIST STMT EXPR_STMT ITER_STMT SELC_STMT RET_STMT
-%type <node> EXPR SIMPLE_EXPR VAR ADD_EXPR TERM FACTOR INTEGER FLOAT CALL
-%type <node> PARAM PARAMS PARAM_LIST ARGS ARG_LIST
-/* These are for flex to return
-NOTE: Though combining _LE _LT _BT _BE _EQ _NEQ to _RELOP makes the program simpler,
-      it may not satisfy the subsequent requirements.
-*/
-%token <node> _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID
-%token <node> _ASSIGN _RELOP _ADD_OP _MUL_OP
-%token <node> _L_SQUARE _R_SQUARE _L_PARE _R_PARE _L_BRACKET _R_BRACKET
-%token <node> _SEMI _COMMA _ID _INTEGER _FLOATPOINT
-
-%type <node> program
-
+%token <node> ADD
+%token <node> SUB
+%token <node> MUL
+%token <node> DIV
+%token <node> LT
+%token <node> LTE
+%token <node> GT
+%token <node> GTE
+%token <node> EQ
+%token <node> NEQ
+%token <node> ASSIN
+%token <node> SEMICOLON
+%token <node> COMMA
+%token <node> LPARENTHESE
+%token <node> RPARENTHESE
+%token <node> LBRACKET
+%token <node> RBRACKET
+%token <node> LBRACE
+%token <node> RBRACE
+%token <node> ELSE
+%token <node> IF
+%token <node> INT
+%token <node> RETURN
+%token <node> VOID
+%token <node> WHILE
+%token <node> IDENTIFIER
+%token <node> INTEGER
+%token <node> FLOAT
+%token <node> FLOATPOINT
+//%token <node> EOL
+//%token <node> BLANK
+//%token <node> COMMENT
+%type <node> program declaration-list declaration var-declaration type-specifier fun-declaration params param-list param compound-stmt local-declarations statement-list statement expression-stmt selection-stmt iteration-stmt return-stmt expression var simple-expression relop additive-expression addop term mulop factor integer float call args arg-list
+
+/* compulsory starting symbol */
 %start program
 
-/* TODO: Your rules here. */
 %%
+/* TODO: Your rules here. */
 
 
-program: DEC_LIST {$$ = node("program", 1, $1); gt->root = $$;}
-    ;
+program : declaration-list {$$ = node( "program", 1, $1); gt->root = $$;}
+    ;
 
-DEC_LIST: DEC_LIST DEC {$$ = node("declaration-list", 2, $1, $2); }
-    | DEC {$$ = node("declaration-list", 1, $1);}
-    ;
+declaration-list : declaration-list declaration {$$ = node( "declaration-list", 2, $1, $2);}
+    | declaration {$$ = node( "declaration-list", 1, $1);}
+    ;
 
-DEC: VAR_DEC {$$ = node("declaration", 1, $1); }
-    | FUN_DEC {$$ = node("declaration", 1, $1); }
-    ;
+declaration : var-declaration {$$ = node( "declaration", 1, $1);}
+    | fun-declaration {$$ = node( "declaration", 1, $1);}
+    ;
 
-VAR_DEC: TYPE_SPEC _ID _SEMI {$$ = node("var-declaration", 3, $1, $2, $3); }
-    | TYPE_SPEC _ID _L_SQUARE _INTEGER _R_SQUARE _SEMI {$$ = node("var-declaration", 6, $1, $2, $3, $4, $5, $6); }
-    ;
+var-declaration : type-specifier IDENTIFIER SEMICOLON {$$ = node( "var-declaration", 3, $1, $2, $3);}
+    | type-specifier IDENTIFIER LBRACKET INTEGER RBRACKET SEMICOLON {$$ = node( "var-declaration", 6, $1, $2, $3, $4, $5, $6);}
+    ;
 
-TYPE_SPEC: _INT {$$ = node("type-specifier", 1, $1); }
-    | _FLOAT {$$ = node("type-specifier", 1, $1); }
-    | _VOID {$$ = node("type-specifier", 1, $1); }
-    ;
+type-specifier : INT {$$ = node( "type-specifier", 1, $1);}
+    | FLOAT { $$ = node( "type-specifier", 1, $1); }
+    | VOID {$$ = node( "type-specifier", 1, $1);}
+    ;
 
-FUN_DEC: TYPE_SPEC _ID _L_PARE PARAMS _R_PARE COM_STMT {$$ = node("fun-declaration", 6, $1, $2, $3, $4, $5, $6); }
-    ;
+fun-declaration : type-specifier IDENTIFIER LPARENTHESE params RPARENTHESE compound-stmt {$$ = node( "fun-declaration", 6, $1, $2, $3, $4, $5, $6);}
+    ;
 
-PARAMS: PARAM_LIST {$$ = node("params", 1, $1); }
-    | _VOID {$$ = node("params", 1, $1); }
-    ;
+params : param-list {$$ = node( "params", 1, $1);}
+    | VOID {$$ = node( "params", 1, $1);}
+    ;
 
-PARAM_LIST: PARAM_LIST _COMMA PARAM {$$ = node("param-list", 3, $1, $2, $3); }
-    | PARAM {$$ = node("param-list", 1, $1); }
-    ;
+param-list : param-list COMMA param {$$ = node( "param-list", 3, $1, $2, $3);}
+    | param {$$ = node( "param-list", 1, $1);}
+    ;
+param : type-specifier IDENTIFIER {$$ = node( "param", 2, $1, $2);}
+    | type-specifier IDENTIFIER LBRACKET RBRACKET {$$ = node( "param", 4, $1, $2, $3, $4);}
+    ;
 
 
-PARAM: TYPE_SPEC _ID {$$ = node("param", 2, $1, $2); }
-    | TYPE_SPEC _ID _L_SQUARE _R_SQUARE {$$ = node("param", 4, $1, $2, $3, $4);}
-    ;
+compound-stmt : LBRACE local-declarations statement-list RBRACE {$$ = node( "compound-stmt", 4, $1, $2, $3, $4);}
+    ;
 
-COM_STMT: _L_BRACKET LOCAL_DEC STMT_LIST _R_BRACKET {$$ = node("compound-stmt", 4, $1, $2, $3, $4);}
-    ;
+local-declarations : local-declarations var-declaration {$$ = node( "local-declarations", 2, $1, $2);}
+| {$$ = node( "local-declarations",0);}
+    ;
 
-LOCAL_DEC: LOCAL_DEC VAR_DEC {$$ = node("local-declarations", 2, $1, $2);}
-    | {$$ = node("local-declarations", 0);}
-    ;
+statement-list : statement-list statement {$$ = node( "statement-list", 2, $1, $2);}
+| {$$ = node( "statement-list",0);}
+    ;
 
-STMT_LIST: STMT_LIST STMT {$$ = node("statement-list", 2, $1, $2);}
-    | {$$ = node("statement-list", 0);}
-    ;
+statement : expression-stmt {$$ = node( "statement", 1, $1);}
+    | compound-stmt {$$ = node( "statement", 1, $1);}
+    | selection-stmt {$$ = node( "statement", 1, $1);}
+    | iteration-stmt {$$ = node( "statement", 1, $1);}
+    | return-stmt {$$ = node( "statement", 1, $1);}
+    ;
 
-STMT: EXPR_STMT {$$ = node("statement", 1, $1);}
-    | COM_STMT {$$ = node("statement", 1, $1);}
-    | SELC_STMT {$$ = node("statement", 1, $1);}
-    | ITER_STMT {$$ = node("statement", 1, $1);}
-    | RET_STMT {$$ = node("statement", 1, $1);}
-    ;
+expression-stmt : expression SEMICOLON {$$ = node( "expression-stmt", 2, $1, $2);}
+    | SEMICOLON {$$ = node( "expression-stmt", 1, $1);}
+    ;
 
-EXPR_STMT: EXPR _SEMI {$$ = node("expression-stmt", 2, $1, $2);}
-    | _SEMI {$$ = node("expression-stmt", 1, $1);}
-    ;
+selection-stmt : IF LPARENTHESE expression RPARENTHESE statement {$$ = node( "selection-stmt", 5, $1, $2, $3, $4, $5);}
+    | IF LPARENTHESE expression RPARENTHESE statement ELSE statement {$$ = node( "selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);}
+    ;
 
-SELC_STMT: _IF _L_PARE EXPR _R_PARE STMT {$$ = node("selection-stmt", 5, $1, $2, $3, $4, $5);}
-    | _IF _L_PARE EXPR _R_PARE STMT _ELSE STMT {$$ = node("selection-stmt", 7, $1, $2, $3, $4, $5, $6, $7);}
-    ;
+iteration-stmt : WHILE LPARENTHESE expression RPARENTHESE statement {$$ = node( "iteration-stmt", 5, $1, $2, $3, $4, $5);}
+    ;
 
-ITER_STMT: _WHILE _L_PARE EXPR _R_PARE STMT {$$ = node("iteration-stmt", 5, $1, $2, $3, $4, $5);}
-    ;
+return-stmt : RETURN SEMICOLON {$$ = node( "return-stmt", 2, $1, $2);}
+    | RETURN expression SEMICOLON {$$ = node( "return-stmt", 3, $1, $2, $3);}
+    ;
 
-RET_STMT: _RETURN _SEMI {$$ = node("return-stmt", 2, $1, $2);}
-    | _RETURN EXPR _SEMI {$$ = node("return-stmt", 3, $1, $2, $3);}
-    ;
+expression : var ASSIN expression {$$ = node( "expression", 3, $1, $2, $3);}
+    | simple-expression {$$ = node( "expression", 1, $1);}
+    ;
 
-EXPR: VAR _ASSIGN EXPR {$$ = node("expression", 3, $1, $2, $3);}
-    | SIMPLE_EXPR {$$ = node("expression", 1, $1);}
+var : IDENTIFIER {$$ = node( "var", 1, $1);}
+    | IDENTIFIER LBRACKET expression RBRACKET {$$ = node( "var", 4, $1, $2, $3, $4);}
     ;
 
-VAR: _ID {$$ = node("var", 1, $1);}
-    | _ID _L_SQUARE EXPR _R_SQUARE {$$ = node("var", 4, $1, $2, $3, $4);}
-    ;
-
-SIMPLE_EXPR: ADD_EXPR RELOP ADD_EXPR {$$ = node("simple-expression", 3, $1, $2, $3);}
-    | ADD_EXPR {$$ = node("simple-expression", 1, $1);}
-    ;
-
-RELOP: _RELOP {$$ = node("relop", 1, $1);}
-    ;
-/*
-RELOP: _LE {$$ = node("relop", 1, $1);}
-    | _LT {$$ = node("relop", 1, $1);}
-    | _GT {$$ = node("relop", 1, $1);}
-    | _GE {$$ = node("relop", 1, $1);}
-    | _EQ {$$ = node("relop", 1, $1);}
-    | _NEQ {$$ = node("relop", 1, $1);}
-    ;
-*/
-
-ADD_EXPR: ADD_EXPR ADDOP TERM {$$ = node("additive-expression", 3, $1, $2, $3);}
-    | TERM {$$ = node("additive-expression", 1, $1);}
-    ;
-
-ADDOP: _ADD_OP {$$ = node("addop", 1, $1);}
-    ;
-
-TERM: TERM MULOP FACTOR {$$ = node("term", 3, $1, $2, $3);}
-    | FACTOR {$$ = node("term", 1, $1);}
-    ;
+simple-expression : additive-expression relop additive-expression {$$ = node( "simple-expression", 3, $1, $2, $3);}
+    | additive-expression {$$ = node( "simple-expression", 1, $1);}
+    ;
 
-MULOP: _MUL_OP {$$ = node("mulop", 1, $1);}
-    ;
+relop : LT {$$ = node( "relop", 1, $1);}
+    | LTE {$$ = node( "relop", 1, $1);}
+    | GT {$$ = node( "relop", 1, $1);}
+    | GTE {$$ = node( "relop", 1, $1);}
+    | EQ {$$ = node( "relop", 1, $1);}
+    | NEQ {$$ = node( "relop", 1, $1);}
+    ;
 
-FACTOR: _L_PARE EXPR _R_PARE {$$ = node("factor", 3, $1, $2, $3);}
-    | VAR {$$ = node("factor", 1, $1);}
-    | CALL {$$ = node("factor", 1, $1);}
-    | INTEGER {$$ = node("factor", 1, $1);}
-    | FLOAT {$$ = node("factor", 1, $1);}
-    ;
+additive-expression : additive-expression addop term {$$ = node( "additive-expression", 3, $1, $2, $3);}
+    | term {$$ = node( "additive-expression", 1, $1);}
+    ;
 
-INTEGER: _INTEGER {$$ = node("integer", 1, $1);}
-    ;
+addop : ADD {$$ = node( "addop", 1, $1);}
+    | SUB {$$ = node( "addop", 1, $1);}
+    ;
 
-FLOAT: _FLOATPOINT {$$ = node("float", 1, $1);}
-    ;
+term : term mulop factor {$$ = node( "term", 3, $1, $2, $3);}
+    | factor {$$ = node( "term", 1, $1);}
+    ;
 
-CALL: _ID _L_PARE ARGS _R_PARE {$$ = node("call", 4, $1, $2, $3, $4);}
-    ;
+mulop : MUL {$$ = node( "mulop", 1, $1);}
+    | DIV {$$ = node( "mulop", 1, $1);}
+    ;
 
-ARGS: ARG_LIST {$$ = node("args", 1, $1);}
-    | {$$ = node("args", 0);}
-    ;
+factor : LPARENTHESE expression RPARENTHESE {$$ = node( "factor", 3, $1, $2, $3);}
+    | var {$$ = node( "factor", 1, $1);}
+    | call {$$ = node( "factor", 1, $1);}
+    | integer {$$ = node( "factor", 1, $1);}
+    | float {$$ = node( "factor", 1, $1);}
+    ;
+
+integer : INTEGER {$$ = node( "integer", 1, $1);}
+    ;
+
+float : FLOATPOINT {$$ = node( "float", 1, $1);}
+    ;
 
-ARG_LIST: ARG_LIST _COMMA EXPR {$$ = node("arg-list", 3, $1, $2, $3);}
-    | EXPR {$$ = node("arg-list", 1, $1);}
-    ;
+call : IDENTIFIER LPARENTHESE args RPARENTHESE {$$ = node( "call", 4, $1, $2, $3, $4);}
+    ;
+args : arg-list {$$ = node( "args", 1, $1);}
+| {$$ = node( "args", 0);}
+    ;
+
+arg-list : arg-list COMMA expression {$$ = node( "arg-list", 3, $1, $2, $3);}
+    | expression {$$ = node( "arg-list", 1, $1);}
+    ;
 
 %%
 
@@ -227,7 +229,7 @@ syntax_tree *parse(const char *input_path)
         yyin = stdin;
     }
 
-lines = pos_start = pos_end = 1;
+    lines = pos_start = pos_end = 1;
     gt = new_syntax_tree();
     yyrestart(yyin);
     yyparse();
@@ -255,3 +257,4 @@ syntax_tree_node *node(const char *name, int children_num, ...)
     }
     return p;
 }
+
-- 
GitLab