From f3c7983749e44555700a176fab9965793c13f452 Mon Sep 17 00:00:00 2001 From: LiXiaoQi Date: Thu, 22 Sep 2022 09:29:48 +0800 Subject: [PATCH] Now it can pass a few tests. A clear bug is the regex for comment --- /* Review notes (annotation only; placed between the "---" separator and the first "diff --git" header so no hunk is altered):
 * Scope: updates the flex lexer (lexical_analyzer.l), the bison %union (syntax_analyzer.y) and one test fixture (expr.cminus).
 * Lexer prologue: `lines` now starts at 1, and pass_node() stores the node from new_syntax_tree_node() in yylval.node rather than yylval.value, matching the %union member renamed in the syntax_analyzer.y hunk of this patch.
 * comment_helper(comment, len): adds 2 to pos_end for the opening delimiter, scans bytes 2 .. len-3, and adds 2 more for the closing delimiter. Inside the scan each '\n' or '\r' increments `lines` and resets pos_end to 0; any other byte increments pos_end. pos_start is not touched.
 *   NOTE(review): `i` is int but `len` is unsigned int, so `i < len-2` is an unsigned comparison and len-2 would wrap for len < 2; presumably safe because the COMMENT pattern cannot match fewer than 4 characters - confirm.
 *   NOTE(review): '\r' and '\n' are counted separately, so a CR LF pair inside a comment adds 2 to `lines`, while the NEWLINE rule counts "\r\n" once.
 * COMMENT definition: the text between the two delimiters is matched with `.*`.
 *   NOTE(review): per the flex manual `.` excludes newline and the longest match wins, so this presumably rejects comments that span lines and swallows everything between the first opener and the last closer on a line. The subject line already records this regex as a known bug; the usual fix is a pattern that stops at the first closing delimiter and permits newlines.
 * Rules: the blanks around `|` in alternations are removed, operators and brackets are written as quoted strings, and whitespace/newline handling moves to the named WHITESPACE and NEWLINE definitions. The commented-out catch-all `.` rule returning ERROR is deleted.
 *   NOTE(review): no rule in these hunks handles other characters; presumably they fall through to the default flex rule - confirm that is intended.
 * Context comments in Chinese are left byte-for-byte because the hunks must match the target file; they read "declarations and option settings - end" and "please complete all flex patterns and actions here - end".
 * syntax_analyzer.y: the %union member becomes `struct _syntax_tree_node *node` (previously `node value`).
 * expr.cminus: adds the missing newline at end of file.
 */ src/parser/lexical_analyzer.l | 64 +++++++++++++++++++++-------------- src/parser/syntax_analyzer.y | 2 +- tests/parser/easy/expr.cminus | 2 +- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/parser/lexical_analyzer.l b/src/parser/lexical_analyzer.l index 1bb15aa..e6c1728 100644 --- a/src/parser/lexical_analyzer.l +++ b/src/parser/lexical_analyzer.l @@ -7,12 +7,24 @@ #include "syntax_tree.h" #include "syntax_analyzer.h" -int lines; +int lines = 1; int pos_start; int pos_end; void pass_node(char *text){ - yylval.value = new_syntax_tree_node(text); + yylval.node = new_syntax_tree_node(text); +} +void comment_helper(char *comment, unsigned int len) +{ + pos_end += 2; + for (int i = 2; i < len-2; i++){ + if (comment[i] == '\n' || comment[i] == '\r'){ + lines++; + pos_end = 0; + } else + pos_end++; + } + pos_end += 2; } /*****************声明和选项设置 end*****************/ @@ -23,8 +35,11 @@ letter [a-zA-Z] digit [0-9] ID {letter}+ INTEGER {digit}+ -FLOAT {digit}+\. | {digit}*\.{digit}+ +FLOAT {digit}+\.|{digit}*\.{digit}+ +COMMENT "/*".*"*/" +NEWLINE \r\n|\r|\n +WHITESPACE [ \t] /* %token _IF _ELSE _WHILE _RETURN _INT _FLOAT _VOID @@ -35,31 +50,30 @@ FLOAT {digit}+\. 
| {digit}*\.{digit}+ */ %% -if {pos_start = pos_end; pos_end += 2; pass_node("if"); return _IF;} -else {pos_start = pos_end; pos_end += 4; pass_node("else"); return _ELSE;} -while {pos_start = pos_end; pos_end += 5; pass_node("while"); return _WHILE;} -return {pos_start = pos_end; pos_end += 6; pass_node("return"); return _RETURN;} -int {pos_start = pos_end; pos_end += 3; pass_node("int"); return _INT;} -float {pos_start = pos_end; pos_end += 5; pass_node("float"); return _FLOAT;} -void {pos_start = pos_end; pos_end += 4; pass_node("void"); return _VOID;} +{COMMENT} { comment_helper(yytext, yyleng); } +if { pos_start = pos_end; pos_end += 2; pass_node("if"); return _IF;} +else { pos_start = pos_end; pos_end += 4; pass_node("else"); return _ELSE;} +while { pos_start = pos_end; pos_end += 5; pass_node("while"); return _WHILE;} +return { pos_start = pos_end; pos_end += 6; pass_node("return"); return _RETURN;} +int { pos_start = pos_end; pos_end += 3; pass_node("int"); return _INT;} +float { pos_start = pos_end; pos_end += 5; pass_node("float"); return _FLOAT;} +void { pos_start = pos_end; pos_end += 4; pass_node("void"); return _VOID;} -{ID} {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _ID;} -{INTEGER} {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _INTEGER;} -{FLOAT} {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _FLOATPOINT;} +{ID} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _ID;} +{INTEGER} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _INTEGER;} +{FLOAT} { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _FLOATPOINT;} -\= {pos_start = pos_end; pos_end += 1; pass_node("="); return _ASSIGN;} -"<=" | ">=" | "<" | ">" | "==" | "!=" {pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _RELOP;} -"+" | "-" {pos_start = pos_end; pos_end += 1; pass_node(yytext); return _ADD_OP;} -"*" | "/" {pos_start = pos_end; pos_end += 1; 
pass_node(yytext); return _MUL_OP;} +"=" { pos_start = pos_end; pos_end += 1; pass_node("="); return _ASSIGN;} +"<="|">="|"<"|">"|"=="|"!=" { pos_start = pos_end; pos_end += yyleng; pass_node(yytext); return _RELOP;} +"+"|"-" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _ADD_OP;} +"*"|"/" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return _MUL_OP;} -\[ | \] {pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '[' ? _L_SQUARE : _R_SQUARE;} -\( | \) {pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '(' ? _L_PARE : _R_PARE;} -\{ | \} {pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '{' ? _L_BRACKET : _R_BRACKET;} +"["|"]" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '[' ? _L_SQUARE : _R_SQUARE;} +"("|")" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '(' ? _L_PARE : _R_PARE;} +"{"|"}" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == '{' ? _L_BRACKET : _R_BRACKET;} -"," | ";" {pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == ',' ? _COMMA : _SEMI;} +","|";" { pos_start = pos_end; pos_end += 1; pass_node(yytext); return yytext[0] == ',' ? _COMMA : _SEMI;} -" " | \t { pos_end++; } -\r\n | \n | \r { lines++; pos_end = 0;} -/* . { pos_start = pos_end; pos_end++; return ERROR; } */ -/****请在此补全所有flex的模式与动作 end******/ +{WHITESPACE} { pos_end++; } +{NEWLINE} { lines++; pos_end = 0;} %% diff --git a/src/parser/syntax_analyzer.y b/src/parser/syntax_analyzer.y index 1beaaf5..1ae75be 100644 --- a/src/parser/syntax_analyzer.y +++ b/src/parser/syntax_analyzer.y @@ -32,7 +32,7 @@ syntax_tree_node *node(const char *node_name, int children_num, ...); Hint: See pass_node(), node(), and syntax_tree.h. Use forward declaring. */ %union { - node value; + struct _syntax_tree_node *node; } /* TODO: Your tokens here. 
*/ diff --git a/tests/parser/easy/expr.cminus b/tests/parser/easy/expr.cminus index 787f1ca..2c99f4e 100644 --- a/tests/parser/easy/expr.cminus +++ b/tests/parser/easy/expr.cminus @@ -13,4 +13,4 @@ int main(void){ x = a/x; return 0; -} \ No newline at end of file +} -- GitLab