From 1631b1c3fc7d8e257359054b1f667076e4810e49 Mon Sep 17 00:00:00 2001 From: lxq <877250099@qq.com> Date: Sat, 4 Feb 2023 12:52:01 +0800 Subject: [PATCH] ready to start register allocation --- Reports/5-bonus/report.md | 112 ++++++++++++++++++++++++++++++++++ include/codegen/codegen.hpp | 2 +- include/codegen/liverange.hpp | 46 ++++++++++---- src/cminusfc/cminusfc.cpp | 4 +- src/codegen/liverange.cpp | 84 +++++++++++++++---------- tests/5-bonus/test_time.py | 2 +- 6 files changed, 203 insertions(+), 47 deletions(-) create mode 100644 Reports/5-bonus/report.md diff --git a/Reports/5-bonus/report.md b/Reports/5-bonus/report.md new file mode 100644 index 0000000..8da5b2e --- /dev/null +++ b/Reports/5-bonus/report.md @@ -0,0 +1,112 @@ +# Lab5 报告 + +## 实验任务 + +基于前4个实验,完成对LightIR的翻译,目标架构为龙芯LA64架构。 + +## 实验流程 + +1. 使用栈式内存分配,优先追求功能性 + + 这一步主要完成了指令选择,所有变量(local or global)均在栈中储存,参数通过栈传递。常量保存在只读区(模拟gcc)。 + + 这里对于phi指令的处理是:将phi指令还原为前驱块的`copy-statement`,需要将其插入在基本块的最后一条指令(跳转指令)之前。 + + 这一步可以完成所有测试样例,但是生成的代码效率较差。 + +2. 活跃变量分析 + + 先确定指令的遍历顺序,这里使用常规的BFS遍历,phi指令的处理和上述相同,例如对于`5-while.ll`: + + ```llvm + define i32 @main() { + label_entry: + br label %label0 + label0: ; preds = %label_entry, %label5 + %op1 = phi i32 [ 0, %label_entry ], [ %op6, %label5 ] + %op2 = icmp slt i32 %op1, 10 + %op3 = zext i1 %op2 to i32 + %op4 = icmp ne i32 %op3, 0 + br i1 %op4, label %label5, label %label7 + label5: ; preds = %label0 + call void @output(i32 %op1) + %op6 = add i32 %op1, 1 + br label %label0 + label7: ; preds = %label0 + ret i32 0 + } + ``` + + 指令遍历顺序如下,第1条与第9条指令就是phi指令的还原。 + + ```llvm + 1. op1 = 0 + 2. br label %label0 + 3. %op2 = icmp slt i32 %op1, 10 + 4. %op3 = zext i1 %op2 to i32 + 5. %op4 = icmp ne i32 %op3, 0 + 6. br i1 %op4, label %label5, label %label7 + 7. call void @output(i32 %op1) + 8. %op6 = add i32 %op1, 1 + 9. op1 = op6 + 10. br label %label0 + 11. ret i32 0 + ``` + + 用编号代替指令,获得每个程序点的IN和OUT: + + ```llvm + 1. op1 = 0 + in-set: [ ] + out-set: [ op1 ] + 2. br label %label0 + in-set: [ op1 ] + out-set: [ op1 ] + 3. %op2 = icmp slt i32 %op1, 10 + in-set: [ op1 ] + out-set: [ op2 op1 ] + 4. %op3 = zext i1 %op2 to i32 + in-set: [ op2 op1 ] + out-set: [ op3 op1 ] + 5. %op4 = icmp ne i32 %op3, 0 + in-set: [ op3 op1 ] + out-set: [ op4 op1 ] + 6. br i1 %op4, label %label5, label %label7 + in-set: [ op4 op1 ] + out-set: [ op1 ] + 7. call void @output(i32 %op1) + in-set: [ op1 ] + out-set: [ op1 ] + 8. %op6 = add i32 %op1, 1 + in-set: [ op1 ] + out-set: [ op6 ] + 9. op1 = op6 + in-set: [ op6 ] + out-set: [ op1 ] + 10. br label %label0 + in-set: [ op1 ] + out-set: [ op1 ] + 11. ret i32 0 + in-set: [ ] + out-set: [ ] + ``` + + 获得活跃区间:编号为i的指令,涉及两个端点:i-1和i,分别对应IN和OUT。由此得到各个变量的活跃区间是: + + ```llvm + op1: <1, 10> + op2: <3, 3> + op3: <4, 4> + op4: <5, 5> + op6: <8, 8> + ``` + +3. 寄存器分配 + + 使用线性扫描算法实现寄存器分配,参考: + + - http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf + + - [Documentations/5-bonus/寄存器分配.md · master · compiler_staff / 2022fall-Compiler_CMinus · GitLab](https://cscourse.ustc.edu.cn/vdir/Gitlab/compiler_staff/2022fall-compiler_cminus/-/blob/master/Documentations/5-bonus/%E5%AF%84%E5%AD%98%E5%99%A8%E5%88%86%E9%85%8D.md#poletto) + + diff --git a/include/codegen/codegen.hpp b/include/codegen/codegen.hpp index 3b3b66c..baeb370 100644 --- a/include/codegen/codegen.hpp +++ b/include/codegen/codegen.hpp @@ -156,7 +156,7 @@ class CodeGen { vector output; // register allocation - LiveRangeAnalyzer LRA; + LRA::LiveRangeAnalyzer LRA; }; #endif diff --git a/include/codegen/liverange.hpp b/include/codegen/liverange.hpp index f16010d..22749fe 100644 --- a/include/codegen/liverange.hpp +++ b/include/codegen/liverange.hpp @@ -13,52 +13,75 @@ #include #include #include +#include +#include using std::map; using std::pair; using std::set; using std::string; +using std::to_string; using std::vector; #define __LRA_PRINT__ +namespace LRA { + +struct Interval { + Interval(int a = 0, int b = 0) : i(a), j(b) {} + int i; // 0 means uninitialized + int j; +}; + +using LiveSet = set; +using PhiMap = map>>; +using LiveInterval = pair; + +struct LiveIntervalCMP { + bool operator()(LiveInterval const &lhs, LiveInterval const &rhs) const { + if (lhs.first.i != rhs.first.i) + return lhs.first.i < rhs.first.i; + else + return lhs.second < rhs.second; + } +}; + class LiveRangeAnalyzer { + public: friend class CodeGen; - using LiveSet = set; - using PhiMap = map>>; - public: - struct Interval { - Interval(int a, int b) : i(a), j(b) {} - Interval() = delete; - int i, j; - }; LiveRangeAnalyzer(Module *m_, PhiMap &phi_map_) : m(m_), phi_map(phi_map_) {} LiveRangeAnalyzer() = delete; void run(); + void run(Function *); void clear(); void print(Function *func, bool printSet = true); - string print_liveSet(LiveSet &ls) { + string print_liveSet(const LiveSet &ls) { string s = "[ "; for (auto k : ls) s += k->get_name() + " "; s += "]"; return s; } + string print_interval(Interval &i) { + return "<" + to_string(i.i) + ", " + to_string(i.j) + ">"; + } private: Module *m; // Function *func; - map liverange; + int ir_cnt; map IN, OUT; map instr_id; map, int> cpstmt_id; const PhiMap &phi_map; + set liveIntervals; void make_id(Function *); - void run(Function *); + void make_interval(Function *); + LiveSet joinFor(BasicBlock *bb); void union_ip(LiveSet &dest, LiveSet &src) { LiveSet res; @@ -73,4 +96,5 @@ class LiveRangeAnalyzer { // Return: the in-set(will not set IN-map) LiveSet transferFunction(Instruction *); }; +} // namespace LRA #endif diff --git a/src/cminusfc/cminusfc.cpp b/src/cminusfc/cminusfc.cpp index 2a144f0..cda9a19 100644 --- a/src/cminusfc/cminusfc.cpp +++ b/src/cminusfc/cminusfc.cpp @@ -34,7 +34,7 @@ main(int argc, char **argv) { bool gvn = false; bool dump_json = false; bool emit = false; - bool assembly = false; + bool assembly = true; for (int i = 1; i < argc; ++i) { if (argv[i] == "-h"s || argv[i] == "--help"s) { @@ -128,7 +128,7 @@ main(int argc, char **argv) { std::ofstream target_file(target_path + ".s"); target_file << codegen.print(); target_file.close(); - return 0; + // return 0; } std::ofstream output_stream; diff --git a/src/codegen/liverange.cpp b/src/codegen/liverange.cpp index 797b5a1..9ddab4e 100644 --- a/src/codegen/liverange.cpp +++ b/src/codegen/liverange.cpp @@ -6,15 +6,21 @@ #include #include +using std::cout; +using std::endl; +using namespace LRA; + void LiveRangeAnalyzer::clear() { + ir_cnt = 0; IN.clear(); OUT.clear(); - liverange.clear(); instr_id.clear(); + cpstmt_id.clear(); + liveIntervals.clear(); } -LiveRangeAnalyzer::LiveSet +LiveSet LiveRangeAnalyzer::joinFor(BasicBlock *bb) { LiveSet out; for (auto succ : bb->get_succ_basic_blocks()) { @@ -24,9 +30,9 @@ LiveRangeAnalyzer::joinFor(BasicBlock *bb) { ++it; assert(it != irs.end() && "need to find first_ir from copy-stmt"); union_ip(out, IN[instr_id.at(&(*it))]); - // std::cout << "# " + it->print() << std::endl; + // cout << "# " + it->print() << endl; } - // std::cout << "\tget out: " << print_liveSet(out) << std::endl; + // cout << "\tget out: " << print_liveSet(out) << endl; return out; } @@ -35,7 +41,6 @@ LiveRangeAnalyzer::make_id(Function *func) { // instruction numbering // this is also the structure of the IR logically: // ignore phi, add copy-statement - int ir_cnt = 0; for (auto &bb : func->get_basic_blocks()) { for (auto &instr : bb.get_instructions()) { if (instr.is_phi()) @@ -125,12 +130,37 @@ LiveRangeAnalyzer::run(Function *func) { } } } + make_interval(func); + #ifdef __LRA_PRINT__ print(func); #endif } -LiveRangeAnalyzer::LiveSet +void +LiveRangeAnalyzer::make_interval(Function *) { + map liverange; + for (int time = 1; time <= ir_cnt; ++time) { + for (auto op : IN.at(time)) { + auto &interval = liverange[op]; + if (interval.i == 0) // uninitialized + interval.i = time - 1; + else + interval.j = time - 1; + } + for (auto op : OUT.at(time)) { + auto &interval = liverange[op]; + if (interval.i == 0) // uninitialized + interval.i = time; + else + interval.j = time; + } + } + for (auto [op, interval] : liverange) + liveIntervals.insert({interval, op}); +} + +LiveSet LiveRangeAnalyzer::transferFunction(Instruction *instr) { LiveSet in, out = OUT[instr_id.at(instr)]; LiveSet use; @@ -165,6 +195,7 @@ LiveRangeAnalyzer::transferFunction(Instruction *instr) { void LiveRangeAnalyzer::print(Function *func, bool printSet) { // for debug + cout << "Function " << func->get_name() << endl; for (auto &bb : func->get_basic_blocks()) { for (auto &instr : bb.get_instructions()) { if (instr.is_phi()) // ignore phi @@ -175,44 +206,33 @@ LiveRangeAnalyzer::print(Function *func, bool printSet) { // for debug for (auto pr : phi_map.find(&bb)->second) { auto [lv, rv] = pr; auto idx = cpstmt_id.at(pr); - std::cout - << cpstmt_id[pr] << ". " << lv->get_name() << " = " - << (rv->get_name() == "" ? rv->print() : rv->get_name()) - << std::endl; + cout << cpstmt_id[pr] << ". " << lv->get_name() << " = " + << (rv->get_name() == "" ? rv->print() + : rv->get_name()) + << endl; if (not printSet) continue; auto &in = IN.at(idx); auto &out = OUT.at(idx); - std::cout << "\tin-set: " + print_liveSet(in) << "\n"; - std::cout << "\tout-set: " + print_liveSet(out) << "\n"; + cout << "\tin-set: " + print_liveSet(in) << "\n"; + cout << "\tout-set: " + print_liveSet(out) << "\n"; } } // normal ir - std::cout << instr_id[&instr] << ". " << instr.print() << " # " - << &instr << std::endl; + cout << instr_id[&instr] << ". " << instr.print() << " # " << &instr + << endl; if (not printSet) continue; auto idx = instr_id.at(&instr); auto &in = IN.at(idx); auto &out = OUT.at(idx); - std::cout << "\tin-set: " + print_liveSet(in) << "\n"; - std::cout << "\tout-set: " + print_liveSet(out) << "\n"; + cout << "\tin-set: " + print_liveSet(in) << "\n"; + cout << "\tout-set: " + print_liveSet(out) << "\n"; } - /* if (phi_map.find(&bb) != phi_map.end()) { - * for (auto pr : phi_map.find(&bb)->second) { - * auto [lv, rv] = pr; - * auto idx = cpstmt_id.at(pr); - * std::cout << cpstmt_id[pr] << ". " << lv->get_name() << " = " - * << (rv->get_name() == "" ? rv->print() - * : rv->get_name()) - * << std::endl; - * if (not printSet) - * continue; - * auto &in = IN.at(idx); - * auto &out = OUT.at(idx); - * std::cout << "\tin-set: " + print_liveSet(in) << "\n"; - * std::cout << "\tout-set: " + print_liveSet(out) << "\n"; - * } - * } */ + } + + if (printSet) { + for (auto [interval, op] : liveIntervals) + cout << op->get_name() << ": " << print_interval(interval) << endl; } } diff --git a/tests/5-bonus/test_time.py b/tests/5-bonus/test_time.py index de3cdd4..3bd4a02 100644 --- a/tests/5-bonus/test_time.py +++ b/tests/5-bonus/test_time.py @@ -68,7 +68,7 @@ def eval(console=False, test_dir=testfile_dir, use_clang=False): if not use_clang: try: # ===可修改=== - compile_res = subprocess.run([cminus, filepath, '-S', 'a.s'], + compile_res = subprocess.run([cminus, '-mem2reg', filepath, '-S', 'a.s'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300) -- GitLab