diff --git a/Reports/5-bonus/report.md b/Reports/5-bonus/report.md index 8da5b2e89686b54018c33cb2ae43c9e5caac0e07..3ff497b7416b12ac79bce2d4ca7d9256e3eeeb6d 100644 --- a/Reports/5-bonus/report.md +++ b/Reports/5-bonus/report.md @@ -6,107 +6,109 @@ ## 实验流程 -1. 使用栈式内存分配,优先追求功能性 - - 这一步主要完成了指令选择,所有变量(local or global)均在栈中储存,参数通过栈传递。常量保存在只读区(模拟gcc)。 - - 这里对于phi指令的处理是:将phi指令还原为前驱块的`copy-statement`,需要将其插入在基本块的最后一条指令(跳转指令)之前。 - - 这一步可以完成所有测试样例,但是生成的代码效率较差。 - -2. 活跃变量分析 - - 先确定指令的遍历顺序,这里使用常规的BFS遍历,phi指令的处理和上述相同,例如对于`5-while.ll`: - - ```llvm - define i32 @main() { - label_entry: - br label %label0 - label0: ; preds = %label_entry, %label5 - %op1 = phi i32 [ 0, %label_entry ], [ %op6, %label5 ] - %op2 = icmp slt i32 %op1, 10 - %op3 = zext i1 %op2 to i32 - %op4 = icmp ne i32 %op3, 0 - br i1 %op4, label %label5, label %label7 - label5: ; preds = %label0 - call void @output(i32 %op1) - %op6 = add i32 %op1, 1 - br label %label0 - label7: ; preds = %label0 - ret i32 0 - } - ``` - - 指令遍历顺序如下,第1条与第9条指令就是phi指令的还原。 - - ```llvm - 1. op1 = 0 - 2. br label %label0 - 3. %op2 = icmp slt i32 %op1, 10 - 4. %op3 = zext i1 %op2 to i32 - 5. %op4 = icmp ne i32 %op3, 0 - 6. br i1 %op4, label %label5, label %label7 - 7. call void @output(i32 %op1) - 8. %op6 = add i32 %op1, 1 - 9. op1 = op6 - 10. br label %label0 - 11. ret i32 0 - ``` - - 用编号代替指令,获得每个程序点的IN和OUT: - - ```llvm - 1. op1 = 0 - in-set: [ ] - out-set: [ op1 ] - 2. br label %label0 - in-set: [ op1 ] - out-set: [ op1 ] - 3. %op2 = icmp slt i32 %op1, 10 - in-set: [ op1 ] - out-set: [ op2 op1 ] - 4. %op3 = zext i1 %op2 to i32 - in-set: [ op2 op1 ] - out-set: [ op3 op1 ] - 5. %op4 = icmp ne i32 %op3, 0 - in-set: [ op3 op1 ] - out-set: [ op4 op1 ] - 6. br i1 %op4, label %label5, label %label7 - in-set: [ op4 op1 ] - out-set: [ op1 ] - 7. call void @output(i32 %op1) - in-set: [ op1 ] - out-set: [ op1 ] - 8. %op6 = add i32 %op1, 1 - in-set: [ op1 ] - out-set: [ op6 ] - 9. op1 = op6 - in-set: [ op6 ] - out-set: [ op1 ] - 10. 
br label %label0 - in-set: [ op1 ] - out-set: [ op1 ] - 11. ret i32 0 - in-set: [ ] - out-set: [ ] - ``` - - 获得活跃区间:编号为i的指令,涉及两个端点:i-1和i,分别对应IN和OUT。由此得到各个变量的活跃区间是: - - ```llvm - op1: <1, 10> - op2: <3, 3> - op3: <4, 4> - op4: <5, 5> - op6: <8, 8> - ``` - -3. 寄存器分配 - - 使用线性扫描算法实现寄存器分配,参考: - - - http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf - - - [Documentations/5-bonus/寄存器分配.md · master · compiler_staff / 2022fall-Compiler_CMinus · GitLab](https://cscourse.ustc.edu.cn/vdir/Gitlab/compiler_staff/2022fall-compiler_cminus/-/blob/master/Documentations/5-bonus/%E5%AF%84%E5%AD%98%E5%99%A8%E5%88%86%E9%85%8D.md#poletto) - - +#### 1. 使用栈式内存分配,优先追求功能性 + +这一步主要完成了指令选择,所有变量(local or global)均在栈中储存,参数通过栈传递。常量保存在只读区(模拟gcc)。 + +这里对于phi指令的处理是:将phi指令还原为前驱块的`copy-statement`,需要将其插入在基本块的最后一条指令(跳转指令)之前。 + +一个坑:汇编指令`ftint.w.s fa0, fa0`将浮点数转化为定点数时,按FCSR中的当前舍入模式取整(默认是就近舍入,表现为近似四舍五入),而不是C语言要求的向零截断。后来对比gcc生成的汇编,发现应当使用向零舍入的`ftintrz.w.s`指令。 + +这一步可以完成所有测试样例,但是生成的代码效率较差。 + +#### 2. 活跃变量分析 + +先确定指令的遍历顺序,这里使用常规的BFS遍历,phi指令的处理和上述相同,例如对于`5-while.ll`: + +```llvm +define i32 @main() { +label_entry: + br label %label0 +label0: ; preds = %label_entry, %label5 + %op1 = phi i32 [ 0, %label_entry ], [ %op6, %label5 ] + %op2 = icmp slt i32 %op1, 10 + %op3 = zext i1 %op2 to i32 + %op4 = icmp ne i32 %op3, 0 + br i1 %op4, label %label5, label %label7 +label5: ; preds = %label0 + call void @output(i32 %op1) + %op6 = add i32 %op1, 1 + br label %label0 +label7: ; preds = %label0 + ret i32 0 +} +``` + +指令遍历顺序如下,第1条与第9条指令就是phi指令的还原。 + +```llvm +1. op1 = 0 +2. br label %label0 +3. %op2 = icmp slt i32 %op1, 10 +4. %op3 = zext i1 %op2 to i32 +5. %op4 = icmp ne i32 %op3, 0 +6. br i1 %op4, label %label5, label %label7 +7. call void @output(i32 %op1) +8. %op6 = add i32 %op1, 1 +9. op1 = op6 +10. br label %label0 +11. ret i32 0 +``` + +用编号代替指令,获得每个程序点的IN和OUT: + +```llvm +1. op1 = 0 + in-set: [ ] + out-set: [ op1 ] +2. br label %label0 + in-set: [ op1 ] + out-set: [ op1 ] +3. %op2 = icmp slt i32 %op1, 10 + in-set: [ op1 ] + out-set: [ op2 op1 ] +4. 
%op3 = zext i1 %op2 to i32 + in-set: [ op2 op1 ] + out-set: [ op3 op1 ] +5. %op4 = icmp ne i32 %op3, 0 + in-set: [ op3 op1 ] + out-set: [ op4 op1 ] +6. br i1 %op4, label %label5, label %label7 + in-set: [ op4 op1 ] + out-set: [ op1 ] +7. call void @output(i32 %op1) + in-set: [ op1 ] + out-set: [ op1 ] +8. %op6 = add i32 %op1, 1 + in-set: [ op1 ] + out-set: [ op6 ] +9. op1 = op6 + in-set: [ op6 ] + out-set: [ op1 ] +10. br label %label0 + in-set: [ op1 ] + out-set: [ op1 ] +11. ret i32 0 + in-set: [ ] + out-set: [ ] +``` + +获得活跃区间:编号为i的指令,涉及两个端点:i-1和i,分别对应IN和OUT。由此得到各个变量的活跃区间是: + +```llvm +op1: <1, 10> +op2: <3, 3> +op3: <4, 4> +op4: <5, 5> +op6: <8, 8> +``` + +#### 3. 寄存器分配 + +使用线性扫描算法实现寄存器分配,参考: + +- http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf + +- [Documentations/5-bonus/寄存器分配.md · master · compiler_staff / 2022fall-Compiler_CMinus · GitLab](https://cscourse.ustc.edu.cn/vdir/Gitlab/compiler_staff/2022fall-compiler_cminus/-/blob/master/Documentations/5-bonus/%E5%AF%84%E5%AD%98%E5%99%A8%E5%88%86%E9%85%8D.md#poletto) + +i diff --git a/include/codegen/liverange.hpp b/include/codegen/liverange.hpp index 22749feae30abb2edd8c0db83c11b63fa800e45c..baba00bfafeafb97f08b30b44bd8f0b6fcc9140f 100644 --- a/include/codegen/liverange.hpp +++ b/include/codegen/liverange.hpp @@ -1,20 +1,11 @@ #ifndef LIVERANGE_HPP #define LIVERANGE_HPP -#include "BasicBlock.h" -#include "Function.h" -#include "Instruction.h" #include "Module.h" -#include "Value.h" -#include #include -#include #include #include -#include -#include -#include using std::map; using std::pair; diff --git a/include/codegen/regalloc.hpp b/include/codegen/regalloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..108472fb4d6d4aa9d5d8b4a33f49c1fe43d6f487 --- /dev/null +++ b/include/codegen/regalloc.hpp @@ -0,0 +1,37 @@ +#include "liverange.hpp" + +// using std::transform; +using namespace LRA; + +namespace RA { + +#define MAXR 32 + +struct ActiveCMP { + bool operator()(LiveInterval 
const &lhs, LiveInterval const &rhs) const {
+        if (lhs.first.j != rhs.first.j) return lhs.first.j < rhs.first.j;
+        if (lhs.first.i != rhs.first.i) return lhs.first.i < rhs.first.i;
+        // tie-break on the value: equal intervals of distinct values must not compare equal
+        return std::less<decltype(lhs.second)>()(lhs.second, rhs.second);
+    }
+};
+
+class RegAllocator {
+  public:
+    RegAllocator(const uint R_) : R(R_), used{false} {} // R_: number of registers; `used` holds MAXR flags
+    RegAllocator() = delete;
+    // input set is sorted by increasing start point
+    void LinearScan(set<LiveInterval> &liveints); // NOTE(review): template argument assumed - confirm
+    void reset();
+
+  private:
+    const uint R;
+    bool used[MAXR];
+    map<decltype(LiveInterval::second), int> regmap; // value -> register number assigned by LinearScan
+    // sorted by increasing end point
+    set<LiveInterval, ActiveCMP> active;
+
+    void ExpireOldIntervals(LiveInterval);
+    void SpillAtInterval(LiveInterval);
+};
+} // namespace RA
diff --git a/src/codegen/CMakeLists.txt b/src/codegen/CMakeLists.txt
index 302c048c9dfee88dc5d8678788fc46b925591409..46e01bcfc44d4ac96458ef43f357f83561ab3d66 100644
--- a/src/codegen/CMakeLists.txt
+++ b/src/codegen/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_library(codegen STATIC
     codegen.cpp
     liverange.cpp
+    regalloc.cpp
 )
 
 target_link_libraries(common)
diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index ca2e1063f3b4ca2e093d0925070c4dfcadd71d31..43a867386393fad1bc87f9fd400424c49b7caa58 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -255,10 +255,13 @@ CodeGen::compute_arg_info(Function *func) {
 
 void
 CodeGen::stackMemDealloc() {
-    output.push_back("# epilog");
     // 7: return value should be determined already!
output.push_back(cur_func->get_name() + "_end:"); - output.push_back("ld.d $ra, $fp, -8"); + output.push_back("# epilog"); + output.push_back("ld.d $ra, $sp, " + to_string(stackN - 8)); + output.push_back("ld.d $fp, $sp, " + to_string(stackN - 16)); + /* output.push_back("ld.d $ra, $fp, -8"); + * output.push_back("ld.d $fp, $fp, -16"); */ output.push_back("addi.d $sp, $sp, " + to_string(stackN)); output.push_back("jr $ra"); } @@ -266,8 +269,8 @@ CodeGen::stackMemDealloc() { // the addr for opk is: fp - off[opk] void CodeGen::stackMemAlloc() { - // preserved for ra - stackN = 8; + // preserved for $ra and $fp + stackN = 16; off.clear(); for (auto &bb : cur_func->get_basic_blocks()) for (auto &instr : bb.get_instructions()) { @@ -287,15 +290,16 @@ CodeGen::stackMemAlloc() { stackN = STACK_ALIGN(stackN); output.push_back("# prolog"); output.push_back("addi.d $sp, $sp, -" + to_string(stackN)); + output.push_back("st.d $ra, $sp," + to_string(stackN - 8)); + output.push_back("st.d $fp, $sp, " + to_string(stackN - 16)); output.push_back("addi.d $fp, $sp, " + to_string(stackN)); - output.push_back("st.d $ra, $fp, -8"); } void CodeGen::IR2assem(FpToSiInst *instr) { assert(instr->get_operand(0)->get_type() == m->get_float_type()); assert(instr->get_dest_type() == m->get_int32_type()); value2reg(instr->get_operand(0)); - output.push_back("ftint.w.s $fa0, $fa0"); + output.push_back("ftintrz.w.s $fa0, $fa0"); output.push_back("movfr2gr.s $a0, $fa0"); } void @@ -419,7 +423,7 @@ CodeGen::IR2assem(CallInst *instr) { } output.push_back("bl " + func->get_name()); output.push_back("addi.d $sp, $sp, " + to_string(func_arg_N[func])); - output.push_back("addi.d $fp, $sp, " + to_string(stackN)); + // output.push_back("addi.d $fp, $sp, " + to_string(stackN)); } void diff --git a/src/codegen/liverange.cpp b/src/codegen/liverange.cpp index 9ddab4e770cf32f16bbff7f7914974385f3aebcd..75c4626470f8d3275fe1237d5eac20365f074a5a 100644 --- a/src/codegen/liverange.cpp +++ b/src/codegen/liverange.cpp 
@@ -1,11 +1,5 @@
 #include "liverange.hpp"
 
-#include "Function.h"
-#include "Instruction.h"
-
-#include
-#include
-
 using std::cout;
 using std::endl;
 using namespace LRA;
diff --git a/src/codegen/regalloc.cpp b/src/codegen/regalloc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52113c5bec3b0e57d9066234077f8c6280f49c7b
--- /dev/null
+++ b/src/codegen/regalloc.cpp
@@ -0,0 +1,85 @@
+#include "regalloc.hpp"
+
+#include <algorithm>
+#include <cassert>
+
+using std::fill;
+using std::find;
+
+using namespace RA;
+
+// Linear-scan register allocation after Poletto & Sarkar.
+//
+// Register numbers stored in `regmap` are 1-based (1..R); register r is
+// tracked by used[r - 1], so every access stays inside used[0, R).
+
+// Drop all state left over from a previous LinearScan() run.
+void
+RegAllocator::reset() {
+    // `used` only has MAXR slots; a larger R would index past its end
+    assert(R <= MAXR);
+    regmap.clear();
+    active.clear();
+    fill(used, used + R, false);
+}
+
+// Walk `liveints` in the set's iteration order (regalloc.hpp documents the
+// input as sorted by increasing start point) and give each interval a
+// register when one is available. Intervals left without a register have no
+// entry in `regmap`.
+// NOTE(review): the template arguments of `set` are missing from the patch
+// text under review; `set<LiveInterval>` is assumed and must match the
+// declaration in regalloc.hpp.
+void
+RegAllocator::LinearScan(set<LiveInterval> &liveints) {
+    reset();
+    for (const auto &liveint : liveints) {
+        ExpireOldIntervals(liveint);
+        // `active` is keyed by ActiveCMP. An interval it considers equal to
+        // one already active could not be inserted and so would never be
+        // expired; leave it without a register instead of leaking one.
+        if (active.count(liveint) != 0)
+            continue;
+        if (active.size() >= R) {
+            SpillAtInterval(liveint);
+            continue;
+        }
+        // fewer than R intervals are active and each owns one register, so
+        // a free slot exists; take the lowest-numbered one
+        bool *slot = find(used, used + R, false);
+        assert(slot != used + R);
+        *slot = true;
+        regmap[liveint.second] = static_cast<int>(slot - used) + 1;
+        active.insert(liveint);
+    }
+}
+
+// Release the registers of every active interval that ends before `liveint`
+// starts. `active` is ordered by end point, so those form a prefix.
+void
+RegAllocator::ExpireOldIntervals(LiveInterval liveint) {
+    auto it = active.begin();
+    for (; it != active.end() and it->first.j < liveint.first.i; ++it)
+        used[regmap.at(it->second) - 1] = false;
+    active.erase(active.begin(), it);
+}
+
+// Called when all R registers are taken. If the active interval with the
+// furthest end point outlives `liveint`, its register goes to `liveint` and
+// that interval loses its `regmap` entry; otherwise `liveint` gets none.
+void
+RegAllocator::SpillAtInterval(LiveInterval liveint) {
+    // R == 0: there is no register to take over
+    if (active.empty())
+        return;
+    const auto spill = *active.rbegin();
+    if (spill.first.j > liveint.first.j) {
+        // hand the register over; its `used` flag stays set
+        regmap[liveint.second] = regmap.at(spill.second);
+        regmap.erase(spill.second);
+        active.erase(spill);
+        // `liveint` now owns a register, so it has to be tracked in
+        // `active` or ExpireOldIntervals() would never free that register
+        active.insert(liveint);
+    }
+}