add regalloc files

3d814ba8 · lxq · 1631b1c3 · 3d814ba8 · 3d814ba8 · 3d814ba8
Commit 3d814ba8 authored Feb 04, 2023 by lxq
7 changed files
--- a/Reports/5-bonus/report.md
+++ b/Reports/5-bonus/report.md
@@ -6,107 +6,109 @@
 ## 实验流程
-1. 使用栈式内存分配，优先追求功能性
+#### 1. 使用栈式内存分配，优先追求功能性
-   这一步主要完成了指令选择，所有变量(local or global)均在栈中储存，参数通过栈传递。常量保存在只读区(模拟gcc)。
+这一步主要完成了指令选择，所有变量(local or global)均在栈中储存，参数通过栈传递。常量保存在只读区(模拟gcc)。
-   这里对于phi指令的处理是：将phi指令还原为前驱块的`copy-statement`，需要将其插入在基本块的最后一条指令(跳转指令)之前。
+这里对于phi指令的处理是：将phi指令还原为前驱块的`copy-statement`，需要将其插入在基本块的最后一条指令(跳转指令)之前。
-   这一步可以完成所有测试样例，但是生成的代码效率较差。
+一个坑：汇编指令 `ftint.w.s $fa0, $fa0` 将浮点数转换为定点数（整数）时，按当前舍入模式取整（默认是就近舍入，表现类似四舍五入），而不是C语言要求的向零截断。后来对比gcc生成的汇编，才发现应当使用向零舍入的 `ftintrz.w.s` 指令。
-2. 活跃变量分析
+这一步可以完成所有测试样例，但是生成的代码效率较差。
-   先确定指令的遍历顺序，这里使用常规的BFS遍历，phi指令的处理和上述相同，例如对于`5-while.ll`：
+#### 2. 活跃变量分析
-   ```llvm
+先确定指令的遍历顺序，这里使用常规的BFS遍历，phi指令的处理和上述相同，例如对于`5-while.ll`：
-   define i32 @main() {
-   label_entry:
+```llvm
-     br label %label0
+define i32 @main() {
-   label0:                                                ; preds = %label_entry, %label5
+label_entry:
-     %op1 = phi i32 [ 0, %label_entry ], [ %op6, %label5 ]
+  br label %label0
-     %op2 = icmp slt i32 %op1, 10
+label0:                                                ; preds = %label_entry, %label5
-     %op3 = zext i1 %op2 to i32
+  %op1 = phi i32 [ 0, %label_entry ], [ %op6, %label5 ]
-     %op4 = icmp ne i32 %op3, 0
+  %op2 = icmp slt i32 %op1, 10
-     br i1 %op4, label %label5, label %label7
+  %op3 = zext i1 %op2 to i32
-   label5:                                                ; preds = %label0
+  %op4 = icmp ne i32 %op3, 0
-     call void @output(i32 %op1)
+  br i1 %op4, label %label5, label %label7
-     %op6 = add i32 %op1, 1
+label5:                                                ; preds = %label0
-     br label %label0
+  call void @output(i32 %op1)
-   label7:                                                ; preds = %label0
+  %op6 = add i32 %op1, 1
-     ret i32 0
+  br label %label0
-   }
+label7:                                                ; preds = %label0
-   ```
+  ret i32 0
+}
-   指令遍历顺序如下，第1条与第9条指令就是phi指令的还原。
+```
-   ```llvm
+指令遍历顺序如下，第1条与第9条指令就是phi指令的还原。
-   1. op1 = 0
-   2. br label %label0 
+```llvm
-   3. %op2 = icmp slt i32 %op1, 10 
+1. op1 = 0
-   4. %op3 = zext i1 %op2 to i32 
+2. br label %label0 
-   5. %op4 = icmp ne i32 %op3, 0 
+3. %op2 = icmp slt i32 %op1, 10 
-   6. br i1 %op4, label %label5, label %label7 
+4. %op3 = zext i1 %op2 to i32 
-   7. call void @output(i32 %op1) 
+5. %op4 = icmp ne i32 %op3, 0 
-   8. %op6 = add i32 %op1, 1 
+6. br i1 %op4, label %label5, label %label7 
-   9. op1 = op6
+7. call void @output(i32 %op1) 
-   10. br label %label0 
+8. %op6 = add i32 %op1, 1 
-   11. ret i32 0 
+9. op1 = op6
-   ```
+10. br label %label0 
+11. ret i32 0 
-   用编号代替指令，获得每个程序点的IN和OUT：
+```
-   ```llvm
+用编号代替指令，获得每个程序点的IN和OUT：
-   1. op1 = 0
-   	in-set: [ ]
+```llvm
-   	out-set: [ op1 ]
+1. op1 = 0
-   2. br label %label0 
+	in-set: [ ]
-   	in-set: [ op1 ]
+	out-set: [ op1 ]
-   	out-set: [ op1 ]
+2. br label %label0 
-   3. %op2 = icmp slt i32 %op1, 10 
+	in-set: [ op1 ]
-   	in-set: [ op1 ]
+	out-set: [ op1 ]
-   	out-set: [ op2 op1 ]
+3. %op2 = icmp slt i32 %op1, 10 
-   4. %op3 = zext i1 %op2 to i32 
+	in-set: [ op1 ]
-   	in-set: [ op2 op1 ]
+	out-set: [ op2 op1 ]
-   	out-set: [ op3 op1 ]
+4. %op3 = zext i1 %op2 to i32 
-   5. %op4 = icmp ne i32 %op3, 0 
+	in-set: [ op2 op1 ]
-   	in-set: [ op3 op1 ]
+	out-set: [ op3 op1 ]
-   	out-set: [ op4 op1 ]
+5. %op4 = icmp ne i32 %op3, 0 
-   6. br i1 %op4, label %label5, label %label7 
+	in-set: [ op3 op1 ]
-   	in-set: [ op4 op1 ]
+	out-set: [ op4 op1 ]
-   	out-set: [ op1 ]
+6. br i1 %op4, label %label5, label %label7 
-   7. call void @output(i32 %op1) 
+	in-set: [ op4 op1 ]
-   	in-set: [ op1 ]
+	out-set: [ op1 ]
-   	out-set: [ op1 ]
+7. call void @output(i32 %op1) 
-   8. %op6 = add i32 %op1, 1 
+	in-set: [ op1 ]
-   	in-set: [ op1 ]
+	out-set: [ op1 ]
-   	out-set: [ op6 ]
+8. %op6 = add i32 %op1, 1 
-   9. op1 = op6
+	in-set: [ op1 ]
-   	in-set: [ op6 ]
+	out-set: [ op6 ]
-   	out-set: [ op1 ]
+9. op1 = op6
-   10. br label %label0 
+	in-set: [ op6 ]
-   	in-set: [ op1 ]
+	out-set: [ op1 ]
-   	out-set: [ op1 ]
+10. br label %label0 
-   11. ret i32 0 
+	in-set: [ op1 ]
-   	in-set: [ ]
+	out-set: [ op1 ]
-   	out-set: [ ]
+11. ret i32 0 
-   ```
+	in-set: [ ]
+	out-set: [ ]
-   获得活跃区间：编号为i的指令，涉及两个端点：i-1和i，分别对应IN和OUT。由此得到各个变量的活跃区间是：
+```
-   ```llvm
+获得活跃区间：编号为i的指令，涉及两个端点：i-1和i，分别对应IN和OUT。由此得到各个变量的活跃区间是：
-   op1: <1, 10>
-   op2: <3, 3>
+```llvm
-   op3: <4, 4>
+op1: <1, 10>
-   op4: <5, 5>
+op2: <3, 3>
-   op6: <8, 8>
+op3: <4, 4>
-   ```
+op4: <5, 5>
+op6: <8, 8>
-3. 寄存器分配
+```
-   使用线性扫描算法实现寄存器分配，参考：
+#### 3. 寄存器分配
-    - http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
+使用线性扫描算法实现寄存器分配，参考：
-    - [Documentations/5-bonus/寄存器分配.md · master · compiler_staff / 2022fall-Compiler_CMinus · GitLab](https://cscourse.ustc.edu.cn/vdir/Gitlab/compiler_staff/2022fall-compiler_cminus/-/blob/master/Documentations/5-bonus/%E5%AF%84%E5%AD%98%E5%99%A8%E5%88%86%E9%85%8D.md#poletto)
+- http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
+- [Documentations/5-bonus/寄存器分配.md · master · compiler_staff / 2022fall-Compiler_CMinus · GitLab](https://cscourse.ustc.edu.cn/vdir/Gitlab/compiler_staff/2022fall-compiler_cminus/-/blob/master/Documentations/5-bonus/%E5%AF%84%E5%AD%98%E5%99%A8%E5%88%86%E9%85%8D.md#poletto)
+
--- a/include/codegen/liverange.hpp
+++ b/include/codegen/liverange.hpp
 #ifndef LIVERANGE_HPP
 #define LIVERANGE_HPP
-#include "BasicBlock.h"
-#include "Function.h"
-#include "Instruction.h"
 #include "Module.h"
-#include "Value.h"
-#include <algorithm>
 #include <iostream>
-#include <iterator>
 #include <map>
 #include <set>
-#include <string>
-#include <utility>
-#include <vector>
 using std::map;
 using std::pair;

--- a/include/codegen/regalloc.hpp
+++ b/include/codegen/regalloc.hpp
+#include <functional>
+#include "liverange.hpp"
+// using std::transform;
+using namespace LRA;
+namespace RA {
+#define MAXR 32
+/**
+ * Strict weak ordering for the allocator's `active` set: intervals are ordered
+ * by increasing end point (`first.j`), then by increasing start point
+ * (`first.i`).
+ *
+ * The value pointer (`second`) is the final tie-breaker so that two different
+ * values whose intervals share the same <i, j> are not treated as equivalent;
+ * without it std::set would silently refuse to insert the second one.
+ */
+struct ActiveCMP {
+    bool operator()(LiveInterval const &lhs, LiveInterval const &rhs) const {
+        if (lhs.first.j != rhs.first.j)
+            return lhs.first.j < rhs.first.j;
+        if (lhs.first.i != rhs.first.i)
+            return lhs.first.i < rhs.first.i;
+        // std::less yields a total order over pointers, unlike raw `<`.
+        return std::less<Value *>()(lhs.second, rhs.second);
+    }
+};
+/**
+ * Linear-scan register allocator working on live intervals.
+ *
+ * LinearScan() records its result in `regmap` (value -> register number).
+ * Register numbers start at 1, so `used` is indexed 1..R and slot 0 is unused.
+ */
+class RegAllocator {
+  public:
+    // R_ is the number of allocatable registers. It is clamped to MAXR so the
+    // 1-based register indices can never run past the `used` table.
+    RegAllocator(const uint R_) : R(R_ < MAXR ? R_ : MAXR), used{false} {}
+    RegAllocator() = delete;
+    // Run the allocation; the input set is sorted by increasing start point.
+    void LinearScan(set<LiveInterval> &liveints);
+    // Drop all allocation state (mappings, active intervals, used flags).
+    void reset();
+  private:
+    // Number of allocatable registers (at most MAXR).
+    const uint R;
+    // used[r] is true while register r is held by an active interval.
+    // One extra slot because valid register numbers are 1..MAXR inclusive.
+    bool used[MAXR + 1];
+    // Register number assigned to each value that currently owns one.
+    map<Value *, int> regmap;
+    // Intervals currently holding a register, sorted by increasing end point.
+    set<LiveInterval, ActiveCMP> active;
+    // Release the registers of active intervals that end before the given
+    // interval starts.
+    void ExpireOldIntervals(LiveInterval);
+    // Decide which interval goes without a register when all R are taken.
+    void SpillAtInterval(LiveInterval);
+};
+} // namespace RA
--- a/src/codegen/CMakeLists.txt
+++ b/src/codegen/CMakeLists.txt
 add_library(codegen STATIC
    codegen.cpp
    liverange.cpp
+    regalloc.cpp
 )
 target_link_libraries(common)
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -255,10 +255,13 @@ CodeGen::compute_arg_info(Function *func) {
 // Emit the epilogue for cur_func: the `<name>_end:` label, the reloads of $ra
 // and $fp from the frame, the release of the stackN-byte frame, and the
 // return jump.
 // NOTE(review): stackN and stackN-8/-16 are used directly as immediates;
 // LoongArch addi.d/ld.d immediates are 12-bit signed — confirm that large
 // frames are handled elsewhere.
 void
 CodeGen::stackMemDealloc() {
-    output.push_back("# epilog");
    // 7: the return value should already be determined at this point!
    output.push_back(cur_func->get_name() + "_end:");
-    output.push_back("ld.d $ra, $fp, -8");
+    output.push_back("# epilog");
+    output.push_back("ld.d $ra, $sp, " + to_string(stackN - 8)); // $ra lives at sp + stackN - 8
+    output.push_back("ld.d $fp, $sp, " + to_string(stackN - 16)); // $fp lives at sp + stackN - 16
+    /* output.push_back("ld.d $ra, $fp, -8");
+     * output.push_back("ld.d $fp, $fp, -16"); */
    output.push_back("addi.d $sp, $sp, " + to_string(stackN));
    output.push_back("jr $ra");
 }
@@ -266,8 +269,8 @@ CodeGen::stackMemDealloc() {
 // the addr for opk is: fp - off[opk]
 void
 CodeGen::stackMemAlloc() {
-    // preserved for ra
+    // preserved for $ra and $fp
-    stackN = 8;
+    stackN = 16;
    off.clear();
    for (auto &bb : cur_func->get_basic_blocks())
        for (auto &instr : bb.get_instructions()) {
@@ -287,15 +290,16 @@ CodeGen::stackMemAlloc() {
    stackN = STACK_ALIGN(stackN);
    output.push_back("# prolog");
    output.push_back("addi.d $sp, $sp, -" + to_string(stackN));
+    output.push_back("st.d $ra, $sp," + to_string(stackN - 8));
+    output.push_back("st.d $fp, $sp, " + to_string(stackN - 16));
    output.push_back("addi.d $fp, $sp, " + to_string(stackN));
-    output.push_back("st.d $ra, $fp, -8");
 }
 // Lower a float -> i32 conversion: the operand is passed to value2reg(), the
 // conversion is emitted on $fa0, and the result is moved into $a0.
 void
 CodeGen::IR2assem(FpToSiInst *instr) {
    assert(instr->get_operand(0)->get_type() == m->get_float_type());
    assert(instr->get_dest_type() == m->get_int32_type());
    // presumably places the operand in $fa0 — confirm against value2reg()
    value2reg(instr->get_operand(0));
-    output.push_back("ftint.w.s $fa0, $fa0");
+    output.push_back("ftintrz.w.s $fa0, $fa0"); // rz = round toward zero; plain ftint.w.s rounded instead of truncating
    output.push_back("movfr2gr.s $a0, $fa0");
 }
 void
@@ -419,7 +423,7 @@ CodeGen::IR2assem(CallInst *instr) {
    }
    output.push_back("bl " + func->get_name());
    output.push_back("addi.d $sp, $sp, " + to_string(func_arg_N[func]));
-    output.push_back("addi.d $fp, $sp, " + to_string(stackN));
+    // output.push_back("addi.d $fp, $sp, " + to_string(stackN));
 }
 void

--- a/src/codegen/liverange.cpp
+++ b/src/codegen/liverange.cpp
 #include "liverange.hpp"
-#include "Function.h"
-#include "Instruction.h"
-#include <algorithm>
-#include <iterator>
 using std::cout;
 using std::endl;
 using namespace LRA;

--- a/src/codegen/regalloc.cpp
+++ b/src/codegen/regalloc.cpp
+#include "regalloc.hpp"
+#include <algorithm>
+using std::for_each;
+using namespace RA;
+// Return the allocator to its initial state: no value-to-register mappings,
+// no active intervals, and every register marked free.
+void
+RegAllocator::reset() {
+    regmap.clear();
+    active.clear();
+    // Clear the whole table rather than only the first R slots: LinearScan
+    // hands out indices 1..R, so clearing [0, R) would leave slot R stale.
+    for (bool &slot : used)
+        slot = false;
+}
+// Assign registers to the intervals in `liveints` using linear scan.
+// Any previous allocation state is discarded first. Intervals are visited in
+// the set's iteration order (expected to be increasing start point). Results
+// are recorded in `regmap`; an interval that ends up without a regmap entry
+// received no register.
+void
+RegAllocator::LinearScan(set<LiveInterval> &liveints) {
+    reset();
+    // Never index past `used`, whatever size the class declares for it.
+    const unsigned capacity = sizeof(used) / sizeof(used[0]);
+    // Bind by reference: the loop only reads each interval.
+    for (const auto &liveint : liveints) {
+        ExpireOldIntervals(liveint);
+        if (active.size() == R) {
+            SpillAtInterval(liveint);
+            continue;
+        }
+        // Registers are numbered from 1; take the lowest free one.
+        unsigned reg = 1;
+        while (reg <= R and reg < capacity and used[reg])
+            ++reg;
+        if (reg > R or reg >= capacity)
+            continue; // no free slot: leave unassigned rather than write out of bounds
+        used[reg] = true;
+        regmap[liveint.second] = static_cast<int>(reg);
+        active.insert(liveint);
+    }
+}
+// Retire every active interval whose end point lies strictly before the start
+// of `liveint`, marking its register as free again. `active` is ordered by
+// end point, so the expired intervals form a prefix of the set.
+void
+RegAllocator::ExpireOldIntervals(LiveInterval liveint) {
+    auto expired_end = active.begin();
+    while (expired_end != active.end() &&
+           expired_end->first.j < liveint.first.i) {
+        const int freed = regmap.at(expired_end->second);
+        used[freed] = false;
+        ++expired_end;
+    }
+    // Drop the whole expired prefix in one go.
+    active.erase(active.begin(), expired_end);
+}
+// Called when every register is taken. The spill candidate is the active
+// interval with the largest end point. If it outlives `liveint`, its register
+// is handed to `liveint` and the candidate loses its mapping; otherwise
+// `liveint` itself is left without a register.
+void
+RegAllocator::SpillAtInterval(LiveInterval liveint) {
+    // Nothing to steal from (e.g. R == 0): liveint simply gets no register.
+    if (active.empty())
+        return;
+    auto victim = active.end();
+    --victim; // last element == furthest end point
+    if (victim->first.j > liveint.first.j) {
+        const auto spilled_value = victim->second;
+        // Hand the victim's register over and cancel the victim's allocation.
+        regmap[liveint.second] = regmap.at(spilled_value);
+        active.erase(victim);
+        regmap.erase(spilled_value);
+        // The new owner must be tracked, otherwise ExpireOldIntervals can
+        // never release this register.
+        active.insert(liveint);
+    }
+}