Patch summary: stack-passed call arguments plus branch/call/binary-op lowering
for CodeGen (include/codegen.hpp, src/codegen/codegen.cpp).

What the patch does
  * Adds ALIGN(x, align) and re-expresses STACK_ALIGN(x) as ALIGN(x, 16).
  * value2reg() gains `is_float`, which selects the "$f" or "$a" register
    prefix, and a new branch that loads function arguments from
    $fp + func_arg_off[cur_func][id].
  * compute_arg_info() fills func_arg_off / func_arg_N for every defined
    function before any function body is emitted.
  * label_in_assem() replaces the inline label construction in run().
  * stackMemAlloc() skips instructions for which no_stack_alloca() is true
    (void, cmp, zext) and rounds each slot offset with ALIGN(stackN, tplen).
  * New lowerings: IR2assem(BranchInst *) via bool2branch(), IR2assem(CallInst *),
    IR2assem(BinaryInst *) for add/sub/mul/sdiv, and IR2assem(CmpInst *).
  * IR2assem(GetElementPtrInst *) and IR2assem(LoadInst *) now store their
    result through back2stack().

Reviewer fixes folded into this revision (hunk line counts are unchanged)
  1. back2stack(): the emitted store lacked the comma between the register and
     the address ("st.w $a0 $fp, -N"). The store it replaces in
     IR2assem(LoadInst *) emitted " $a0, " + addr; the comma is restored.
  2. bool2branch(): the returned operand list now ends with a comma, so that
     IR2assem(BranchInst *) emits "<op> $a0, $a1, <label>" instead of
     "<op> $a0, $a1  <label>".
  3. ALIGN(): macro parameters are parenthesized so expression arguments
     cannot change the meaning of the expansion.

Open review notes (not changed here)
  * NOTE(review): IR2assem(CmpInst *) has an inline empty body in the header
    and an out-of-line definition in codegen.cpp; that looks like a
    redefinition. Confirm which one is intended.
  * NOTE(review): bool2branch() calls ->is_zext() directly on the result of a
    dynamic_cast of operand 0; a null result is not checked.
  * NOTE(review): label_in_assem() concatenates the function name and
    bb->get_name().substr(5) without the "_" that the removed code inserted
    for non-entry blocks. Confirm the resulting labels cannot collide.
  * NOTE(review): IR2assem(CmpInst *) pushes instr_ir + " $a0, $a0, $a1" while
    instr_ir is never assigned.
  * NOTE(review): in value2reg() the local `int id = 1` shadows the parameter
    `id`, and `id <= args.size()` compares signed with unsigned.

Reconstruction caveats
  * The source text had its line breaks and indentation collapsed; they are
    re-expanded here so that every hunk matches its @@ line counts. The exact
    position of blank context lines and the indentation depth are assumed.
  * Angle-bracket tokens (include targets, template arguments of casts and
    std::map) are missing in the source text and are left exactly as found.

diff --git a/include/codegen.hpp b/include/codegen.hpp
index 6e8f2cc8a6d0043f3891301a6e11d18255d3a22a..150b50cf2edde735996f131db2c6226149471e81 100644
--- a/include/codegen.hpp
+++ b/include/codegen.hpp
@@ -1,6 +1,7 @@
 #ifndef CODEGEN_HPP
 #define CODEGEN_HPP
 
+#include "BasicBlock.h"
 #include "Function.h"
 #include "Instruction.h"
 #include "Module.h"
@@ -8,8 +9,11 @@
 #include "logging.hpp"
 
 #include
+#include
 
-#define STACK_ALIGN(x) (((x / 16) + (x % 16 ? 1 : 0)) * 16)
+#define ALIGN(x, align) ((((x) / (align)) + ((x) % (align) ? 1 : 0)) * (align))
+// #define STACK_ALIGN(x) (((x / 16) + (x % 16 ? 1 : 0)) * 16)
+#define STACK_ALIGN(x) ALIGN(x, 16)
 
 using std::map;
 using std::string;
@@ -32,18 +36,23 @@ class CodeGen {
     void run();
 
   private:
-    void IR2assem(Instruction &);
-    void IR2assem(ReturnInst *);
     void IR2assem(LoadInst *);
     void IR2assem(StoreInst *);
-
-    void IR2assem(BranchInst *) {}
-    void IR2assem(BinaryInst *) {}
-    void IR2assem(AllocaInst *) {}
-    void IR2assem(PhiInst *) {}
-    void IR2assem(CallInst *) {}
+    void IR2assem(ReturnInst *);
+    void IR2assem(Instruction &);
     void IR2assem(GetElementPtrInst *);
+    void IR2assem(CallInst *);
+    void IR2assem(BranchInst *);
+    // The Instructions below will do nothing
+    void IR2assem(AllocaInst *) {}
+    // integration with BranchInst
+    void IR2assem(CmpInst *) {}
     void IR2assem(ZextInst *) {}
+
+    void IR2assem(BinaryInst *);
+
+    void IR2assem(PhiInst *) {}
+
     void IR2assem(FCmpInst *) {}
     void IR2assem(FpToSiInst *) {}
     void IR2assem(SiToFpInst *) {}
@@ -54,11 +63,16 @@ class CodeGen {
     // - for global variables and pointers from alloca and GEP, read through
     // address
     // only use register a_id and t_
-    void value2reg(Value *, int id = 0);
+    void value2reg(Value *, int id = 0, bool is_float = false);
     // load the content in ptr to specified register.
     // only use register a_id and t_
     void ptrContent2reg(Value *, int id = 0);
+    void compute_arg_info(Function *);
+    string bool2branch(Instruction *);
+    string label_in_assem(BasicBlock *bb) {
+        return cur_func->get_name() + bb->get_name().substr(5);
+    }
 
     int typeLen(Type *type) {
         if (type->is_float_type())
             return 4;
@@ -73,8 +87,16 @@ class CodeGen {
             auto arr_tp = static_cast(type);
             int n = arr_tp->get_num_of_elements();
             return n * typeLen(arr_tp->get_element_type());
-        } else
+        } else {
             assert(false && "unexpected case while computing type-length");
+        }
+    }
+
+    void back2stack(Instruction *instr, string reg = "$a0") {
+        // Store `reg` into instr's slot: emits "st<suffix> <reg>, $fp, -<off>".
+        string suff = suffix(typeLen(instr->get_type()));
+        string addr = "$fp, -" + std::to_string(off[instr]);
+        output.push_back("st" + suff + " " + reg + ", " + addr);
     }
 
     string suffix(int len) {
@@ -91,8 +113,19 @@ class CodeGen {
         assert(false && "no such suffix");
     }
 
-    std::map off;
-    unsigned int N;
+    bool no_stack_alloca(Instruction *instr) {
+        if (instr->is_void())
+            return true;
+        if (instr->is_cmp() or instr->is_zext())
+            return true;
+
+        return false;
+    }
+
+    std::map off; // to $fp
+    std::map> func_arg_off; // to $sp
+    std::map func_arg_N;
+    unsigned int stackN;
 
     Function *cur_func;
 
diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index 912e59b3e109f17800b40035f4a16106a0313f69..162a482af3296c343f868410ecdd9f387063022f 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -7,6 +7,7 @@
 #include "codegen.hpp"
 
 #include "Constant.h"
+#include "Function.h"
 #include "GlobalVariable.h"
 #include "Instruction.h"
 #include "Type.h"
@@ -65,6 +66,10 @@ CodeGen::run() {
         output.push_back(".comm " + globl.get_name() + ", " +
                          to_string(typeLen(type)));
     }
+    // arguments: stack transfer
+    for (auto &func : m->get_functions())
+        if (not func.is_declaration())
+            compute_arg_info(&func);
     // funtions
     for (auto &func : m->get_functions()) {
         if (not func.is_declaration()) {
@@ -77,11 +82,7 @@ CodeGen::run() {
             stackMemAlloc();
 
             for (auto &bb : func.get_basic_blocks()) {
-                if (&bb == func.get_entry_block())
-                    output.push_back(func.get_name() + "_entry:");
-                else
-                    output.push_back(func.get_name() + "_" +
-                                     bb.get_name().substr(5) + ":");
+                output.push_back(label_in_assem(&bb) + ":");
                 for (auto &instr : bb.get_instructions()) {
                     IR2assem(instr);
                 }
@@ -122,8 +123,8 @@ CodeGen::ptrContent2reg(Value *ptr, int id) {
 }
 
 void
-CodeGen::value2reg(Value *v, int id) {
-    auto reg_name = "$a" + to_string(id);
+CodeGen::value2reg(Value *v, int id, bool is_float) {
+    auto reg_name = (is_float ? "$f" : "$a") + to_string(id);
     if (dynamic_cast(v)) {
         auto constant = static_cast(v);
         if (dynamic_cast(constant)) {
@@ -142,12 +143,40 @@ CodeGen::value2reg(Value *v, int id) {
     } else if (dynamic_cast(v)) {
         // auto alloc_instr = dynamic_cast(v);
         output.push_back("addi.d " + reg_name + ", $fp, -" + to_string(off[v]));
+    } else if (dynamic_cast(v)) {
+        auto args = cur_func->get_args();
+        int id = 1;
+        for (auto iter = args.begin(); id <= args.size(); ++iter, ++id)
+            if (*iter == v)
+                break;
+        output.push_back("ld" + suffix(typeLen(v->get_type())) + " " +
+                         reg_name + ", $fp, " +
+                         to_string(func_arg_off[cur_func][id]));
     } else {
         output.push_back("ld" + suffix(typeLen(v->get_type())) + " " +
                          reg_name + ", $fp, -" + to_string(off[v]));
     }
 }
 
+void
+CodeGen::compute_arg_info(Function *func) {
+    if (func_arg_off.find(func) != func_arg_off.end())
+        return;
+    auto func_tp = func->get_function_type();
+    auto &arg_off = func_arg_off[func];
+    int argN = 0, arg_id = func->get_num_of_args();
+    // reserve space
+    for (auto iter = func_tp->param_end(); iter != func_tp->param_begin();) {
+        --iter;
+        auto tplen = typeLen(*iter);
+        argN = ALIGN(argN, tplen) + tplen;
+        arg_off[arg_id--] = argN;
+    }
+    for (arg_id = 1; arg_id <= func->get_num_of_args(); ++arg_id)
+        arg_off[arg_id] = argN - arg_off[arg_id];
+    func_arg_N[func] = STACK_ALIGN(argN);
+}
+
 void
 CodeGen::stackMemDealloc() {
     output.push_back("# epilog");
@@ -155,7 +184,7 @@ CodeGen::stackMemDealloc() {
     // output.push_back("addi.w $a0, $zero, 0");
     output.push_back(cur_func->get_name() + "_end:");
     output.push_back("ld.d $ra, $fp, -8");
-    output.push_back("addi.d $sp, $sp, " + to_string(N));
+    output.push_back("addi.d $sp, $sp, " + to_string(stackN));
     output.push_back("jr $ra");
 }
 
@@ -163,28 +192,182 @@ CodeGen::stackMemDealloc() {
 void
 CodeGen::stackMemAlloc() {
     // preserved for ra
-    N = 8;
+    stackN = 8;
     off.clear();
     for (auto &bb : cur_func->get_basic_blocks())
         for (auto &instr : bb.get_instructions()) {
-            if (not instr.is_void()) {
-                if (instr.is_alloca()) {
-                    auto alloc_instr = static_cast(&instr);
-                    N += typeLen(alloc_instr->get_alloca_type());
-                } else {
-                    auto type = instr.get_type();
-                    N += typeLen(type);
-                }
-                off[&instr] = N;
+            if (no_stack_alloca(&instr))
+                continue;
+            int tplen;
+            if (instr.is_alloca()) {
+                auto alloc_instr = static_cast(&instr);
+                tplen = typeLen(alloc_instr->get_alloca_type());
+            } else {
+                auto type = instr.get_type();
+                tplen = typeLen(type);
             }
+            stackN = ALIGN(stackN, tplen) + tplen;
+            off[&instr] = stackN;
         }
-    N = STACK_ALIGN(N);
+    stackN = STACK_ALIGN(stackN);
     output.push_back("# prolog");
-    output.push_back("addi.d $sp, $sp, -" + to_string(N));
-    output.push_back("addi.d $fp, $sp, " + to_string(N));
+    output.push_back("addi.d $sp, $sp, -" + to_string(stackN));
+    output.push_back("addi.d $fp, $sp, " + to_string(stackN));
     output.push_back("st.d $ra, $fp, -8");
 }
 
+string
+CodeGen::bool2branch(Instruction *instr) {
+    assert(instr->get_type() == instr->get_module()->get_int1_type());
+    auto cmp_instr = dynamic_cast(instr);
+    assert(cmp_instr);
+    string instr_ir;
+    bool reverse = false;
+    switch (cmp_instr->get_cmp_op()) {
+    case CmpInst::EQ:
+        instr_ir = "beq";
+        break;
+    case CmpInst::NE: {
+        instr_ir = "bne";
+        if (instr->get_operand(1) ==
+                ConstantInt::get(0, instr->get_module()) and
+            dynamic_cast(instr->get_operand(0))->is_zext()) {
+            // something like:
+            // %op0 = icmp slt i32 1, 2 # deepest
+            // %op1 = zext i1 %op0 to i32
+            // %op2 = icmp ne i32 %op1, 0
+            // br i1 %op2, label %label3, label %label5
+            auto deepest = static_cast(
+                static_cast(instr->get_operand(0))
+                    ->get_operand(0));
+            return bool2branch(deepest);
+        }
+    }
+
+        break;
+    case CmpInst::GT:
+        instr_ir = "blt";
+        reverse = true;
+        break;
+    case CmpInst::GE:
+        instr_ir = "bge";
+        break;
+    case CmpInst::LT:
+        instr_ir = "blt";
+        break;
+    case CmpInst::LE:
+        instr_ir = "bge";
+        reverse = true;
+        break;
+    }
+    value2reg(instr->get_operand(0), 0);
+    value2reg(instr->get_operand(1), 1);
+
+    return instr_ir + (reverse ? " $a1, $a0," : " $a0, $a1,");
+}
+
+void
+CodeGen::IR2assem(BranchInst *instr) {
+    if (instr->is_cond_br()) {
+        auto TBB = static_cast(instr->get_operand(1));
+        auto FBB = static_cast(instr->get_operand(2));
+        // value2reg(instr->get_operand(0));
+        string instr_ir =
+            bool2branch(static_cast(instr->get_operand(0)));
+        output.push_back(instr_ir + " " + label_in_assem(TBB));
+        output.push_back("b " + label_in_assem(FBB));
+    } else {
+        auto bb = static_cast(instr->get_operand(0));
+        output.push_back("b " + label_in_assem(bb));
+    }
+}
+
+void
+CodeGen::IR2assem(CallInst *instr) {
+    auto func = static_cast(instr->get_operand(0));
+    // stack space allocation
+    output.push_back("addi.d $sp, $sp, -" + to_string(func_arg_N[func]));
+    // place the arguments
+    for (int i = 1; i < instr->get_num_operand(); i++) {
+        auto arg = instr->get_operand(i);
+        auto tplen = typeLen(arg->get_type());
+        string suff = suffix(tplen);
+        value2reg(arg);
+        output.push_back("st" + suff + " $a0, $sp, " +
+                         to_string(func_arg_off[func][i]));
+    }
+    output.push_back("bl " + func->get_name());
+    output.push_back("addi.d $sp, $sp, " + to_string(func_arg_N[func]));
+    output.push_back("addi.d $fp, $sp, " + to_string(stackN));
+    if (not instr->is_void())
+        back2stack(instr);
+}
+void
+CodeGen::IR2assem(CmpInst *instr) {
+    value2reg(instr->get_operand(0), 0);
+    value2reg(instr->get_operand(1), 1);
+    string instr_ir;
+    switch (instr->get_cmp_op()) {
+    case CmpInst::EQ:
+        break;
+    case CmpInst::NE:
+        // output.push_back("xor $a0, $a0, $a1");
+        break;
+    case CmpInst::GT:
+        output.push_back("slt $a0, $a1, $a0");
+        break;
+    case CmpInst::GE:
+        break;
+    case CmpInst::LT:
+        output.push_back("slt $a0, $a0, $a1");
+        break;
+    case CmpInst::LE:
+        break;
+    }
+    output.push_back(instr_ir + " $a0, $a0, $a1");
+    back2stack(instr);
+}
+
+void
+CodeGen::IR2assem(BinaryInst *instr) {
+    value2reg(instr->get_operand(0), 0);
+    value2reg(instr->get_operand(1), 1);
+
+    string suff = suffix(typeLen(instr->get_type()));
+    string instr_ir;
+    switch (instr->get_instr_type()) {
+    case Instruction::add:
+        instr_ir = "add";
+        break;
+    case Instruction::sub:
+        instr_ir = "sub";
+        break;
+    case Instruction::mul:
+        instr_ir = "mulw.d.w";
+        suff = "";
+        break;
+    case Instruction::sdiv:
+        instr_ir = "div";
+        break;
+    /* case Instruction::fadd:
+     * instr_ir = "fadd";
+     * break;
+     * case Instruction::fsub:
+     * instr_ir = "fsub";
+     * break;
+     * case Instruction::fmul:
+     * instr_ir = "fmul";
+     * break;
+     * case Instruction::fdiv:
+     * instr_ir = "fdiv";
+     * break; */
+    default:
+        assert(false && "instruction type");
+    }
+    output.push_back(instr_ir + suff + " $a0, $a0, $a1");
+    back2stack(instr);
+}
+
 void
 CodeGen::IR2assem(GetElementPtrInst *instr) {
     value2reg(instr->get_operand(0), 0);
@@ -203,6 +386,7 @@ CodeGen::IR2assem(GetElementPtrInst *instr) {
         output.push_back("mul.w $a1, $a1, " + to_string(size));
         output.push_back("add.d $a0, $a0, $a1");
     }
+    back2stack(instr);
 }
 
 void
@@ -210,9 +394,11 @@ CodeGen::IR2assem(LoadInst *instr) {
     // move the address to a0
     ptrContent2reg(instr->get_lval());
 
-    string suff = suffix(typeLen(instr->get_load_type()));
-    string addr = "$fp, -" + to_string(off[instr]);
-    output.push_back("st" + suff + " $a0, " + addr);
+    assert(instr->get_type() == instr->get_load_type());
+    back2stack(instr);
+    /* string suff = suffix(typeLen(instr->get_load_type()));
+     * string addr = "$fp, -" + to_string(off[instr]);
+     * output.push_back("st" + suff + " $a0, " + addr); */
 }
 
 void
@@ -244,13 +430,11 @@ CodeGen::IR2assem(Instruction &instr) {
         return;
     // Standard binary operators
     case Instruction::add:
-        break;
     case Instruction::sub:
-        break;
     case Instruction::mul:
-        break;
     case Instruction::sdiv:
-        break;
+        IR2assem(static_cast(&instr));
+        return;
     // float binary operators
     case Instruction::fadd:
         break;