diff --git a/CMakeLists.txt b/CMakeLists.txt index a4917026022a6bab2387cf799139f21d7e8285ed..991332dd27a6d960569fb1f425765b4b2d874a73 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,10 +3,25 @@ cmake_minimum_required( VERSION 3.4 ) set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -std=c99") -set(CMAKE_BUILD_TYPE "Debug") -set(CMAKE_C_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb") -set(CMAKE_C_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +SET(CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined -fsanitize=address") + +set(default_build_type "Debug") + +if(NOT(CMAKE_BUILD_TYPE_SHADOW STREQUAL CMAKE_BUILD_TYPE)) + if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to '${default_build_type}'") + set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE) + else() + message(STATUS "Building in ${CMAKE_BUILD_TYPE} mode") + endif() + + set(CMAKE_BUILD_TYPE_SHADOW ${CMAKE_BUILD_TYPE} CACHE STRING "used to detect changes in build type" FORCE) +endif() + set(CMAKE_CXX_STANDARD 17) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) find_package(FLEX REQUIRED) find_package(BISON REQUIRED) @@ -32,6 +47,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}) +include_directories(include/lightir) add_subdirectory(src) add_subdirectory(tests) diff --git a/Documentations/1-parser/README.md b/Documentations/1-parser/README.md index 191b4c7ec602a68c62352a9f893c0867ad79142a..f60afe6c7975f39588be94108c2882060ce9727a 100644 --- a/Documentations/1-parser/README.md +++ b/Documentations/1-parser/README.md @@ -250,6 +250,7 @@ int main(void) { * 提交异常:如果遇到在平台上提交异常的问题,请通过邮件联系助教,助教将收取截止日期之前,学生在 gitlab 仓库最近一次 commit 内容进行评测。 * 迟交规定 +<<<<<<< HEAD * Soft Deadline:2022-09-30 23:59:59 (UTC+8) @@ -270,6 +271,28 @@ 
int main(void) { final_grade = grade * (0.9)^x, 0 < x <= 7 final_grade = 0, x > 7 ``` +======= + + * Soft Deadline:2022-10-03 23:59:59 (UTC+8) + + * Hard Deadline:2022-10-10 23:59:59 (UTC+8) + + * 补交请邮件提醒 TA: + + * 邮箱:`zhenghy22@mail.ustc.edu.cn` 抄送 `chen16614@mail.ustc.edu.cn` + * 邮件主题:lab1迟交-学号 + * 内容:迟交原因、最后版本commitID、迟交时间 + + * 迟交分数 + + * x 为相对 Soft Deadline 迟交天数,grade 满分 100 + + ``` + final_grade = grade, x = 0 + final_grade = grade * (0.9)^x, 0 < x <= 7 + final_grade = 0, x > 7 + ``` +>>>>>>> 91e15b7b5e67900d4f6e90afe5bdab681dd82e89 * 评分标准: 实验一最终分数组成如下: diff --git a/Documentations/2-ir-gen-warmup/README.md b/Documentations/2-ir-gen-warmup/README.md new file mode 100644 index 0000000000000000000000000000000000000000..290091256ef851819327f289b08b88e79b96c089 --- /dev/null +++ b/Documentations/2-ir-gen-warmup/README.md @@ -0,0 +1,199 @@ +# lab2 实验文档 +- [lab2 实验文档](#lab2-实验文档) + - [0. 前言](#0-前言) + - [主要工作](#主要工作) + - [1. LLVM IR 部分](#1-llvm-ir-部分) + - [1.1 LLVM IR 介绍](#11-llvm-ir-介绍) + - [1.2 gcd 例子: 利用 clang 生成的 .ll](#12-gcd-例子-利用-clang-生成的-ll) + - [1.3 你的提交1: 手动编写 .ll](#13-你的提交1-手动编写-ll) + - [2. LightIR 部分](#2-lightir-部分) + - [2.1 LightIR - LLVM IR 的 cpp 接口](#21-lightir---llvm-ir-的-cpp-接口) + - [2.2 gcd 例子: 利用 LightIR + cpp 生成 .ll](#22-gcd-例子-利用-lightir--cpp-生成-ll) + - [2.3 你的提交2: 利用 LightIR + cpp 编写生成 .ll 的程序](#23-你的提交2-利用-lightir--cpp-编写生成-ll-的程序) + - [3. Lab3 的准备](#3-lab3-的准备) + - [3.1 了解 Visitor Pattern](#31-了解-visitor-pattern) + - [4. 实验要求](#4-实验要求) + - [4.1 目录结构](#41-目录结构) + - [4.2 编译、运行和验证](#42-编译运行和验证) + - [4.3 提交要求和评分标准](#43-提交要求和评分标准) +## 0. 
前言 + +本次实验作为 Lab3 的前驱实验,独立于 Lab1。 +本次实验的目的是让大家熟悉 Lab3 所需要的相关知识: LLVM IR、 LightIR(LLVM IR 的轻量级 C++ 接口)和 Visitor Pattern(访问者模式)。 +在开始实验之前,请根据之前的[编译实验环境搭建](http://202.38.79.174/compiler_staff/2022fall-environment/-/blob/master/README.md)确保LLVM的版本为10.0.1,且PATH环境变量配置正确。可以通过`lli --version`命令是否可以输出10.0.1的版本信息来验证(其它版本不一定兼容)。 +本次实验设置的目的是为 Lab3 进行知识的准备与热身, coding 的工程量不大,但是需要一定的阅读、学习、理解。因此本次的实验报告相比之下内容要求会稍微多一些,以避免大家在 Lab3 时手足无措。 +这里助教提供了简单的[C++简介](Documentations/common/simple_cpp.md),对C++基本特性不熟悉的同学可以先阅读该文档。 + +### 主要工作 + +1. 第一部分: 了解 LLVM IR。通过 clang 生成的 .ll ,了解 LLVM IR 与 c 代码的对应关系,**完成1.3**。 +2. 第二部分: 了解 LightIR。通过助教提供的 c++ 例子,了解 LightIR 的 c++ 接口及实现,**完成2.3**。 +3. 第三部分: 理解 Visitor Pattern 。**完成3.1** +4. 实验报告:在 [report.md](./Reports/2-ir-gen-warmup/report.md) 中**回答3个问题**。 + +## 1. LLVM IR 部分 +### 1.1 LLVM IR 介绍 +根据[维基百科](https://zh.wikipedia.org/zh-cn/LLVM)的介绍,LLVM是一个自由软件项目,它是一种编译器基础设施,以C++写成,包含一系列模块化的编译器组件和工具链,用来开发编译器前端和后端。IR的全称是Intermediate Representation,即中间表示。LLVM IR是一种类似于汇编的底层语言。 + +LLVM IR的具体指令可以参考 [Reference Manual](http://llvm.org/docs/LangRef.html)。由于其手册过于复杂,助教筛选了后续实验中将要用到的子集,总结为了 [Light IR 手册](../common/LightIR.md#ir-%E6%A0%BC%E5%BC%8F)。如果有感兴趣的同学可以阅读原手册作为拓展。 + +作为一开始的参考,你可以先阅读其中 `IR格式` 和 `IR指令` 两节,后续有需要再反复参考。实验的最后,你需要在 [report.md](./Reports/2-ir-gen-warmup/report.md) 中**回答问题1**。 + +### 1.2 gcd 例子: 利用 clang 生成的 .ll +阅读 [tests/2-ir-gen-warmup/ta_gcd/gcd_array.c](../../tests/2-ir-gen-warmup/ta_gcd/gcd_array.c)。 + +根据 `clang -S -emit-llvm gcd_array.c` 指令,你可以得到对应的 `gcd_array.ll` 文件.你需要结合 [gcd_array.c](../../tests/2-ir-gen-warmup/ta_gcd/gcd_array.c) 阅读 `gcd_array.ll` ,理解其中每条LLVM IR指令与c代码的对应情况。 + +通过 `lli gcd_array.ll; echo $?` 指令,你可以测试 `gcd_array.ll` 执行结果的正确性。其中, + +- `lli` 会运行 `*.ll` 文件 +- `$?` 的内容是上一条命令所返回的结果,而 `echo $?` 可以将其输出到终端中 + +后续你会经常用到这两条指令。 + +### 1.3 你的提交1: 手动编写 .ll +助教提供了四个简单的c程序,分别是 `tests/2-ir-gen-warmup/c_cases/` 目录下的 [assign.c](../../tests/2-ir-gen-warmup/c_cases/assign.c)、 [fun.c](../../tests/2-ir-gen-warmup/c_cases/fun.c)、 
[if.c](../../tests/2-ir-gen-warmup/c_cases/if.c) 和 [while.c](../../tests/2-ir-gen-warmup/c_cases/while.c)。你需要在 `tests/2-ir-gen-warmup/stu_ll/` 目录中,手工完成自己的 [assign_hand.ll](../../tests/2-ir-gen-warmup/stu_ll/assign_hand.ll)、 [fun_hand.ll](../../tests/2-ir-gen-warmup/stu_ll/fun_hand.ll)、 [if_hand.ll](../../tests/2-ir-gen-warmup/stu_ll/if_hand.ll) 和 [while_hand.ll](../../tests/2-ir-gen-warmup/stu_ll/while_hand.ll),以实现与上述四个C程序相同的逻辑功能。我们鼓励添加必要的注释。`.ll` 文件的注释是以 ";" 开头的。 + +必要的情况下,你可以参考 `clang -S -emit-llvm` 的输出,但是你提交的结果必须避免同此输出一字不差。 + + +## 2. LightIR 部分 +### 2.1 LightIR - LLVM IR 的 cpp 接口 +由于 LLVM IR 官方的 cpp 接口的调用链同样过于冗长,助教提供了 `LightIR` 这一 cpp 接口库。你需要阅读 [LightIR cpp APIs](../common/LightIR.md#c-apis)。 +lab3 部分会要求大家通过 `LightIR` 根据 `AST` 构建生成 LLVM IR。所以你需要仔细阅读文档了解其接口的设计。 + +### 2.2 gcd 例子: 利用 LightIR + cpp 生成 .ll +为了让大家更直观地感受并学会 `LightIR` 接口的使用,助教提供了 [tests/2-ir-gen-warmup/ta_gcd/gcd_array_generator.cpp](../../tests/2-ir-gen-warmup/ta_gcd/gcd_array_generator.cpp)。该 cpp 程序会生成与 gcd_array.c 逻辑相同的 LLVM IR 文件。助教提供了非常详尽的注释,请认真阅读作为参考! +该程序的编译与运行请参考 4.2 节。 + +### 2.3 你的提交2: 利用 LightIR + cpp 编写生成 .ll 的程序 +你需要在 `tests/2-ir-gen-warmup/stu_cpp/` 目录中,编写 [assign_generator.cpp](../../tests/2-ir-gen-warmup/stu_cpp/assign_generator.cpp)、 [fun_generator.cpp](../../tests/2-ir-gen-warmup/stu_cpp/fun_generator.cpp)、 [if_generator.cpp](../../tests/2-ir-gen-warmup/stu_cpp/if_generator.cpp) 和 [while_generator.cpp](../../tests/2-ir-gen-warmup/stu_cpp/while_generator.cpp),以生成与 1.3 节的四个 c 程序相同逻辑功能的 `.ll` 文件。你需要完成 [report.md](./Reports/2-ir-gen-warmup/report.md) 中的**问题2**。 + +## 3. 
Lab3 的准备 +### 3.1 了解 Visitor Pattern +Visitor Pattern(访问者模式)是一种在 LLVM 项目源码中被广泛使用的设计模式。在遍历某个数据结构(比如树)时,如果我们需要对每个节点做一些额外的特定操作, Visitor Pattern 就是个不错的思路。 +Visitor Pattern 是为了解决**稳定的数据结构**和**易变的操作耦合问题**而产生的一种设计模式。解决方法就是在被访问的类里面加一个对外提供接待访问者的接口,其关键在于在数据基础类里面有一个方法接受访问者,将自身引用传入访问者。这里举一个应用实例来帮助理解访问者模式: 您在朋友家做客,您是访问者;朋友接受您的访问,您通过朋友的描述,然后对朋友的描述做出一个判断,这就是访问者模式。 +有关 Visitor Pattern 的含义、模式和特点,有梯子的同学可参考 [维基百科](https://en.wikipedia.org/wiki/Visitor_pattern#C++_example)。其中较为重要的一点原则在于, C++ 中对函数重载特性的支持。 +在 `tests/2-ir-gen-warmup/calculator` 中,助教编写了一个利用访问者模式,产生计算算数表达式的中间代码的程序。该程序首先对算数表达式进行语法分析生成语法树,再使用访问者模式来遍历语法树,产生中间代码。在 [calc_ast.hpp](../../tests/2-ir-gen-warmup/calculator/calc_ast.hpp) 中,我们定义了语法树的不同节点类型。在 [calc_builder.cpp](../../tests/2-ir-gen-warmup/calculator/calc_builder.cpp) 中,我们使用 LightIR 编写了不同的 `visit` 函数。根据节点类型的不同,编译器会在多种 `visit` 函数中,选择对应的实现进行调用。请认真阅读这两个文件和其它相关代码,理解语法树是如何通过访问者模式被遍历的,并在 [report.md](./Reports/2-ir-gen-warmup/report.md)中**回答问题3**。 + + +该程序使用方法如下: +``` shell +# 在 build 目录下操作 +$ make +$ ./calc +Input an arithmatic expression (press Ctrl+D in a new line after you finish the expression): +4 * (8 + 4 - 1) / 2 +result and result.ll have been generated. +$ ./result +22 +``` +其中,`result.ll` 是程序产生的中间代码,`result` 是中间代码编译产生的二进制,运行它就可以输出算数表达的结果。注:单独运行 `lli result.ll` 是会报错的,那怎么才能解决报错的根源问题,通过 `lli` 得到正确的运行结果呢?感兴趣的同学可以思考调研一下。 + + +## 4. 实验要求 + +### 4.1 目录结构 +除了下面指明你所要修改或提交的文件,其他文件请勿修改。 +``` log +. +├── CMakeLists.txt +├── Documentations +│   ├── ... +| ├── common <- LightIR 相关文档 +│   └── 2-ir-gen-warmup +│      └── README.md <- lab2 实验文档说明(你在这里) +├── include <- 实验所需的头文件 +│   ├── ... +│   ├── lightir +├── README.md +├── Reports +│   ├── ... +│   └── 2-ir-gen-warmup +│   └── report.md <- lab2 所需提交的实验报告,含3个问题(你要交) +├── src +│   ├── ... +│   └── lightir +└── tests + ├── CMakeLists.txt + ├── ... 
+ └── 2-ir-gen-warmup <- lab2 文件夹 +    ├── c_cases <- 4个 c 程序 +    │   ├── assign.c +    │   ├── fun.c +    │   ├── if.c +    │   └── while.c +    ├── CMakeLists.txt <- 你在2.3节需要去掉注释(我们不收,你要改) +    ├── stu_cpp <- lab2 所需提交的 cpp 目录(你要交) +    │   ├── assign_generator.cpp +    │   ├── fun_generator.cpp +    │   ├── if_generator.cpp +    │   └── while_generator.cpp +    ├── stu_ll <- lab2 所需提交的 .ll 目录(你要交) +    │   ├── assign_hand.ll +    │   ├── fun_hand.ll +    │   ├── if_hand.ll +    │   └── while_hand.ll +    └── ta_gcd +    ├── gcd_array.c +    └── gcd_array_generator.cpp <- 助教提供的生成 gcd_array.ll 的 cpp +``` + +### 4.2 编译、运行和验证 + +* 编译与运行 + 在 `${WORKSPACE}/build/` 下执行: + ``` shell + # 如果存在 CMakeCache.txt 要先删除 + # rm CMakeCache.txt + cmake .. + make + make install + ``` + 你可以得到对应 `gcd_array_generator.cpp` 的可执行文件。 + 在完成2.3时,在 `${WORKSPACE}/tests/2-ir-gen-warmup/CMakeLists.txt` 中去掉对应的注释,再在 `${WORKSPACE}/build/` 下执行 `cmake ..` 与 `make` 指令,即可得到对应的可执行文件。 +* 验证 + 本次试验测试案例只有`${WORKSPACE}/tests/2-ir-gen-warmup/c_cases`中的4个样例。请大家自行验证。 + 助教会执行你们的代码,并使用 `diff` 命令进行验证。 + +### 4.3 提交要求和评分标准 +* 提交要求 + 本实验的提交要求分为两部分: 实验部分的文件和报告,git提交的规范性。 + * 实验部分: + * 需要完成 `./tests/2-ir-gen-warmup/stu_ll` 目录下的4个文件 + * 需要完成 `./tests/2-ir-gen-warmup/stu_cpp` 目录下的4个文件 + * 需要在 `./Reports/2-ir-gen-warmup/` 目录下撰写实验报告 + * 实验报告内容包括: + * 实验要求、3个问题、实验难点、实验反馈(具体参考 [report.md](./Reports/2-ir-gen-warmup/report.md)) + * 本次实验报告**参与**评分标准. 
+ * git 提交规范: + * 不破坏目录结构( `report.md` 如果需要放图片,请放在 `./Reports/2-ir-gen-warmup/figs/` 下) + * 不上传临时文件(凡是自动生成的文件和临时文件请不要上传) + * git log 言之有物(不强制,请不要 git commit -m 'commit 1',git commit -m 'sdfsdf',每次 commit 请提交有用的 comment 信息) +* 提交方式: + * 代码提交:本次实验需要在希冀课程平台上发布的作业Lab2-代码提交提交自己仓库的 gitlab 链接(注:由于平台限制,请提交http协议格式的仓库链接。例:学号为 PB011001 的同学,Lab2 的实验仓库地址为`http://202.38.79.174/PB011001/2022fall-compiler_cminus.git`),我们会收集最后一次提交的评测分数,作为最终代码得分。 + * 报告提交:将 Reports/2-ir-gen-warmup/README.md 导出成 pdf 文件单独提交到Lab2-报告提交。 + * 提交异常:如果遇到在平台上提交异常的问题,请通过邮件联系助教,助教将收取截止日期之前,学生在 gitlab 仓库最近一次 commit 内容进行评测。 +* 评分标准: 本次实验的测试样例较为简单,所以为了拿高分请注意 report.md。 + * 1.3节 `.ll` 运行结果正确(一个5分,共20分) + * 2.3节 `.cpp` 运行结果正确(一个10分,共40分) + * `report.md` (40分) + * 禁止执行恶意代码,违者本次实验0分处理 +* 迟交规定 + * `Soft Deadline` : 2021/10/23 23:59:59 (北京标准时间,UTC+8) + * `Hard Deadline` : 2021/10/30 23:59:59 (北京标准时间,UTC+8) + * 迟交需要邮件通知TA: + * 邮箱: wch0925@mail.ustc.edu.cn + * 邮件主题: lab2迟交-学号 + * 内容: 包括迟交原因、最后版本commitID、迟交时间等 + * 迟交分数 + * x为迟交天数(对于 `Soft Deadline` 而言), grade 满分10 + ``` bash + final_grade = grade, x = 0 + final_grade = grade * (0.9)^x, 0 < x <= 7 + final_grade = 0, x > 7 # 这一条严格执行,请对自己负责 + ``` +* 关于抄袭和雷同 + 经过助教和老师判定属于作业抄袭或雷同情况,所有参与方一律零分,不接受任何解释和反驳。 + +如有任何问题,欢迎在论坛提意见进行批判指正。 diff --git a/Documentations/common/LightIR.md b/Documentations/common/LightIR.md new file mode 100644 index 0000000000000000000000000000000000000000..bfc0b673a83554aa0c4f1efea405415db345d24d --- /dev/null +++ b/Documentations/common/LightIR.md @@ -0,0 +1,818 @@ +# Light IR +- [Light IR](#light-ir) + - [Light IR 简介](#lightir-简介) + - [IR 格式](#ir-格式) + - [IR 结构图](#ir-结构图) + - [IR 指令](#ir-指令) + - [指令格式](#指令格式) + - [Terminator Instructions](#terminator-instructions) + - [Ret](#ret) + - [Br](#br) + - [Standard binary operators](#standard-binary-operators) + - [Add FAdd](#add-fadd) + - [Sub FSub](#sub-fsub) + - [Mul FMul](#mul-fmul) + - [SDiv FDiv](#sdiv-fdiv) + - [Memory operators](#memory-operators) + - [Alloca](#alloca) + - [Load](#load) + - 
[Store](#store) + - [CastInst](#castinst) + - [ZExt](#zext) + - [FpToSi](#fptosi) + - [SiToFp](#sitofp) + - [Other operators](#other-operators) + - [ICmp FCmp](#icmp-fcmp) + - [Call](#call) + - [GetElementPtr](#getelementptr) + - [C++ APIs](#c-apis) + - [C++类关系图](#c类关系图) + - [Module](#module) + - [BasicBlock](#basicblock) + - [GlobalVariable](#globalvariable) + - [Constant](#constant) + - [Argument](#argument) + - [Function](#function) + - [IRBuilder](#irbuilder) + - [Instruction](#instruction) + - [Type](#type) + - [User](#user) + - [Use](#use) + - [Value](#value) + - [注意](#注意) + +## LightIR 简介 +为了让同学们方便理解并掌握 IR 核心技术,我们从复杂繁琐的 LLVM IR 中裁剪出了本课程实验所需要的精简 IR 子集(我们将其命名为 LightIR),并且实现了轻量级的库来进行 IR 的控制和生成。本文档旨在介绍 LightIR 的结构和具体指令,同时可当作 LightIR [C++ 接口](./LightIR.md#c-apis)的参考手册。LightIR 的 [IR 格式](./LightIR.md#ir-格式)和 [IR 指令](./LightIR.md#ir-指令)部分也可以参考 LLVM 官方的 IR 文档 [LLVM Reference Manual](http://llvm.org/docs/LangRef.html)。 + +## IR 格式 + +### IR 结构图 +![image-20201109145323504](figs/image-20201109145323504.png) +我们实验中需要生成的IR代码有着相对固定的结构模式。 + +- 最上层的是模块,可以理解为一个完整编译单元,来源于一个`cminus-f`源文件。模块包含全局变量和函数定义。 +- 函数由头部和函数体组成。函数的头部包括返回值类型、函数名和参数表(每个参数包括类型和值)。函数体可以由多个基本块构成。 +- 基本块是指程序顺序执行的语句序列,只有一个入口和一个出口。基本块由若干指令构成。 +- 注意一个基本块中的**只能有一条终止指令**(Ret/Br)。 + +以下面的`easy.c`与`easy.ll`为例进行说明。 +通过命令`clang -S -emit-llvm easy.c`可以得到对应的`easy.ll`如下(助教增加了额外的注释)。`.ll`文件中注释以`;`开头。 + +- `easy.c`: + ``` c + int main(){ + int a; + int b; + a = 1; + b = 2; + if(a < b) + b = 3; + return a + b; + } + ``` + +- `easy.ll`: + ``` c + ; 注释: .ll文件中注释以';'开头 + ; ModuleID = 'easy.c' + source_filename = "easy.c" + ; 注释: target的开始 + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + ; 注释: target的结束 + + ; 注释: 全局main函数的定义 + ; Function Attrs: noinline nounwind optnone uwtable + define dso_local i32 @main() #0 { + ; 注释: 第一个基本块的开始 + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 0, i32* %1, 
align 4 + store i32 1, i32* %2, align 4 + store i32 2, i32* %3, align 4 + %4 = load i32, i32* %2, align 4 + %5 = load i32, i32* %3, align 4 + %6 = icmp slt i32 %4, %5 + br i1 %6, label %7, label %8 + ; 注释: 第一个基本块的结束 + + ; 注释: 第二个基本块的开始 + 7: ; preds = %0 + store i32 3, i32* %3, align 4 + br label %8 + ; 注释: 第二个基本块的结束 + + ; 注释: 第三个基本块的开始 + 8: ; preds = %7, %0 + %9 = load i32, i32* %2, align 4 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %9, %10 + ret i32 %11 ; 注释: 返回语句 + ; 注释: 第三个基本块的结束 + } + + attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + + !llvm.module.flags = !{!0} + !llvm.ident = !{!1} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{!"clang version 10.0.1 "} + ``` + + 其中,每个程序由一个个或多个模块组成,模块之间由链接器合并(本实验中并不涉及)。 + 每个模块组成如下: +- 目标信息: + ``` c + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + ``` +- 全局符号: 全局变量、函数定义 +- 其它信息 + +每个函数的组成如下: +- 头部:函数返回值类型,函数名,函数参数 +- 一个或多个基本块: + - 每个基本块又有标签和指令组成。 + ``` c + 8: ; preds = %7, %0 + %9 = load i32, i32* %2, align 4 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %9, %10 + ret i32 %11 + ``` + 这个例子中,`8`就是标签。 + `%9 = load i32, i32* %2, align 4`中的`%9`是目的操作数,`load`是指令助记符,`i32`是32位整型,`i32*`是指向`i32`的指针类型,`%2`是源操作数,`align 4`表示4字节对齐。 + +## IR 指令 +- 采用 3 地址的方式 + - 区别于 X86 汇编的目标和源寄存器共用的模式: ADD EAX, EBX + - %2 = add i32 %0, %1 +- SSA 形式 + 无限寄存器 + - 每个变量都只被赋值一次 + - 容易确定操作间的依赖关系,便于优化分析 +- 强类型系统 + - 每个 Value 都具备自身的类型, + - IR类型系统: + - `i1`:1位宽的整数类型 + - `i32`:32位宽的整数类型 + - 
`float`:单精度浮点数类型 + - `pointer`:指针类型 + - 例如:`i32*, [10 x i32*]` + - `label`: 基本块的标识符类型 + - `functiontype`: 函数类型,包括函数返回值类型与参数类型 + +### 指令格式 + +#### Terminator Instructions +**注**:ret与br都是Terminator Instructions也就是终止指令,在llvm基本块的定义里,基本块是单进单出的,因此只能有一条终止指令(ret或br)。当一个基本块有两条终止指令,clang 在做解析会认为第一个终结指令是此基本块的结束,并会开启一个新的匿名的基本块(并占用了下一个编号)。 +##### Ret +- 概念:返回指令。用于将控制流(以及可选的值)从函数返回给调用者。`ret`指令有两种形式:一种返回值,然后终结函数,另一种仅终结函数。 +- 格式 + - `ret <type> <value>` + - `ret void` +- 例子: + - `ret i32 %0` + - `ret void` +##### Br +- 概念:跳转指令。用于使控制流转移到当前函数中的另一个基本块。该指令有两种形式,分别对应于条件分支和无条件分支。 +- 格式: + - `br i1 <cond>, label <iftrue>, label <iffalse>` + - `br label <dest>` +- 例子: + - `br i1 %cond, label %truebb, label %falsebb` + - `br label %bb` +#### Standard binary operators +##### Add FAdd +- 概念:`add`指令返回其两个`i32`类型的操作数之和,返回值为`i32`类型,`fadd`指令返回其两个`float`类型的操作数之和,返回值为`float`类型。 +- 格式: + - `<result> = add <type> <op1>, <op2>` + - `<result> = fadd <type> <op1>, <op2>` +- 例子: + - `%2 = add i32 %1, %0` + - `%2 = fadd float %1, %0` + +##### Sub FSub +- 概念:`sub`指令返回其两个`i32`类型的操作数之差,返回值为`i32`类型,`fsub`指令返回其两个`float`类型的操作数之差,返回值为`float`类型。 +- 格式与例子与`add`,`fadd`类似 + +##### Mul FMul +- 概念:`mul`指令返回其两个`i32`类型的操作数之积,返回值为`i32`类型,`fmul`指令返回其两个`float`类型的操作数之积,返回值为`float`类型。 +- 格式与例子与`add`,`fadd`类似 + +##### SDiv FDiv +- 概念:`sdiv`指令返回其两个`i32`类型的操作数之商,返回值为`i32`类型,`fdiv`指令返回其两个`float`类型的操作数之商,返回值为`float`类型。 +- 格式与例子与`add`,`fadd`类似 + +#### Memory operators +##### Alloca +- 概念: `alloca`指令在当前执行函数的堆栈帧上分配内存,当该函数返回其调用者时将自动释放该内存。 始终在地址空间中为数据布局中指示的分配资源分配对象。 +- 格式:`<result> = alloca <type>` +- 例子: + - `%ptr = alloca i32` + - `%ptr = alloca [10 x i32]` + +##### Load +- 概念:`load`指令用于从内存中读取。 +- 格式:`<result> = load <type>, <type>* <pointer>` +- 例子:`%val = load i32, i32* %ptr` + +##### Store +- 概念:`store`指令用于写入内存。 +- 格式:`store <type> <value>, <type>* <pointer>` +- 例子:`store i32 3, i32* %ptr` + +#### CastInst +##### ZExt +- 概念:`zext`指令将其操作数**零**扩展为`type2`类型。 +- 格式:`<result> = zext <type> <value> to <type2>` +- 例子:`%1 = zext i1 %0 to i32` + +##### FpToSi +- 概念:`fptosi`指令将浮点值转换为`type2`(整数)类型。 +- 格式:`<result> = fptosi <type> <value> to <type2>` +- 例子:`%Y = fptosi float 1.0E-247 to i32` + +##### SiToFp +- 概念:`sitofp`指令将有符号整数转换为`type2`(浮点数)类型。 +- 格式:`<result> = sitofp <type> <value> 
to <type2>` +- 例子:`%X = sitofp i32 257 to float` + +#### Other operators +##### ICmp FCmp +- 概念:`icmp`指令根据两个整数的比较返回布尔值,`fcmp`指令根据两个浮点数的比较返回布尔值。 +- 格式: + - `<result> = icmp <cond> <type> <op1>, <op2>` + - `<cond> = eq | ne | sgt | sge | slt | sle` + - `<result> = fcmp <cond> <type> <op1>, <op2>` + - `<cond> = eq | ne | ugt | uge | ult | ule` +- 例子:`%2 = icmp sge i32 %0, %1` + +##### Call +- 概念:`call`指令用于使控制流转移到指定的函数,其传入参数绑定到指定的值。 在被调用函数中执行`ret`指令后,控制流程将在函数调用后继续执行该指令,并且该函数的返回值绑定到`result`参数。 +- 格式: + - `<result> = call <return ty> <func name>(<function args>)` +- 例子: + - `%0 = call i32 @func( i32 %1, i32* %0)` + - `call void @func( i32 %arg)` + +##### GetElementPtr +- 概念:`getelementptr`指令用于获取数组结构的元素的地址。 它仅执行地址计算,并且不访问内存。 +- 格式:`<result> = getelementptr <type>, <type>* <ptrval> [, <type> <idx>]` +- 参数解释:第一个参数是计算基础类型,第二第三个参数表示索引开始的指针类型及指针,`[]`表示可重复参数,里面表示的数组索引的偏移类型及偏移值。(Question:思考指针类型为`[10 x i32]`指针和`i32`指针`getelementptr`用法的不同,并给出解释,实验结束后回答两者使用情况的区别) +- 例子: + - `%2 = getelementptr [10 x i32], [10 x i32]* %1, i32 0, i32 %0` + - `%2 = getelementptr i32, i32* %1, i32 %0` +- **额外阅读(很重要)**:[The Often Misunderstood GEP Instruction](https://llvm.org/docs/GetElementPtr.html) + +## C++ APIs + +### C++类关系图 + +![类继承关系](figs/cpp_class_inherits.png) + +上面是c++代码的类关系图。 + +- `Module`类表示IR的模块,是IR最上层的结构。 +- `Type`类表示IR的类型,派生出`ArrayType`, `FloatType`, `FunctionType`, `IntegerType`和`PointerType`等分别表示的数组类型、浮点型、函数类型、整数型和指针类型。例如,函数头部的返回值类型用一个`Type`对象表示,函数的参数表中每个参数的类型也是用`Type`对象表示(通常是`Type`的某个子类),而参数的值则是用`Value`对象表示。返回值类型和参数类型一起构成了函数类型,用`FunctionType`对象表示。 +- `Use`类用于描述`Value`的使用情况,`Value`类成员有一个`Use`链表来表示使用其的操作数链表。 +- `Value`类表示值,具体可以是`Argument`, `BasicBlock`, `Function`或`User`。每一个`Value`对象都会有一个自己的类型。 + - `Function`类表示函数的定义,其中,成员`std::list<Argument *> arguments_`表示函数的参数(形参)。 + - `BasicBlock`表示基本块。 + - `User`类描述了值的使用情况,例如某个`Instruction`使用了哪些操作数。 + - `Constant`类表示常量,派生出各种类型的常量。 + - `GlobalVariable`类表示全局变量,在IR中位于模块内,与函数并列。 + - `Instruction`类表示基本块中的IR指令,派生出具体的各种指令。 + +下面是对各个类进行具体的介绍。 +### Module + +- 概念:一个编译单元。在 `cminus-f` 意义下表示一个文件。 +
+Module 的定义 (点击此处展开) + +```cpp +class Module +{ +public: + // 将函数f添加到该模块的函数链表上 + // 在函数被创建的时候会自动调用此方法 + void add_function(Function *f); + // 将全局变量g添加到该模块的全局变量链表上 + // 在全局变量被创建的时候会自动调用此方法 + void add_global_variable(GlobalVariable* g); + // 获取全局变量列表 + std::list<GlobalVariable *> get_global_variable(); + // 获得(创建)自定义的Pointer类型 + PointerType *get_pointer_type(Type *contained); + // 获得(创建)自定义的Array类型 + ArrayType *get_array_type(Type *contained, unsigned num_elements); + // 获得基本类型int32 + IntegerType *get_int32_type(); + // 其他基本类型类似... +private: + // 存储全局变量的链表 + std::list<GlobalVariable *> global_list_; + // 存储函数的链表 + std::list<Function *> function_list_; + + // 存储基本类型 + IntegerType *int1_ty_; + IntegerType *int32_ty_; + Type *label_ty_; + Type *void_ty_; + FloatType *float32_ty_; + + // 存储自定义类型 + std::map<Type *, PointerType *> pointer_map_; + std::map<std::pair<Type *, int>, ArrayType *> array_map_; +}; +``` +
+ +### BasicBlock + +- 概念:基本块。是一个单入口单出口的代码块,可以作为分支指令目标对象。 + +
+BasicBlock 的定义 (点击此处展开) + +```cpp +class BasicBlock : public Value +{ +public: + // 创建并返回基本块,参数分别是基本块所属的模块,基本块名字(默认为空),基本块所属的函数 + static BasicBlock *create(Module *m, const std::string &name, Function *parent); + // 返回该基本块所属的函数 + Function *get_parent(); + // 返回该基本块所属的模块 + Module *get_module(); + // 返回该基本块的终止指令,若基本块的最后一条指令不是终止指令则返回 nullptr + Instruction *get_terminator(); + // 将指令 instr 添加到该基本块的指令链表末端,使用 IRBuilder 来创建指令时会自动调用此方法 + void add_instruction(Instruction *instr); + // 将指令 instr 添加到该基本块的指令链表首部 + void add_instr_begin(Instruction *instr); + // 将指令 instr 从该基本块的指令链表中移除,该 API 会同时维护好 instr 的操作数的 use 链表。 + void delete_instr(Instruction *instr); + // 判断该基本块是否为空 + bool empty(); + // 返回该基本块中的指令数目 + int get_num_of_instr(); + // 返回该基本块的指令链表 + std::list<Instruction *> &get_instructions(); + // 将该基本块从所属函数的基本块链表中移除 + void erase_from_parent(); + + /****************APIs about cfg****************/ + // 返回前驱基本块集合 + std::list<BasicBlock *> &get_pre_basic_blocks(); + // 返回后继基本块集合 + std::list<BasicBlock *> &get_succ_basic_blocks(); + // 添加前驱基本块 + void add_pre_basic_block(BasicBlock *bb); + // 添加后继基本块 + void add_succ_basic_block(BasicBlock *bb); + // 移除前驱基本块 + void remove_pre_basic_block(BasicBlock *bb); + // 移除后继基本块 + void remove_succ_basic_block(BasicBlock *bb); + /****************APIs about cfg****************/ +private: + // 存储前驱基本块的链表 + std::list<BasicBlock *> pre_bbs_; + // 存储后继基本块的链表 + std::list<BasicBlock *> succ_bbs_; + // 存储该基本块指令的链表 + std::list<Instruction *> instr_list_; + // 指向该基本块所属函数的指针 + Function *parent_; + +}; +``` +
+ +### GlobalVariable +- 概念:全局变量。 +
+GlobalVariable 的定义 (点击此处展开) + +```cpp +class GlobalVariable : public User +{ +public: + // 创建一个全局变量 + static GlobalVariable *create(std::string name, Module *m, Type* ty, + bool is_const, Constant* init ); +}; +``` +
+ +### Constant + +- 概念:常量。不同类型的常量由不同类来表示。 + +
+Constant 的定义 (点击此处展开) + +```cpp +class Constant : public User +{ +public: + Constant(Type *ty, const std::string &name = "", unsigned num_ops = 0); +}; +``` +
+ +
+整型常量 ConstantInt 的定义 (点击此处展开) + +```cpp +class ConstantInt : public Constant +{ +private: + // 该常量表示的值 + int value_; +public: + // 返回该常量中存的数 + int get_value(); + // 返回常量 const_val 中存的数 + static int get_value(ConstantInt *const_val); + // 以值 val 来创建常量 + static ConstantInt *get(int val, Module *m); + // 以值 val 来创建 bool 常量 + static ConstantInt *get(bool val, Module *m); +}; +``` +
+ +
+浮点数常量 ConstantFP 的定义 (点击此处展开) + +```cpp +class ConstantFP : public Constant +{ +private: + // 该常量表示的值 + float val_; +public: + // 以值 val 创建并返回浮点数常量 + static ConstantFP *get(float val, Module *m); + // 返回该常量中存的值 + float get_value(); +}; + +``` +
+ +
+ConstantZero 的定义 (点击此处展开) + +```cpp +// 用于全局变量初始化的零常量 +class ConstantZero : public Constant +{ +public: + // 创建并返回零常量 + static ConstantZero *get(Type *ty, Module *m); +}; +``` +
+ +### Argument + +- 概念:函数的参数。 + +
+Argument 的定义 (点击此处展开) + +```cpp +class Argument : public Value +{ +public: + // 返回该参数的所属的函数 + Function *get_parent(); + // 返回该参数在所在函数的参数列表中的序数 + unsigned get_arg_no() const; +private: + // 指向该参数的所属的函数的指针 + Function *parent_; + // 该参数在所在函数的参数列表中的序数 + unsigned arg_no_; +}; +``` +
+ +### Function +- 概念:函数。该类描述了一个过程,包含多个基本块。 + +
+Function 的定义 (点击此处展开) + +```cpp +class Function : public Value +{ +public: + // 创建并返回函数,参数依次是待创建函数类型 ty,函数名字 name (不可为空),函数所属的模块 parent + static Function *create(FunctionType *ty, const std::string &name, Module *parent); + // 返回该函数的函数类型 + FunctionType *get_function_type() const; + // 返回该函数的返回值类型 + Type *get_return_type() const; + // 将基本块 bb 添加至该函数末端(调用基本块的创建函数时会自动调用此函数) + void add_basic_block(BasicBlock *bb); + // 得到该函数参数数量 + unsigned get_num_of_args() const; + // 得到该函数基本块数量 + unsigned get_num_basic_blocks() const; + // 得到该函数所属的Module + Module *get_parent() const; + // 得到该函数参数列表的起始迭代器 + std::list<Argument *>::iterator arg_begin(); + // 得到该函数参数列表的终止迭代器 + std::list<Argument *>::iterator arg_end(); + // 从函数的基本块链表中删除基本块 bb + void remove(BasicBlock* bb); + // 返回函数基本块链表 + std::list<BasicBlock *> &get_basic_blocks(); + // 返回函数的参数链表 + std::list<Argument *> &get_args(); + // 给函数中未命名的基本块和指令命名 + void set_instr_name(); +private: + // 储存基本块的链表 + std::list<BasicBlock *> basic_blocks_; + // 储存参数的链表 + std::list<Argument *> arguments_; + // 指向该函数所属的模块的指针 + Module *parent_; +}; +``` +
+ +### IRBuilder +- 概念:生成IR的辅助类。该类提供了独立的接口创建各种 IR 指令,并将它们插入基本块中(注意:该辅助类不做任何类型检查)。 + +
+IRBuilder 的定义 (点击此处展开) + +```cpp +class IRBuilder { +private: + // 该辅助类正在插入的基本块 + BasicBlock *BB_; + // 该辅助类绑定的模块 + Module *m_; +public: + // 返回当前插入的基本块 + BasicBlock *get_insert_block() + // 设置当前插入的基本块 + void set_insert_point(BasicBlock *bb) + // 创建的指令并对应插入到基本块中,函数名字和参数名字和IR文档是一一对应的 + // 具体查看 IRBuilder.h + Instruction *create_[instr_type](); +}; +``` +
+ +### Instruction + +- 概念:指令。该类是所有 LLVM 指令的基类。子类包含IR部分中的所有指令。 + +
+Instruction 的定义 (点击此处展开) + +```c++ +class Instruction : public User +{ +private: + // 该指令所属的基本块 + BasicBlock *parent_; + // 该指令的类型id + OpID op_id_; + // 该指令的操作数个数 + unsigned num_ops_; +public: + // 所有指令的创建都要通过IRBuilder进行,暂不需要关注Instruction类的实现细节 + //(注:不通过IRBuilder来创建指令,而直接调用指令子类的创建方法未经助教完善的测试) +}; +``` + +
+ +### Type + +- 概念:IR的类型(包含VoidType、LabelType、FloatType、IntegerType、ArrayType、PointerType)。module中可以通过API获得基本类型,并创建自定义类型。 +- 子类介绍:其中ArrayType、PointerType可以嵌套得到自定义类型,而VoidType、 IntegerType,FloatType可看做IR的基本类型,LabelType是BasicBlock的类型,可作为跳转指令的参数,FunctionType表示函数类型。其中VoidType与LabelType 没有对应的子类,通过Type中的tid_字段判别,而其他类型均有对应子类。 + +
+Type 的定义 (点击此处展开) + +```c++ +class Type { +private: + // 类型种类 + TypeID tid_; + Module *m_; +public: + // 判断是否是ty类型 + bool is_[ty]_type(); + // 获得ty类型 + static Type *get_[ty]_type(Module *m) + // 若是PointerType则返回指向的类型,若不是则返回nullptr。 + Type *get_pointer_element_type(); + // 若是ArrayType则返回数组元素的类型,若不是则返回nullptr。 + Type *get_array_element_type(); +}; +``` + +
+
+IntegerType 的定义 (点击此处展开) + +```c++ +class IntegerType : public Type { +public: + explicit IntegerType(unsigned num_bits ,Module *m); + // 创建 IntegerType 类型,IntegerType 包含 int1 与 int32 + static IntegerType *get(unsigned num_bits, Module *m ); + // 获得 IntegerType 类型的长度 + unsigned get_num_bits(); +private: + // 表示 IntegerType 类型的长度 + unsigned num_bits_; +}; +``` + +
+ +
+FloatType 的定义 (点击此处展开) + +```c++ +class FloatType : public Type { +public: + FloatType (Module *m); + // 创建 FloatType 类型 + static FloatType *get(Module *m); +}; +``` + +
+ +
+数组类型 ArrayType 的定义 (点击此处展开) + +```c++ +class ArrayType : public Type { +public: + // 判断数组元素类型是否合法 + static bool is_valid_element_type(Type *ty); + // 通过数组元素类型contained以及数组长度num_elements创建数组类型 + static ArrayType *get(Type *contained, unsigned num_elements); + // 得到该数组类型的元素类型 + Type *get_element_type() const; + // 获得该数组类型的长度 + unsigned get_num_of_elements(); +private: + // 数组元素类型 + Type *contained_; + // 数组长度 + unsigned num_elements_; +}; +``` +
+ +
+指针类型 PointerType 的定义 (点击此处展开) + +```c++ +class PointerType : public Type { +public: + // 获取该指针类型指向的元素类型 + Type *get_element_type() const; + // 创建指向类型为contained的指针类型 + static PointerType *get(Type *contained); +private: + // 记录指针指向的类型 + Type *contained_; +}; +``` +
+ +
+函数类型 FunctionType 的定义 (点击此处展开) + +```c++ +class FunctionType : public Type { +public: + // 判断返回值类型是否合法 + static bool is_valid_return_type(Type *ty); + // 判断参数类型是否合法 + static bool is_valid_argument_type(Type *ty); + // 根据返回值类型result,参数类型列表params创建函数类型 + static FunctionType *get(Type *result, std::vector<Type *> params); + // 返回该函数类型的参数个数 + unsigned get_num_of_args() const; + // 获得该函数类型第i个参数的类型 + Type *get_param_type(unsigned i) const; + // 获得该函数类型的参数类型链表的起始迭代器 + std::vector<Type *>::iterator param_begin(); + // 获得该函数类型的参数类型链表的结束迭代器 + std::vector<Type *>::iterator param_end(); + // 获得该函数类型的返回值类型 + Type *get_return_type() const; +private: + // 返回值的类型 + Type *result_; + // 存储该函数类型的参数类型的链表 + std::vector<Type *> args_; +}; +``` +
+ +### User +- 概念:使用者。维护了 use-def 信息,表示该使用者用了哪些值。 + +
+User 的定义 (点击此处展开) + +```cpp +class User : public Value +{ +public: + // 从该使用者的操作数链表中取出第i个操作数 + Value *get_operand(unsigned i) const; + // 将该使用者的第i个操作数设为值v + void set_operand(unsigned i, Value *v); + // 将值v添加到该使用者的操作数链表上 + void add_operand(Value *v); + // 得到操作数链表的大小 + unsigned get_num_operand() const; + // 从该使用者的操作数链表中的所有操作数的使用情况中移除该使用者 + void remove_use_of_ops(); + // 移除操作数链表中索引为index1到index2的操作数 + // 例如想删除第0个操作数:remove_operands(0,0) + void remove_operands(int index1,int index2); +private: + // 参数列表,表示该使用者用到的参数 + std::vector<Value *> operands_; + // 该使用者使用的参数个数 + unsigned num_ops_; +}; + +``` +
+ +### Use +- 概念:代表了值的使用情况。 + +
+Use 的定义 (点击此处展开) + +```cpp +struct Use +{ + // 使用者 + Value *val_; + // 使用者中值的序数 + // 对于 func(a, b),a为0,b为1 + unsigned arg_no_; +}; +``` +
+ +### Value +- 概念:值。代表一个可能用于指令操作数的带类型数据,是最基础的类,维护了 def-use 信息,即该值被哪些使用者使用。 + +
+Value 的定义 (点击此处展开) + +```cpp +class Value +{ +public: + // 获取该值的类型 + Type *get_type() const; + + // 取得该值的使用情况 + std::list<Use> &get_use_list() { return use_list_; } + // 添加该值的使用情况 + void add_use(Value *val, unsigned arg_no = 0); + // 在所有地方将该值用新的值 new_val 替换,并维护好use_def和def_use链表 + void replace_all_use_with(Value *new_val); + // 将值 val 从使用链表中移除 + void remove_use(Value *val); +private: + // 值的类型 + Type *type_; + // 储存了该值的使用情况的链表 + std::list<Use> use_list_; +}; +``` +
+ +### 注意 + +助教在接口文档里筛选了可能会需要用到的接口,如果对API有问题的请移步issue讨论,本次`lightir`接口由助教自行设计实现,并做了大量测试,如有对助教的实现方法有异议或者建议的也请移步issue讨论,**请不要直接修改助教的代码,若因修改助教代码造成后续实验仓库合并的冲突请自行解决**。 diff --git a/Documentations/common/cminusf.md b/Documentations/common/cminusf.md new file mode 100644 index 0000000000000000000000000000000000000000..232f15b71c6fc3b8780f355c20a62000e14b78fc --- /dev/null +++ b/Documentations/common/cminusf.md @@ -0,0 +1,189 @@ +# cminus-f 的语法与语义 + +## cminus-f 的语法 + +1. $`\text{program} \rightarrow \text{declaration-list}`$ +2. $`\text{declaration-list} \rightarrow \text{declaration-list}\ \text{declaration}\ |\ \text{declaration}`$ +3. $`\text{declaration} \rightarrow \text{var-declaration}\ |\ \text{fun-declaration}`$ +4. $`\text{var-declaration}\ \rightarrow \text{type-specifier}\ \textbf{ID}\ \textbf{;}\ |\ \text{type-specifier}\ \textbf{ID}\ \textbf{[}\ \textbf{INTEGER}\ \textbf{]}\ \textbf{;}`$ +5. $`\text{type-specifier} \rightarrow \textbf{int}\ |\ \textbf{float}\ |\ \textbf{void}`$ +6. $`\text{fun-declaration} \rightarrow \text{type-specifier}\ \textbf{ID}\ \textbf{(}\ \text{params}\ \textbf{)}\ \text{compound-stmt}`$ +7. $`\text{params} \rightarrow \text{param-list}\ |\ \textbf{void}`$ +8. $`\text{param-list} \rightarrow \text{param-list}\ ,\ \text{param}\ |\ \text{param}`$ +9. $`\text{param} \rightarrow \text{type-specifier}\ \textbf{ID}\ |\ \text{type-specifier}\ \textbf{ID}\ \textbf{[]}`$ +10. $`\text{compound-stmt} \rightarrow \textbf{\{}\ \text{local-declarations}\ \text{statement-list} \textbf{\}}`$ +11. $`\text{local-declarations} \rightarrow \text{local-declarations var-declaration}\ |\ \text{empty}`$ +12. $`\text{statement-list} \rightarrow \text{statement-list}\ \text{statement}\ |\ \text{empty}`$ +13. $`\begin{aligned}\text{statement} \rightarrow\ &\text{expression-stmt}\\ &|\ \text{compound-stmt}\\ &|\ \text{selection-stmt}\\ &|\ \text{iteration-stmt}\\ &|\ \text{return-stmt}\end{aligned}`$ +14. 
$`\text{expression-stmt} \rightarrow \text{expression}\ \textbf{;}\ |\ \textbf{;}`$ +15. $`\begin{aligned}\text{selection-stmt} \rightarrow\ &\textbf{if}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}\\ &|\ \textbf{if}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}\ \textbf{else}\ \text{statement}\end{aligned}`$ +16. $`\text{iteration-stmt} \rightarrow \textbf{while}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}`$ +17. $`\text{return-stmt} \rightarrow \textbf{return}\ \textbf{;}\ |\ \textbf{return}\ \text{expression}\ \textbf{;}`$ +18. $`\text{expression} \rightarrow \text{var}\ \textbf{=}\ \text{expression}\ |\ \text{simple-expression}`$ +19. $`\text{var} \rightarrow \textbf{ID}\ |\ \textbf{ID}\ \textbf{[}\ \text{expression} \textbf{]}`$ +20. $`\text{simple-expression} \rightarrow \text{additive-expression}\ \text{relop}\ \text{additive-expression}\ |\ \text{additive-expression}`$ +21. $`\text{relop}\ \rightarrow \textbf{<=}\ |\ \textbf{<}\ |\ \textbf{>}\ |\ \textbf{>=}\ |\ \textbf{==}\ |\ \textbf{!=}`$ +22. $`\text{additive-expression} \rightarrow \text{additive-expression}\ \text{addop}\ \text{term}\ |\ \text{term}`$ +23. $`\text{addop} \rightarrow \textbf{+}\ |\ \textbf{-}`$ +24. $`\text{term} \rightarrow \text{term}\ \text{mulop}\ \text{factor}\ |\ \text{factor}`$ +25. $`\text{mulop} \rightarrow \textbf{*}\ |\ \textbf{/}`$ +26. $`\text{factor} \rightarrow \textbf{(}\ \text{expression}\ \textbf{)}\ |\ \text{var}\ |\ \text{call}\ |\ \text{integer}\ |\ \text{float}`$ +27. $`\text{integer} \rightarrow \textbf{INTEGER}`$ +28. $`\text{float} \rightarrow \textbf{FLOATPOINT}`$ +29. $`\text{call} \rightarrow \textbf{ID}\ \textbf{(}\ \text{args} \textbf{)}`$ +30. $`\text{args} \rightarrow \text{arg-list}\ |\ \text{empty}`$ +31. 
$`\text{arg-list} \rightarrow \text{arg-list}\ \textbf{,}\ \text{expression}\ |\ \text{expression}`$ + + +## cminus-f 的语义 + +在上述语法规则中,我们定义了 `cminus-f` 语言的语法,接着,我们对照语法规则,给出相关的语义和解释。 +在阅读前,需要理解 `cminus-f` 主要源自于 C 语言,因此它的行为都会接近 C 语言。 + +1. $`\text{program} \rightarrow \text{declaration-list}`$ +2. $`\text{declaration-list} \rightarrow \text{declaration-list}\ \text{declaration}\ |\ \text{declaration}`$ +3. $`\text{declaration} \rightarrow \text{var-declaration}\ |\ \text{fun-declaration}`$ + + 一个`程序`由一系列`声明`组成,声明包括了`函数声明`与`变量声明`,它们可以以任意顺序排列。 + + 全局变量需要初始化为全 0 + + 所有的变量必须在使用前先进行声明,所有的函数必须在使用前先进行定义 + + 一个`程序`中至少要有一个`main`函数的`声明` + + 因为没有原型这个概念, `cminus-f` 不区分函数的声明和定义。 + +4. $`\text{var-declaration}\ \rightarrow \text{type-specifier}\ \textbf{ID}\ \textbf{;}\ |\ \text{type-specifier}\ \textbf{ID}\ \textbf{[}\ \textbf{INTEGER}\ \textbf{]}\ \textbf{;}`$ +5. $`\text{type-specifier} \rightarrow \textbf{int}\ |\ \textbf{float}\ |\ \textbf{void}`$ + + `cminus-f` 的基础类型只有整型(`int`)、浮点型(`float`)和 `void`。而在变量声明中,只有整型和浮点型可以使用,`void` 仅用于函数声明。 + + 一个`变量声明`定义一个整型或者浮点型的变量,或者一个整型或浮点型的数组变量(这里整型指的是32位有符号整型,浮点数是指32位浮点数)。 + + 数组变量在声明时,$`\textbf{INTEGER}`$ 应当大于0。 + + 一次只能声明一个变量。 + +6. $`\text{fun-declaration} \rightarrow \text{type-specifier}\ \textbf{ID}\ \textbf{(}\ \text{params}\ \textbf{)}\ \text{compound-stmt}`$ +7. $`\text{params} \rightarrow \text{param-list}\ |\ \textbf{void}`$ +8. $`\text{param-list} \rightarrow \text{param-list}\ ,\ \text{param}\ |\ \text{param}`$ +9. $`\text{param} \rightarrow \text{type-specifier}\ \textbf{ID}\ |\ \text{type-specifier}\ \textbf{ID}\ \textbf{[]}`$ + + `函数声明`包含了返回类型,标识符,由逗号分隔的`形参`列表,还有一个`复合语句`。 + + 当函数的返回类型是 `void` 时,函数不返回任何值。 + + 函数的参数可以是 `void` ,也可以是一个列表。当函数的`形参`是`void`时,调用该函数时不用传入任何参数。 + + `形参`中跟着中括号代表数组参数,它们可以有不同长度。 + + 整型参数通过值来传入函数(pass by value),而数组参数通过引用来传入函数(pass by reference,即指针)。 + + 函数的`形参`拥有和`函数声明`的`复合语句`相同的作用域,并且每次函数调用都会产生一组独立内存的参数。(和C语言一致) + + 函数可以递归调用。 + +10. 
$`\text{compound-stmt} \rightarrow \textbf{\{}\ \text{local-declarations}\ \text{statement-list} \textbf{\}}`$ + + 一个`复合语句`由一对大括号和其中的`局部声明`与`语句列表`组成 + + `复合语句`执行时,对包含着的语句按照`语句列表`中的顺序执行 + + `局部声明`拥有和`复合语句`中的`语句列表`一样的作用域,且其优先级高于任何同名的全局声明(常见的静态作用域) + +11. $`\text{local-declarations} \rightarrow \text{local-declarations var-declaration}\ |\ \text{empty}`$ +12. $`\text{statement-list} \rightarrow \text{statement-list}\ \text{statement}\ |\ \text{empty}`$ + + `局部声明`和`语句列表`都可以为空(empty表示空字符串,即$`\varepsilon`$) + +13. $`\begin{aligned}\text{statement} \rightarrow\ &\text{expression-stmt}\\ &|\ \text{compound-stmt}\\ &|\ \text{selection-stmt}\\ &|\ \text{iteration-stmt}\\ &|\ \text{return-stmt}\end{aligned}`$ +14. $`\text{expression-stmt} \rightarrow \text{expression}\ \textbf{;}\ |\ \textbf{;}`$ + + `表达式语句`由一个可选的`表达式`(即可以没有`表达式`)和一个分号组成 + + 我们通常使用`表达式语句`中的`表达式`计算时产生的副作用,所以这种`语句`用于赋值和函数调用 + +15. $`\begin{aligned}\text{selection-stmt} \rightarrow\ &\textbf{if}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}\\ &|\ \textbf{if}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}\ \textbf{else}\ \text{statement}\end{aligned}`$ + + `if`语句中的`表达式`将被求值,若结果的值等于0,则第二个`语句`执行(如果存在的话),否则第一个`语句`会执行。 + + 为了避免歧义,$`\textbf{else}`$将会匹配最近的$`\textbf{if}`$ + +16. $`\text{iteration-stmt} \rightarrow \textbf{while}\ \textbf{(}\ \text{expression}\ \textbf{)}\ \text{statement}`$ + + `while`语句是 `cminus-f` 中唯一的`迭代语句`。它执行时,会不断对`表达式`进行求值,并且在对`表达式`的求值结果等于 0 前,循环执行下面的`语句` + +17. $`\text{return-stmt} \rightarrow \textbf{return}\ \textbf{;}\ |\ \textbf{return}\ \text{expression}\ \textbf{;}`$ + + `return`语句可以返回值,也可以不返回值。 + + 未声明为$`\textbf{void}`$类型的函数必须返回和函数返回类型相同的值 + + `return`会将程序的控制转移给当前函数的调用者,而$`\textbf{main}`$函数的`return`会使得程序终止 + +18. $`\text{expression} \rightarrow \text{var}\ \textbf{=}\ \text{expression}\ |\ \text{simple-expression}`$ +19. 
$`\text{var} \rightarrow \textbf{ID}\ |\ \textbf{ID}\ \textbf{[}\ \text{expression} \textbf{]}`$ + + 一个`表达式`可以是一个变量引用(即`var`)接着一个赋值符号(=)以及一个表达式,也可以是一个`简单表达式`。 + + `var` 可以是一个整型变量、浮点变量,或者一个取了下标的数组变量。 + + 数组的下标值为整型,作为数组下标值的表达式计算结果可能需要类型转换变成整型值 + + 一个负的下标会导致程序终止,需要调用框架中的内置函数`neg_idx_except` (该内部函数会主动退出程序,只需要调用该函数即可),但是对于上界并不做检查。 + + 赋值语义为:先找到 `var` 代表的变量地址(如果是数组,需要先对下标表达式求值),然后对右侧的表达式进行求值,求值结果将在转换成变量类型后存储在先前找到的地址中。同时,存储在 `var` 中的值将作为赋值表达式的求值结果。 + + 在 `C` 中,赋值对象(即 `var` )必须是左值,而左值可以通过多种方式获得。`cminus-f`中,唯一的左值就是通过 `var` 的语法得到的,因此 `cminus-f` 通过语法限制了 `var` 为左值,而不是像 `C` 中一样通过类型检查,这也是为什么 `cminus-f` 中不允许进行指针算数。 + +20. $`\text{simple-expression} \rightarrow \text{additive-expression}\ \text{relop}\ \text{additive-expression}\ |\ \text{additive-expression}`$ +21. $`\text{relop}\ \rightarrow \textbf{<=}\ |\ \textbf{<}\ |\ \textbf{>}\ |\ \textbf{>=}\ |\ \textbf{==}\ |\ \textbf{!=}`$ +22. $`\text{additive-expression} \rightarrow \text{additive-expression}\ \text{addop}\ \text{term}\ |\ \text{term}`$ +23. $`\text{addop} \rightarrow \textbf{+}\ |\ \textbf{-}`$ +24. $`\text{term} \rightarrow \text{term}\ \text{mulop}\ \text{factor}\ |\ \text{factor}`$ +25. $`\text{mulop} \rightarrow \textbf{*}\ |\ \textbf{/}`$ + + 一个`简单表达式`是一个`加法表达式`或者两个`加法表达式`的关系运算。当它是`加法表达式`时,它的值就是`加法表达式`的值。而当它是关系运算时,如果关系运算结果为真则值为整型值 1,反之则值为整型值 0。 + + `加法表达式`表现出了四则运算的结合性质与优先级顺序,四则运算的含义和`C`中的整型运算一致。 + + 浮点数和整型一起运算时,整型值需要进行类型提升,转换成浮点数类型,且运算结果也是浮点数类型 + +26. $`\text{factor} \rightarrow \textbf{(}\ \text{expression}\ \textbf{)}\ |\ \text{var}\ |\ \text{call}\ |\ \text{integer}\ |\ \text{float}`$ + + `因数`可以是一个括号包围的`表达式`(此时它的值是`表达式`的值),或者是一个`变量`(此时它的值是`变量`的值),或者是一个`函数调用`(此时它的值是`函数调用`的返回值),或者是一个`数字字面量`(此时它的值为该字面量的值)。当`因数`是数组变量时,除非此时它被用作一个`函数调用`中的数组参数,否则它必须要带有下标。 + +27. $`\text{integer} \rightarrow \textbf{INTEGER}`$ +28. $`\text{float} \rightarrow \textbf{FLOATPOINT}`$ +29. $`\text{call} \rightarrow \textbf{ID}\ \textbf{(}\ \text{args} \textbf{)}`$ +30. $`\text{args} \rightarrow \text{arg-list}\ |\ \text{empty}`$ +31. 
$`\text{arg-list} \rightarrow \text{arg-list}\ \textbf{,}\ \text{expression}\ |\ \text{expression}`$ + +`函数调用`由一个函数的`标识符`与一组括号包围的`实参`组成。`实参`可以为空,也可以是由逗号分隔的`表达式`组成的列表,这些表达式代表着函数调用时,传给`形参`的值。`函数调用`时的`实参`数量和类型必须与`函数声明`中的`形参`一致,必要时需要进行类型转换。 + +`cminus-f`中包含四个预定义的函数 `input` 、 `output`、 `outputFloat` 和 `neg_idx_except`,它们的声明为: + +```c +int input(void) {...} +void output(int x) {...} +void outputFloat(float x) {...} +void neg_idx_except(void) {...} +``` + +* `input` 函数没有形参,且返回一个从标准输入中读到的整型值。 +* `output` 函数接受一个整型参数,然后将它的值打印到标准输出,并输出换行符。 +* `outputFloat` 函数接受一个浮点参数,然后将它的值打印到标准输出,并输出换行符。 +* `neg_idx_except` 函数没有形参,执行后报错并退出 + + +除此之外,其它规则和 C 中类似,比如同一个作用域下不允许定义重名变量或函数(本次实验中不做要求) + +## 提醒与补充 +1. 本次实验存在五种情况下的类型转换 + * 赋值时 + * 返回值类型和函数签名中的返回类型不一致时 + * 函数调用时实参和函数签名中的形参类型不一致时 + * 二元运算的两个参数类型不一致时 + * 下标计算时 +2. 如果对上述的语义有疑问可以通过发issue的方式进行交流(当然,我们推荐组内先进行讨论)。 diff --git a/Documentations/common/figs/cpp_class_inherits.png b/Documentations/common/figs/cpp_class_inherits.png new file mode 100644 index 0000000000000000000000000000000000000000..5f0d20a60e9da3331c5fbc998486753285f2a94a Binary files /dev/null and b/Documentations/common/figs/cpp_class_inherits.png differ diff --git a/Documentations/common/figs/image-20201109145323504.png b/Documentations/common/figs/image-20201109145323504.png new file mode 100644 index 0000000000000000000000000000000000000000..f2596753c044b8f88a7ab40d092588428c9bb3e0 Binary files /dev/null and b/Documentations/common/figs/image-20201109145323504.png differ diff --git a/Documentations/common/logging.md b/Documentations/common/logging.md new file mode 100644 index 0000000000000000000000000000000000000000..fbcb8ac6d655f4c777e9875e4f29f371e28d9b25 --- /dev/null +++ b/Documentations/common/logging.md @@ -0,0 +1,45 @@ +# logging 工具使用 + +## 介绍 +为了方便同学们在之后的实验中 debug,为大家设计了一个C++简单实用的分级日志工具。该工具将日志输出信息从低到高分成四种等级:`DEBUG`,`INFO`,`WARNING`,`ERROR`。通过设定环境变量`LOGV`的值,来选择输出哪些等级的日志。`LOGV`的取值是**0~3**,分别对应到上述的4种级别(`0:DEBUG`,`1:INFO`,`2:WARNING`,`3:ERROR`)。此外输出中还会包含打印该日志的代码所在位置。 + 
+## 使用 +项目编译好之后,可以在`build`目录下运行`test_logging`,该文件的源代码在`tests/test_logging.cpp`。用法如下: +```cpp +#include "logging.hpp" +// 引入头文件 +int main(){ + LOG(DEBUG) << "This is DEBUG log item."; + // 使用关键字LOG,括号中填入要输出的日志等级 + // 紧接着就是<<以及日志的具体信息,就跟使用std::cout一样 + LOG(INFO) << "This is INFO log item"; + LOG(WARNING) << "This is WARNING log item"; + LOG(ERROR) << "This is ERROR log item"; + return 0; +} +``` + +接着在运行该程序的时候,设定环境变量`LOGV=0`,那么程序就会输出级别**大于等于0**的日志信息: +```bash +user@user:${ProjectDir}/build$ LOGV=0 ./test_logging +[DEBUG] (test_logging.cpp:5L main)This is DEBUG log item. +[INFO] (test_logging.cpp:6L main)This is INFO log item +[WARNING] (test_logging.cpp:7L main)This is WARNING log item +[ERROR] (test_logging.cpp:8L main)This is ERROR log item +``` +输出中除了包含日志级别和用户想打印的信息,在圆括号中还包含了打印该信息代码的具体位置(包括文件名称、所在行、所在函数名称),可以很方便地定位到出问题的地方。 + +假如我们觉得程序已经没有问题了,不想看那么多的DEBUG信息,那么我们就可以设定环境变量`LOGV=1`,选择只看**级别大于等于1**的日志信息: +```bash +user@user:${ProjectDir}/build$ LOGV=1 ./test_logging +[INFO] (test_logging.cpp:6L main)This is INFO log item +[WARNING] (test_logging.cpp:7L main)This is WARNING log item +[ERROR] (test_logging.cpp:8L main)This is ERROR log item +``` +当然`LOGV`值越大,日志的信息将更加简略。如果没有设定`LOGV`的环境变量,将默认不输出任何信息。 + +这里再附带一个小技巧,如果日志内容多,在终端观看体验较差,可以输入以下命令将日志输出到文件中: +``` +user@user:${ProjectDir}/build$ LOGV=0 ./test_logging > log +``` +然后就可以输出到文件名为log的文件中啦~ diff --git a/Documentations/common/simple_cpp.md b/Documentations/common/simple_cpp.md new file mode 100644 index 0000000000000000000000000000000000000000..60a07e2dccfeead09e7e8aa2b522d6b78bd3caad --- /dev/null +++ b/Documentations/common/simple_cpp.md @@ -0,0 +1,108 @@ +# CPP 简介 + +C++是一门面向对象的语言,从名字可以看出,C++从C中获得了许多灵感。但是随着C++的发展,它和C的差异也越来越大,一个典型的例子是C是弱类型的语言,而C++越来越支持强类型。因此我们不能直接认为C++兼容C,而应该先了解二者的区别。好在本次实验并不需要你们使用高级的C++特性,所以在此简单介绍一下部分特性便于理解。如果对C++有更深的兴趣,可以从Milo Yip的[游戏程序员的学习之路](https://github.com/miloyip/game-programmer/blob/master/game-programmer-zh-cn.jpg?raw=true)的C++部分开始看。 + +注:本介绍假设你有基本的C语言认知(略高于程设课标准),如果有不懂的C语言术语建议去搜索一下 + +## class 
+ +class是C++面向对象的基础,它相当于对C中的结构体的扩展。除了保留了原来的结构体成员(即成员对象),它增加了成员函数、访问控制、继承和多态等。 + +假设某类为`Animal`,一个它的实例为`animal`,我们可以在`Animal`的定义中增加函数声明`void eat();`,这样声明的函数即是成员函数。成员函数的作用域中自带一个`Animal*`类型的指针`this`,指向调用该成员函数的实例。我们可以像`animal.eat()`这样,用类似访问成员的方法调用该函数。 + +```cpp +// 注:C++中struct也会定义结构体,只是访问控制的默认选项有所区别 +struct Animal { + void eat(); +}; +``` + +类的访问控制指的是在定义class时,可以用`public`与`private`标签,指定接下来的成员是私有或是公开成员。公开成员可以在外部函数使用该类的实例时访问,而私有成员只有该类的成员函数能访问。访问控制的作用是对使用者隐藏实现的细节,而关注于设计者想要公开的接口,从而让使用者能更容易理解如何使用该类。详细介绍在[access specifiers](https://en.cppreference.com/w/cpp/language/access)。 + +类的继承是一种面向对象语言常用的代码复用方法,也是一种非常直观的抽象方式。我们可以定义`struct Cat : Animal`来声明`Cat`类是`Animal`类的子类,也就是`Cat`继承了`Animal`类。此时,新的`Cat`类从`Animal`类中继承了`void eat();`成员函数,并且可以在此之上定义额外的成员函数`void nyan()`。同理,我们也可以定义`struct Dog : Animal`来定义`Dog`类。 +```cpp +struct Cat : Animal { + // 从Animal中继承了void eat(); + void nyan(); +}; + +struct Dog : Animal { + // 从Animal中继承了void eat(); + void wang(); +}; +``` +我们可以通过合理的继承结构来将函数定义在合适的位置,使得大部分通用函数可以共享。 + +同学们可能会想到同是`Animal`,`Cat`和`Dog`可能会有相同名称与参数的函数,但是却有着不同的实现,这时我们就要用到虚函数了。子类中可以定义虚函数的实现,从而使得不同子类对于同一个名字的成员函数有不同实现。虚函数在调用时会通过虚函数表查找到对应的函数实现,而不是和普通类一样查找对应类型的函数实现。 +```cpp +struct Animal { + // = 0 表示该虚函数在Animal类中没有实现 + virtual void say() = 0; +}; + +struct Cat : Animal { + // override表示覆盖父函数中的实现,下同 + void say() override { + std::cout << "I'm a cat" << std::endl; + } +}; + +struct Dog : Animal { + void say() override{ + std::cout << "I'm a dog" << std::endl; + } +}; + +// 试一试 +int main() { + Cat c; + Dog d; + Animal* a; + c.say(); + d.say(); + a = &c; + a->say(); + a = &d; + a->say(); + return 0; +} +``` + +## 函数 + +C++中的函数可以重载,即可以有同名函数,但是要求它们的形参必须不同。如果想进一步了解,可以阅读[详细规则](https://en.cppreference.com/w/cpp/language/overload_resolution)。下面是函数重载的示例: + +```cpp +struct Point { + int x; + int y; +}; + +struct Line { + Point first; + Point second; +}; + +void print(Point p) { + printf("(%d, %d)", p.x, p.y); +} + +void print(Line s) { + print(s.first); // s.first == Point { ... 
} + printf("->"); + print(s.second); // s.second == Point { ... } +} +``` +我们可以看到上面的示例定义了两个`print`函数,并且它们的参数列表的类型不同。它们实际上是两个不同的函数(并且拥有不同的内部名字),但是C++能够正确地识别函数调用时使用了哪一个定义(前提是你正确使用了这一特性),并且在编译时就会链接上正确的实现。我们可以看到,这种特性非常符合人的直觉,并且没有带来额外开销。 + +## 泛型 + +不同于C中使用void指针来实现泛型函数(如`qsort`),C++中使用模板来帮助定义泛型类型与泛型函数等。由于模板过于复杂,这里不做深入介绍。这里你们需要理解的是,C++中的模板定义正如其名,在实例化前只是一个模板而不是参与编译的代码。只有在你使用的过程中指定了参数,编译器才会自动根据模板产生相应的代码,也就是实例化该参数对应的代码。比如`std::vector`是C++中常用的数组容器,在使用时必须指定参数,如果要实例化`int`类型的数组容器,必须要使用`std::vector<int>`。 + +## 内存分配 + +C中,只能使用标准库中的`malloc`与`free`来进行内存分配,并且需要手动在内存上初始化类型。C++中增加了`new`与`delete`关键字,你可以使用`new classname(params)`来申请一块内存,并利用构造函数(`classname(params)`即代表调用`classname`类型的一个构造函数)来完成内存初始化。而`delete variable`可以调用变量对应类型的析构函数来完成数据结构的清理和回收内存。但是它存在着和C一样的二次回收导致报错或忘记回收导致内存泄漏的问题。于是C++11引入了许多智能指针类型,本实验中用到的有两种,分别是: + +1. `std::shared_ptr`: 引用计数智能指针,使用一个共享变量来记录指针管理的对象被引用了几次。当对象引用计数为0时,说明当前该对象不再有引用,并且进程也无法再通过其它方式来引用它,也就意味着可以回收内存,这相当于低级的垃圾回收策略。 +2. `std::unique_ptr`: 表示所有权的智能指针,该指针要求它所管理的对象只能有一次引用,主要用于语义上不允许共享的对象(比如`llvm::Module`)。当该指针自身被销毁时,它也会回收所管理对象的内存。 + diff --git a/README.md b/README.md index 6a63109de5e7760e119eeb834ed22653057f83a9..0f968e3e0f2ab65f0f93e560d4a2ebb13c328ef9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,10 @@ ## 目前已布置的实验 * [lab1](./Documentations/1-parser/) - + DDL:2022-09-30 23:59:59 (UTC+8) + + DDL:2022-10-03 23:59:59 (UTC+8) + +* [lab2](./Documentations/2-ir-gen-warmup/) + + DDL:2022-10-23 23:59:59 (UTC+8) ## FAQ: How to merge upstream remote branches diff --git a/Reports/2-ir-gen-warmup/report.md b/Reports/2-ir-gen-warmup/report.md new file mode 100644 index 0000000000000000000000000000000000000000..1bcaa633a27312acb23d95c053c59e2908908ddd --- /dev/null +++ b/Reports/2-ir-gen-warmup/report.md @@ -0,0 +1,24 @@ +# lab2 实验报告 +学号 姓名 + +## 问题1: getelementptr +请给出 `IR.md` 中提到的两种 getelementptr 用法的区别,并稍加解释: + - `%2 = getelementptr [10 x i32], [10 x i32]* %1, i32 0, i32 %0` + - `%2 = getelementptr i32, i32* %1, i32 %0` + +## 问题2: cpp 与 .ll 的对应 +请说明你的 cpp 代码片段和 .ll 的每个 BasicBlock 
的对应关系。 + +## 问题3: Visitor Pattern +分析 `calc` 程序在输入为 `4 * (8 + 4 - 1) / 2` 时的行为: +1. 请画出该表达式对应的抽象语法树(使用 `calc_ast.hpp` 中的 `CalcAST*` 类型和在该类型中存储的值来表示),并给节点使用数字编号。 +2. 请指出示例代码在用访问者模式遍历该语法树时的遍历顺序。 + +序列请按如下格式指明(序号为问题 3.1 中的编号): +3->2->5->1 + +## 实验难点 +描述在实验中遇到的问题、分析和解决方案。 + +## 实验反馈 +吐槽?建议? diff --git a/include/lightir/BasicBlock.h b/include/lightir/BasicBlock.h new file mode 100644 index 0000000000000000000000000000000000000000..b3355c9220e09f12db20ccf8f407816de7d63be5 --- /dev/null +++ b/include/lightir/BasicBlock.h @@ -0,0 +1,71 @@ +#ifndef SYSYC_BASICBLOCK_H +#define SYSYC_BASICBLOCK_H + +#include "Instruction.h" +#include "Value.h" + +#include +#include +#include +#include +#include + +class Function; +class Instruction; +class Module; + +class BasicBlock : public Value, public llvm::ilist_node { + public: + static BasicBlock *create(Module *m, const std::string &name, Function *parent) { + auto prefix = name.empty() ? "" : "label_"; + return new BasicBlock(m, prefix + name, parent); + } + + // return parent, or null if none. + Function *get_parent() { return parent_; } + + Module *get_module(); + + /****************api about cfg****************/ + + std::list &get_pre_basic_blocks() { return pre_bbs_; } + std::list &get_succ_basic_blocks() { return succ_bbs_; } + void add_pre_basic_block(BasicBlock *bb) { pre_bbs_.push_back(bb); } + void add_succ_basic_block(BasicBlock *bb) { succ_bbs_.push_back(bb); } + + void remove_pre_basic_block(BasicBlock *bb) { pre_bbs_.remove(bb); } + void remove_succ_basic_block(BasicBlock *bb) { succ_bbs_.remove(bb); } + + /****************api about cfg****************/ + + /// Returns the terminator instruction if the block is well formed or null + /// if the block is not well formed. 
+ const Instruction *get_terminator() const; + Instruction *get_terminator() { + return const_cast(static_cast(this)->get_terminator()); + } + + void add_instruction(Instruction *instr); + void add_instr_begin(Instruction *instr); + + void delete_instr(Instruction *instr); + + bool empty() { return instr_list_.empty(); } + + int get_num_of_instr() { return instr_list_.size(); } + llvm::ilist &get_instructions() { return instr_list_; } + + void erase_from_parent(); + + virtual std::string print() override; + + private: + explicit BasicBlock(Module *m, const std::string &name, Function *parent); + std::list pre_bbs_; + std::list succ_bbs_; + // std::list instr_list_; + llvm::ilist instr_list_; + Function *parent_; +}; + +#endif // SYSYC_BASICBLOCK_H diff --git a/include/lightir/Constant.h b/include/lightir/Constant.h new file mode 100644 index 0000000000000000000000000000000000000000..d33419fa125cca3ff53d36efc63f77c4cdb931e9 --- /dev/null +++ b/include/lightir/Constant.h @@ -0,0 +1,77 @@ +// +// Created by cqy on 2020/6/29. 
+// + +#ifndef SYSYC_CONSTANT_H +#define SYSYC_CONSTANT_H +#include "User.h" +#include "Value.h" +#include "Type.h" + +class Constant : public User +{ +private: + // int value; +public: + Constant(Type *ty, const std::string &name = "", unsigned num_ops = 0) + : User(ty, name, num_ops) {} + ~Constant() = default; +}; + +class ConstantInt : public Constant +{ +private: + int value_; + ConstantInt(Type* ty,int val) + : Constant(ty,"",0),value_(val) {} +public: + + static int get_value(ConstantInt *const_val) { return const_val->value_; } + int get_value() { return value_; } + static ConstantInt *get(int val, Module *m); + static ConstantInt *get(bool val, Module *m); + virtual std::string print() override; +}; + +class ConstantArray : public Constant +{ +private: + std::vector const_array; + + ConstantArray(ArrayType *ty, const std::vector &val); +public: + + ~ConstantArray()=default; + + Constant* get_element_value(int index); + + unsigned get_size_of_array() { return const_array.size(); } + + static ConstantArray *get(ArrayType *ty, const std::vector &val); + + virtual std::string print() override; +}; + +class ConstantZero : public Constant +{ +private: + ConstantZero(Type *ty) + : Constant(ty,"",0) {} +public: + static ConstantZero *get(Type *ty, Module *m); + virtual std::string print() override; +}; + +class ConstantFP : public Constant +{ +private: + float val_; + ConstantFP(Type *ty, float val) + : Constant(ty,"",0), val_(val) {} +public: + static ConstantFP *get(float val, Module *m); + float get_value() { return val_; } + virtual std::string print() override; +}; + +#endif //SYSYC_CONSTANT_H diff --git a/include/lightir/Function.h b/include/lightir/Function.h new file mode 100644 index 0000000000000000000000000000000000000000..507c59f15995c1c33ea48bb966dfb5846f56da04 --- /dev/null +++ b/include/lightir/Function.h @@ -0,0 +1,89 @@ +#ifndef SYSYC_FUNCTION_H +#define SYSYC_FUNCTION_H + +#include "BasicBlock.h" +#include "Type.h" +#include "User.h" + +#include 
+#include +#include +#include +#include +#include +#include +#include +class Module; +class Argument; +class Type; +class FunctionType; + +class Function : public Value, public llvm::ilist_node { + public: + Function(FunctionType *ty, const std::string &name, Module *parent); + virtual ~Function(); + static Function *create(FunctionType *ty, const std::string &name, Module *parent); + + FunctionType *get_function_type() const; + + Type *get_return_type() const; + + void add_basic_block(BasicBlock *bb); + + unsigned get_num_of_args() const; + unsigned get_num_basic_blocks() const; + + Module *get_parent() const; + + std::list::iterator arg_begin() { return arguments_.begin(); } + std::list::iterator arg_end() { return arguments_.end(); } + + void remove(BasicBlock *bb); + BasicBlock *get_entry_block() { return &*basic_blocks_.begin(); } + + llvm::ilist &get_basic_blocks() { return basic_blocks_; } + std::list &get_args() { return arguments_; } + + bool is_declaration() { return basic_blocks_.empty(); } + + void set_instr_name(); + std::string print(); + + private: + void build_args(); + + private: + llvm::ilist basic_blocks_; // basic blocks + std::list arguments_; // arguments + Module *parent_; + unsigned seq_cnt_; + // unsigned num_args_; + // We don't need this, all value inside function should be unnamed + // std::map sym_table_; // Symbol table of args/instructions +}; + +// Argument of Function, does not contain actual value +class Argument : public Value { + public: + /// Argument constructor. + explicit Argument(Type *ty, const std::string &name = "", Function *f = nullptr, unsigned arg_no = 0) + : Value(ty, name), parent_(f), arg_no_(arg_no) {} + virtual ~Argument() {} + + inline const Function *get_parent() const { return parent_; } + inline Function *get_parent() { return parent_; } + + /// For example in "void foo(int a, float b)" a is 0 and b is 1. 
+ unsigned get_arg_no() const { + assert(parent_ && "can't get number of unparented arg"); + return arg_no_; + } + + virtual std::string print() override; + + private: + Function *parent_; + unsigned arg_no_; // argument No. +}; + +#endif // SYSYC_FUNCTION_H diff --git a/include/lightir/GlobalVariable.h b/include/lightir/GlobalVariable.h new file mode 100644 index 0000000000000000000000000000000000000000..1d2d8880de72262c180432a3272d85495a6c032e --- /dev/null +++ b/include/lightir/GlobalVariable.h @@ -0,0 +1,26 @@ +// +// Created by cqy on 2020/6/29. +// + +#ifndef SYSYC_GLOBALVARIABLE_H +#define SYSYC_GLOBALVARIABLE_H + +#include "Constant.h" +#include "User.h" + +#include +class Module; +class GlobalVariable : public User, public llvm::ilist_node { + private: + bool is_const_; + Constant *init_val_; + GlobalVariable(std::string name, Module *m, Type *ty, bool is_const, Constant *init = nullptr); + + public: + static GlobalVariable *create(std::string name, Module *m, Type *ty, bool is_const, Constant *init); + virtual ~GlobalVariable() = default; + Constant *get_init() { return init_val_; } + bool is_const() { return is_const_; } + std::string print(); +}; +#endif // SYSYC_GLOBALVARIABLE_H diff --git a/include/lightir/IRBuilder.h b/include/lightir/IRBuilder.h new file mode 100644 index 0000000000000000000000000000000000000000..a06b9679761ff15294803e6fe67ddc570bb06ad5 --- /dev/null +++ b/include/lightir/IRBuilder.h @@ -0,0 +1,101 @@ +#ifndef SYSYC_IRBUILDER_H +#define SYSYC_IRBUILDER_H + +#include "BasicBlock.h" +#include "Function.h" +#include "Instruction.h" +#include "Value.h" + +class IRBuilder { + private: + BasicBlock *BB_; + Module *m_; + + public: + IRBuilder(BasicBlock *bb, Module *m) : BB_(bb), m_(m){}; + ~IRBuilder() = default; + Module *get_module() { return m_; } + BasicBlock *get_insert_block() { return this->BB_; } + void set_insert_point(BasicBlock *bb) { this->BB_ = bb; } // 在某个基本块中插入指令 + BinaryInst *create_iadd(Value *lhs, Value *rhs) { + return 
BinaryInst::create_add(lhs, rhs, this->BB_, m_); + } // 创建加法指令(以及其他算术指令) + BinaryInst *create_isub(Value *lhs, Value *rhs) { return BinaryInst::create_sub(lhs, rhs, this->BB_, m_); } + BinaryInst *create_imul(Value *lhs, Value *rhs) { return BinaryInst::create_mul(lhs, rhs, this->BB_, m_); } + BinaryInst *create_isdiv(Value *lhs, Value *rhs) { return BinaryInst::create_sdiv(lhs, rhs, this->BB_, m_); } + + CmpInst *create_icmp_eq(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::EQ, lhs, rhs, this->BB_, m_); + } + CmpInst *create_icmp_ne(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::NE, lhs, rhs, this->BB_, m_); + } + CmpInst *create_icmp_gt(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::GT, lhs, rhs, this->BB_, m_); + } + CmpInst *create_icmp_ge(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::GE, lhs, rhs, this->BB_, m_); + } + CmpInst *create_icmp_lt(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::LT, lhs, rhs, this->BB_, m_); + } + CmpInst *create_icmp_le(Value *lhs, Value *rhs) { + return CmpInst::create_cmp(CmpInst::LE, lhs, rhs, this->BB_, m_); + } + + CallInst *create_call(Value *func, std::vector args) { + assert(dynamic_cast(func) && "func must be Function * type"); + return CallInst::create(static_cast(func), args, this->BB_); + } + + BranchInst *create_br(BasicBlock *if_true) { return BranchInst::create_br(if_true, this->BB_); } + BranchInst *create_cond_br(Value *cond, BasicBlock *if_true, BasicBlock *if_false) { + return BranchInst::create_cond_br(cond, if_true, if_false, this->BB_); + } + + ReturnInst *create_ret(Value *val) { return ReturnInst::create_ret(val, this->BB_); } + ReturnInst *create_void_ret() { return ReturnInst::create_void_ret(this->BB_); } + + GetElementPtrInst *create_gep(Value *ptr, std::vector idxs) { + return GetElementPtrInst::create_gep(ptr, idxs, this->BB_); + } + + StoreInst *create_store(Value *val, Value *ptr) { return 
StoreInst::create_store(val, ptr, this->BB_); } + LoadInst *create_load(Type *ty, Value *ptr) { return LoadInst::create_load(ty, ptr, this->BB_); } + LoadInst *create_load(Value *ptr) { + assert(ptr->get_type()->is_pointer_type() && "ptr must be pointer type"); + return LoadInst::create_load(ptr->get_type()->get_pointer_element_type(), ptr, this->BB_); + } + + AllocaInst *create_alloca(Type *ty) { return AllocaInst::create_alloca(ty, this->BB_); } + ZextInst *create_zext(Value *val, Type *ty) { return ZextInst::create_zext(val, ty, this->BB_); } + + SiToFpInst *create_sitofp(Value *val, Type *ty) { return SiToFpInst::create_sitofp(val, ty, this->BB_); } + FpToSiInst *create_fptosi(Value *val, Type *ty) { return FpToSiInst::create_fptosi(val, ty, this->BB_); } + + FCmpInst *create_fcmp_ne(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::NE, lhs, rhs, this->BB_, m_); + } + FCmpInst *create_fcmp_lt(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::LT, lhs, rhs, this->BB_, m_); + } + FCmpInst *create_fcmp_le(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::LE, lhs, rhs, this->BB_, m_); + } + FCmpInst *create_fcmp_ge(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::GE, lhs, rhs, this->BB_, m_); + } + FCmpInst *create_fcmp_gt(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::GT, lhs, rhs, this->BB_, m_); + } + FCmpInst *create_fcmp_eq(Value *lhs, Value *rhs) { + return FCmpInst::create_fcmp(FCmpInst::EQ, lhs, rhs, this->BB_, m_); + } + + BinaryInst *create_fadd(Value *lhs, Value *rhs) { return BinaryInst::create_fadd(lhs, rhs, this->BB_, m_); } + BinaryInst *create_fsub(Value *lhs, Value *rhs) { return BinaryInst::create_fsub(lhs, rhs, this->BB_, m_); } + BinaryInst *create_fmul(Value *lhs, Value *rhs) { return BinaryInst::create_fmul(lhs, rhs, this->BB_, m_); } + BinaryInst *create_fdiv(Value *lhs, Value *rhs) { return BinaryInst::create_fdiv(lhs, rhs, this->BB_, m_); } +}; + +#endif 
// SYSYC_IRBUILDER_H diff --git a/include/lightir/IRprinter.h b/include/lightir/IRprinter.h new file mode 100644 index 0000000000000000000000000000000000000000..2ac936d9d706bf3936afaf2b1864dcc80a870827 --- /dev/null +++ b/include/lightir/IRprinter.h @@ -0,0 +1,13 @@ +#include "BasicBlock.h" +#include "Constant.h" +#include "Function.h" +#include "GlobalVariable.h" +#include "Instruction.h" +#include "Module.h" +#include "Type.h" +#include "User.h" +#include "Value.h" + +std::string print_as_op(Value *v, bool print_ty); +std::string print_cmp_type(CmpInst::CmpOp op); +std::string print_fcmp_type(FCmpInst::CmpOp op); diff --git a/include/lightir/Instruction.h b/include/lightir/Instruction.h new file mode 100644 index 0000000000000000000000000000000000000000..fe502e7c46c11bf6d8dbb969ee9f7e048eee8565 --- /dev/null +++ b/include/lightir/Instruction.h @@ -0,0 +1,435 @@ +#ifndef SYSYC_INSTRUCTION_H +#define SYSYC_INSTRUCTION_H + +#include "Type.h" +#include "User.h" + +#include + +class BasicBlock; +class Function; + +class Instruction : public User, public llvm::ilist_node { + public: + enum OpID { + // Terminator Instructions + ret, + br, + // Standard binary operators + add, + sub, + mul, + sdiv, + // float binary operators + fadd, + fsub, + fmul, + fdiv, + // Memory operators + alloca, + load, + store, + // Other operators + cmp, + fcmp, + phi, + call, + getelementptr, + zext, // zero extend + fptosi, + sitofp + // float binary operators Logical operators + + }; + // create instruction, auto insert to bb + // ty here is result type + Instruction(Type *ty, OpID id, unsigned num_ops, BasicBlock *parent); + Instruction(Type *ty, OpID id, unsigned num_ops); + virtual ~Instruction() = default; + inline const BasicBlock *get_parent() const { return parent_; } + inline BasicBlock *get_parent() { return parent_; } + void set_parent(BasicBlock *parent) { this->parent_ = parent; } + // Return the function this instruction belongs to. 
+ Function *get_function(); + Module *get_module(); + + OpID get_instr_type() const { return op_id_; } + std::string get_instr_op_name() { + switch (op_id_) { + case ret: return "ret"; break; + case br: return "br"; break; + case add: return "add"; break; + case sub: return "sub"; break; + case mul: return "mul"; break; + case sdiv: return "sdiv"; break; + case fadd: return "fadd"; break; + case fsub: return "fsub"; break; + case fmul: return "fmul"; break; + case fdiv: return "fdiv"; break; + case alloca: return "alloca"; break; + case load: return "load"; break; + case store: return "store"; break; + case cmp: return "cmp"; break; + case fcmp: return "fcmp"; break; + case phi: return "phi"; break; + case call: return "call"; break; + case getelementptr: return "getelementptr"; break; + case zext: return "zext"; break; + case fptosi: return "fptosi"; break; + case sitofp: return "sitofp"; break; + + default: return ""; break; + } + } + + bool is_void() { + return ((op_id_ == ret) || (op_id_ == br) || (op_id_ == store) || + (op_id_ == call && this->get_type()->is_void_type())); + } + + bool is_phi() { return op_id_ == phi; } + bool is_store() { return op_id_ == store; } + bool is_alloca() { return op_id_ == alloca; } + bool is_ret() { return op_id_ == ret; } + bool is_load() { return op_id_ == load; } + bool is_br() { return op_id_ == br; } + + bool is_add() { return op_id_ == add; } + bool is_sub() { return op_id_ == sub; } + bool is_mul() { return op_id_ == mul; } + bool is_div() { return op_id_ == sdiv; } + + bool is_fadd() { return op_id_ == fadd; } + bool is_fsub() { return op_id_ == fsub; } + bool is_fmul() { return op_id_ == fmul; } + bool is_fdiv() { return op_id_ == fdiv; } + bool is_fp2si() { return op_id_ == fptosi; } + bool is_si2fp() { return op_id_ == sitofp; } + + bool is_cmp() { return op_id_ == cmp; } + bool is_fcmp() { return op_id_ == fcmp; } + + bool is_call() { return op_id_ == call; } + bool is_gep() { return op_id_ == getelementptr; } + bool 
is_zext() { return op_id_ == zext; } + + bool isBinary() { + return (is_add() || is_sub() || is_mul() || is_div() || is_fadd() || is_fsub() || is_fmul() || is_fdiv()) && + (get_num_operand() == 2); + } + + bool isTerminator() { return is_br() || is_ret(); } + + private: + OpID op_id_; + unsigned num_ops_; + BasicBlock *parent_; +}; + +namespace detail { +template +struct tag { + using type = T; +}; +template +struct select_last { + // Use a fold-expression to fold the comma operator over the parameter pack. + using type = typename decltype((tag{}, ...))::type; +}; +template +using select_last_t = typename select_last::type; +}; // namespace detail + +template +inline constexpr bool always_false_v = false; + +template +class BaseInst : public Instruction { + protected: + template + static Inst *create(Args &&...args) { + if constexpr (std::is_same_v>, BasicBlock *>) { + auto ptr = new Inst(std::forward(args)...); + return ptr; + } else + static_assert(always_false_v, "ERROR"); + } + + template + BaseInst(Args &&...args) : Instruction(std::forward(args)...) 
{} +}; + +class BinaryInst : public BaseInst { + friend BaseInst; + + private: + BinaryInst(Type *ty, OpID id, Value *v1, Value *v2, BasicBlock *bb); + + public: + // create add instruction, auto insert to bb + static BinaryInst *create_add(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create sub instruction, auto insert to bb + static BinaryInst *create_sub(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create mul instruction, auto insert to bb + static BinaryInst *create_mul(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create Div instruction, auto insert to bb + static BinaryInst *create_sdiv(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create fadd instruction, auto insert to bb + static BinaryInst *create_fadd(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create fsub instruction, auto insert to bb + static BinaryInst *create_fsub(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create fmul instruction, auto insert to bb + static BinaryInst *create_fmul(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + // create fDiv instruction, auto insert to bb + static BinaryInst *create_fdiv(Value *v1, Value *v2, BasicBlock *bb, Module *m); + + virtual std::string print() override; + + private: + void assertValid(); +}; + +class CmpInst : public BaseInst { + friend BaseInst; + + public: + enum CmpOp { + EQ, // == + NE, // != + GT, // > + GE, // >= + LT, // < + LE // <= + }; + + private: + CmpInst(Type *ty, CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb); + + public: + static CmpInst *create_cmp(CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb, Module *m); + + CmpOp get_cmp_op() { return cmp_op_; } + + virtual std::string print() override; + + private: + CmpOp cmp_op_; + + void assertValid(); +}; + +class FCmpInst : public BaseInst { + friend BaseInst; + + public: + enum CmpOp { + EQ, // == + NE, // != + GT, // > + GE, // >= + LT, // < + LE // <= + }; + + private: + FCmpInst(Type *ty, CmpOp op, Value *lhs, 
Value *rhs, BasicBlock *bb); + + public: + static FCmpInst *create_fcmp(CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb, Module *m); + + CmpOp get_cmp_op() { return cmp_op_; } + + virtual std::string print() override; + + private: + CmpOp cmp_op_; + + void assert_valid(); +}; + +class CallInst : public BaseInst { + friend BaseInst; + + protected: + CallInst(Function *func, std::vector args, BasicBlock *bb); + + public: + static CallInst *create(Function *func, std::vector args, BasicBlock *bb); + FunctionType *get_function_type() const; + + virtual std::string print() override; +}; + +class BranchInst : public BaseInst { + friend BaseInst; + + private: + BranchInst(Value *cond, BasicBlock *if_true, BasicBlock *if_false, BasicBlock *bb); + BranchInst(BasicBlock *if_true, BasicBlock *bb); + + public: + static BranchInst *create_cond_br(Value *cond, BasicBlock *if_true, BasicBlock *if_false, BasicBlock *bb); + static BranchInst *create_br(BasicBlock *if_true, BasicBlock *bb); + + bool is_cond_br() const; + + virtual std::string print() override; +}; + +class ReturnInst : public BaseInst { + friend BaseInst; + + private: + ReturnInst(Value *val, BasicBlock *bb); + ReturnInst(BasicBlock *bb); + + public: + static ReturnInst *create_ret(Value *val, BasicBlock *bb); + static ReturnInst *create_void_ret(BasicBlock *bb); + bool is_void_ret() const; + + virtual std::string print() override; +}; + +class GetElementPtrInst : public BaseInst { + friend BaseInst; + + private: + GetElementPtrInst(Value *ptr, std::vector idxs, BasicBlock *bb); + + public: + static Type *get_element_type(Value *ptr, std::vector idxs); + static GetElementPtrInst *create_gep(Value *ptr, std::vector idxs, BasicBlock *bb); + Type *get_element_type() const; + + virtual std::string print() override; + + private: + Type *element_ty_; +}; + +class StoreInst : public BaseInst { + friend BaseInst; + + private: + StoreInst(Value *val, Value *ptr, BasicBlock *bb); + + public: + static StoreInst 
*create_store(Value *val, Value *ptr, BasicBlock *bb); + + Value *get_rval() { return this->get_operand(0); } + Value *get_lval() { return this->get_operand(1); } + + virtual std::string print() override; +}; + +class LoadInst : public BaseInst { + friend BaseInst; + + private: + LoadInst(Type *ty, Value *ptr, BasicBlock *bb); + + public: + static LoadInst *create_load(Type *ty, Value *ptr, BasicBlock *bb); + Value *get_lval() { return this->get_operand(0); } + + Type *get_load_type() const; + + virtual std::string print() override; +}; + +class AllocaInst : public BaseInst { + friend BaseInst; + + private: + AllocaInst(Type *ty, BasicBlock *bb); + + public: + static AllocaInst *create_alloca(Type *ty, BasicBlock *bb); + + Type *get_alloca_type() const; + + virtual std::string print() override; + + private: + Type *alloca_ty_; +}; + +class ZextInst : public BaseInst { + friend BaseInst; + + private: + ZextInst(OpID op, Value *val, Type *ty, BasicBlock *bb); + + public: + static ZextInst *create_zext(Value *val, Type *ty, BasicBlock *bb); + + Type *get_dest_type() const; + + virtual std::string print() override; + + private: + Type *dest_ty_; +}; + +class FpToSiInst : public BaseInst { + friend BaseInst; + + private: + FpToSiInst(OpID op, Value *val, Type *ty, BasicBlock *bb); + + public: + static FpToSiInst *create_fptosi(Value *val, Type *ty, BasicBlock *bb); + + Type *get_dest_type() const; + + virtual std::string print() override; + + private: + Type *dest_ty_; +}; + +class SiToFpInst : public BaseInst { + friend BaseInst; + + private: + SiToFpInst(OpID op, Value *val, Type *ty, BasicBlock *bb); + + public: + static SiToFpInst *create_sitofp(Value *val, Type *ty, BasicBlock *bb); + + Type *get_dest_type() const; + + virtual std::string print() override; + + private: + Type *dest_ty_; +}; + +class PhiInst : public BaseInst { + friend BaseInst; + + private: + PhiInst(OpID op, std::vector vals, std::vector val_bbs, Type *ty, BasicBlock *bb); + PhiInst(Type *ty, 
OpID op, unsigned num_ops, BasicBlock *bb) : BaseInst(ty, op, num_ops, bb) {} + Value *l_val_; + + public: + static PhiInst *create_phi(Type *ty, BasicBlock *bb); + Value *get_lval() { return l_val_; } + void set_lval(Value *l_val) { l_val_ = l_val; } + void add_phi_pair_operand(Value *val, Value *pre_bb) { + this->add_operand(val); + this->add_operand(pre_bb); + } + virtual std::string print() override; +}; + +#endif // SYSYC_INSTRUCTION_H diff --git a/include/lightir/Module.h b/include/lightir/Module.h new file mode 100644 index 0000000000000000000000000000000000000000..69282763f3f74729d24fab61d7a57f112b6a003f --- /dev/null +++ b/include/lightir/Module.h @@ -0,0 +1,64 @@ +#ifndef SYSYC_MODULE_H +#define SYSYC_MODULE_H + +#include "Function.h" +#include "GlobalVariable.h" +#include "Instruction.h" +#include "Type.h" +#include "Value.h" + +#include +#include +#include +#include +#include +#include + +class GlobalVariable; +class Function; +class Module { + public: + explicit Module(std::string name); + ~Module(); + + Type *get_void_type(); + Type *get_label_type(); + IntegerType *get_int1_type(); + IntegerType *get_int32_type(); + PointerType *get_int32_ptr_type(); + FloatType *get_float_type(); + PointerType *get_float_ptr_type(); + + PointerType *get_pointer_type(Type *contained); + ArrayType *get_array_type(Type *contained, unsigned num_elements); + FunctionType *get_function_type(Type *retty, std::vector &args); + + void add_function(Function *f); + llvm::ilist &get_functions(); + void add_global_variable(GlobalVariable *g); + llvm::ilist &get_global_variable(); + std::string get_instr_op_name(Instruction::OpID instr) { return instr_id2string_[instr]; } + void set_print_name(); + std::string print(); + + private: + llvm::ilist global_list_; // The Global Variables in the module + llvm::ilist function_list_; // The Functions in the module + std::map value_sym_; // Symbol table for values + std::map instr_id2string_; // Instruction from opid to string + + 
std::string module_name_; // Human readable identifier for the module + std::string source_file_name_; // Original source file name for module, for test and debug + + private: + std::unique_ptr int1_ty_; + std::unique_ptr int32_ty_; + std::unique_ptr label_ty_; + std::unique_ptr void_ty_; + std::unique_ptr float32_ty_; + std::map> pointer_map_; + std::map, std::unique_ptr> array_map_; + std::map>, std::unique_ptr> function_map_; +}; + +#endif // SYSYC_MODULE_H diff --git a/include/lightir/Type.h b/include/lightir/Type.h new file mode 100644 index 0000000000000000000000000000000000000000..945e807a28105acd0e12e3ad4bf89c2892a892a0 --- /dev/null +++ b/include/lightir/Type.h @@ -0,0 +1,148 @@ +#ifndef SYSYC_TYPE_H +#define SYSYC_TYPE_H + +#include +#include + +class Module; +class IntegerType; +class FunctionType; +class ArrayType; +class PointerType; +class FloatType; + +class Type { + public: + enum TypeID { + VoidTyID, // Void + LabelTyID, // Labels, e.g., BasicBlock + IntegerTyID, // Integers, include 32 bits and 1 bit + FunctionTyID, // Functions + ArrayTyID, // Arrays + PointerTyID, // Pointer + FloatTyID // float + }; + + explicit Type(TypeID tid, Module *m); + ~Type() = default; + + TypeID get_type_id() const { return tid_; } + + bool is_void_type() const { return get_type_id() == VoidTyID; } + + bool is_label_type() const { return get_type_id() == LabelTyID; } + + bool is_integer_type() const { return get_type_id() == IntegerTyID; } + + bool is_function_type() const { return get_type_id() == FunctionTyID; } + + bool is_array_type() const { return get_type_id() == ArrayTyID; } + + bool is_pointer_type() const { return get_type_id() == PointerTyID; } + + bool is_float_type() const { return get_type_id() == FloatTyID; } + + static bool is_eq_type(Type *ty1, Type *ty2); + + static Type *get_void_type(Module *m); + + static Type *get_label_type(Module *m); + + static IntegerType *get_int1_type(Module *m); + + static IntegerType *get_int32_type(Module *m); + + static 
PointerType *get_int32_ptr_type(Module *m); + + static FloatType *get_float_type(Module *m); + + static PointerType *get_float_ptr_type(Module *m); + + static PointerType *get_pointer_type(Type *contained); + + static ArrayType *get_array_type(Type *contained, unsigned num_elements); + + Type *get_pointer_element_type(); + + Type *get_array_element_type(); + + int get_size(); + + Module *get_module(); + + std::string print(); + + private: + TypeID tid_; + Module *m_; +}; + +class IntegerType : public Type { + public: + explicit IntegerType(unsigned num_bits, Module *m); + + static IntegerType *get(unsigned num_bits, Module *m); + + unsigned get_num_bits(); + + private: + unsigned num_bits_; +}; + +class FunctionType : public Type { + public: + FunctionType(Type *result, std::vector params); + + static bool is_valid_return_type(Type *ty); + static bool is_valid_argument_type(Type *ty); + + static FunctionType *get(Type *result, std::vector params); + + unsigned get_num_of_args() const; + + Type *get_param_type(unsigned i) const; + std::vector::iterator param_begin() { return args_.begin(); } + std::vector::iterator param_end() { return args_.end(); } + Type *get_return_type() const; + + private: + Type *result_; + std::vector args_; +}; + +class ArrayType : public Type { + public: + ArrayType(Type *contained, unsigned num_elements); + + static bool is_valid_element_type(Type *ty); + + static ArrayType *get(Type *contained, unsigned num_elements); + + Type *get_element_type() const { return contained_; } + unsigned get_num_of_elements() const { return num_elements_; } + + private: + Type *contained_; // The element type of the array. + unsigned num_elements_; // Number of elements in the array. +}; + +class PointerType : public Type { + public: + PointerType(Type *contained); + Type *get_element_type() const { return contained_; } + + static PointerType *get(Type *contained); + + private: + Type *contained_; // The element type of the ptr. 
+}; + +class FloatType : public Type { + public: + FloatType(Module *m); + static FloatType *get(Module *m); + + private: +}; + +#endif // SYSYC_TYPE_H \ No newline at end of file diff --git a/include/lightir/User.h b/include/lightir/User.h new file mode 100644 index 0000000000000000000000000000000000000000..09fb8044ac593420a0e9db4582b71c99d587aba2 --- /dev/null +++ b/include/lightir/User.h @@ -0,0 +1,34 @@ +#ifndef SYSYC_USER_H +#define SYSYC_USER_H + +#include "Value.h" + +#include +// #include + +class User : public Value { + public: + User(Type *ty, const std::string &name = "", unsigned num_ops = 0); + virtual ~User() = default; + + std::vector &get_operands(); + + // start from 0 + Value *get_operand(unsigned i) const; + + // start from 0 + void set_operand(unsigned i, Value *v); + void add_operand(Value *v); + + unsigned get_num_operand() const; + + void remove_use_of_ops(); + void remove_operands(int index1, int index2); + + private: + // std::unique_ptr< std::list > operands_; // operands of this value + std::vector operands_; // operands of this value + unsigned num_ops_; +}; + +#endif // SYSYC_USER_H diff --git a/include/lightir/Value.h b/include/lightir/Value.h new file mode 100644 index 0000000000000000000000000000000000000000..4eeb018231e7c79cfdb36535dba5982b9aef6ef9 --- /dev/null +++ b/include/lightir/Value.h @@ -0,0 +1,48 @@ +#ifndef SYSYC_VALUE_H +#define SYSYC_VALUE_H + +#include +#include +#include + +class Type; +class Value; + +struct Use { + Value *val_; + unsigned arg_no_; // the no. 
of operand, e.g., func(a, b), a is 0, b is 1 + Use(Value *val, unsigned no) : val_(val), arg_no_(no) {} +}; + +class Value { + public: + explicit Value(Type *ty, const std::string &name = ""); + virtual ~Value() = default; + + Type *get_type() const { return type_; } + + std::list &get_use_list() { return use_list_; } + + void add_use(Value *val, unsigned arg_no = 0); + + bool set_name(std::string name) { + if (name_ == "") { + name_ = name; + return true; + } + return false; + } + std::string get_name() const; + + void replace_all_use_with(Value *new_val); + void remove_use(Value *val); + + virtual std::string print() = 0; + + private: + Type *type_; + std::list use_list_; // who use this value + std::string name_; // should we put name field here ? +}; + +#endif // SYSYC_VALUE_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 379e17d20cd2aed92b602e3ce222cfbb469d2724..e595568acb5e0e71abbe87ee5b2754ddd654cff9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,2 +1,4 @@ add_subdirectory(parser) -add_subdirectory(common) \ No newline at end of file +add_subdirectory(common) +add_subdirectory(io) +add_subdirectory(lightir) \ No newline at end of file diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3517289a664e381948d00149c869837d4a980430 --- /dev/null +++ b/src/io/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(cminus_io io.c) + +install( + TARGETS cminus_io + ARCHIVE DESTINATION lib +) diff --git a/src/io/io.c b/src/io/io.c new file mode 100644 index 0000000000000000000000000000000000000000..11b25c0bb5373ebe838c1d9631d749f9ae8c69c0 --- /dev/null +++ b/src/io/io.c @@ -0,0 +1,16 @@ +#include +#include +int input() { + int a = 0; /* scanf leaves a untouched on EOF or malformed input; return 0 instead of an indeterminate value */ + scanf("%d", &a); + return a; +} + +void output(int a) { printf("%d\n", a); } + +void outputFloat(float a) { printf("%f\n", a); } + +void neg_idx_except() { + printf("negative index exception\n"); + exit(0); +} diff --git a/src/lightir/BasicBlock.cpp
b/src/lightir/BasicBlock.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b2788a2312878ba34ee729dafc2e5b20a282222d --- /dev/null +++ b/src/lightir/BasicBlock.cpp @@ -0,0 +1,68 @@ +#include "BasicBlock.h" + +#include "Function.h" +#include "IRprinter.h" +#include "Module.h" + +#include + +BasicBlock::BasicBlock(Module *m, const std::string &name = "", Function *parent = nullptr) + : Value(Type::get_label_type(m), name), parent_(parent) { + assert(parent && "currently parent should not be nullptr"); + parent_->add_basic_block(this); +} + +Module *BasicBlock::get_module() { return get_parent()->get_parent(); } + +void BasicBlock::add_instruction(Instruction *instr) { instr_list_.push_back(instr); } + +void BasicBlock::add_instr_begin(Instruction *instr) { instr_list_.push_front(instr); } + +void BasicBlock::delete_instr(Instruction *instr) { + instr_list_.remove(instr); + instr->remove_use_of_ops(); +} + +const Instruction *BasicBlock::get_terminator() const { + if (instr_list_.empty()) { + return nullptr; + } + switch (instr_list_.back().get_instr_type()) { + case Instruction::ret: return &instr_list_.back(); + + case Instruction::br: return &instr_list_.back(); + + default: return nullptr; + } +} + +void BasicBlock::erase_from_parent() { this->get_parent()->remove(this); } + +std::string BasicBlock::print() { + std::string bb_ir; + bb_ir += this->get_name(); + bb_ir += ":"; + // print prebb + if (!this->get_pre_basic_blocks().empty()) { + bb_ir += " ; preds = "; + } + for (auto bb : this->get_pre_basic_blocks()) { + if (bb != *this->get_pre_basic_blocks().begin()) + bb_ir += ", "; + bb_ir += print_as_op(bb, false); + } + + // print prebb + if (!this->get_parent()) { + bb_ir += "\n"; + bb_ir += "; Error: Block without parent!"; + } + bb_ir += "\n"; + for (auto &instr : this->get_instructions()) { + bb_ir += " "; + bb_ir += instr.print(); + bb_ir += "\n"; + } + + return bb_ir; +} diff --git a/src/lightir/CMakeLists.txt 
b/src/lightir/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f19ab03cbda1a2dc87b47304ae40e3e58854597c --- /dev/null +++ b/src/lightir/CMakeLists.txt @@ -0,0 +1,18 @@ +add_library( + IR_lib STATIC + Type.cpp + User.cpp + Value.cpp + BasicBlock.cpp + Constant.cpp + Function.cpp + GlobalVariable.cpp + Instruction.cpp + Module.cpp + IRprinter.cpp +) + +target_link_libraries( + IR_lib + LLVMSupport +) diff --git a/src/lightir/Constant.cpp b/src/lightir/Constant.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f903466a206659e89cbe5797c548e04940839ca --- /dev/null +++ b/src/lightir/Constant.cpp @@ -0,0 +1,104 @@ +#include "Constant.h" + +#include "Module.h" + +#include +#include +#include + +struct pair_hash { + template + std::size_t operator()(const std::pair val) const { + auto lhs = std::hash()(val.first); + auto rhs = std::hash()(reinterpret_cast(val.second)); + return lhs ^ rhs; + } +}; + +static std::unordered_map, std::unique_ptr, pair_hash> cached_int; +static std::unordered_map, std::unique_ptr, pair_hash> cached_bool; +static std::unordered_map, std::unique_ptr, pair_hash> cached_float; +static std::unordered_map> cached_zero; + +ConstantInt *ConstantInt::get(int val, Module *m) { + if (cached_int.find(std::make_pair(val, m)) != cached_int.end()) + return cached_int[std::make_pair(val, m)].get(); + return (cached_int[std::make_pair(val, m)] = + std::unique_ptr(new ConstantInt(Type::get_int32_type(m), val))) + .get(); +} +ConstantInt *ConstantInt::get(bool val, Module *m) { + if (cached_bool.find(std::make_pair(val, m)) != cached_bool.end()) + return cached_bool[std::make_pair(val, m)].get(); + return (cached_bool[std::make_pair(val, m)] = + std::unique_ptr(new ConstantInt(Type::get_int1_type(m), val ? 
1 : 0))) + .get(); +} +std::string ConstantInt::print() { + std::string const_ir; + Type *ty = this->get_type(); + if (ty->is_integer_type() && static_cast(ty)->get_num_bits() == 1) { + // int1 + const_ir += (this->get_value() == 0) ? "false" : "true"; + } else { + // int32 + const_ir += std::to_string(this->get_value()); + } + return const_ir; +} + +ConstantArray::ConstantArray(ArrayType *ty, const std::vector &val) : Constant(ty, "", val.size()) { + for (int i = 0; i < val.size(); i++) + set_operand(i, val[i]); + this->const_array.assign(val.begin(), val.end()); +} + +Constant *ConstantArray::get_element_value(int index) { return this->const_array[index]; } + +ConstantArray *ConstantArray::get(ArrayType *ty, const std::vector &val) { + return new ConstantArray(ty, val); +} + +std::string ConstantArray::print() { + std::string const_ir; + const_ir += this->get_type()->print(); + const_ir += " "; + const_ir += "["; + for (int i = 0; i < this->get_size_of_array(); i++) { + Constant *element = get_element_value(i); + if (!dynamic_cast(get_element_value(i))) { + const_ir += element->get_type()->print() + " "; // keep a space between the element's type and its value, as print_as_op does + } + const_ir += element->print(); + if (i + 1 < this->get_size_of_array()) { // separator goes between elements only, never after the last one + const_ir += ", "; + } + } + const_ir += "]"; + return const_ir; +} + +ConstantFP *ConstantFP::get(float val, Module *m) { + if (cached_float.find(std::make_pair(val, m)) != cached_float.end()) + return cached_float[std::make_pair(val, m)].get(); + return (cached_float[std::make_pair(val, m)] = + std::unique_ptr(new ConstantFP(Type::get_float_type(m), val))) + .get(); +} + +std::string ConstantFP::print() { + std::stringstream fp_ir_ss; + std::string fp_ir; + double val = this->get_value(); + fp_ir_ss << "0x" << std::hex << *(uint64_t *)&val << std::endl; + fp_ir_ss >> fp_ir; + return fp_ir; +} + +ConstantZero *ConstantZero::get(Type *ty, Module *m) { + if (not cached_zero[ty]) + cached_zero[ty] = std::unique_ptr(new ConstantZero(ty)); + return cached_zero[ty].get(); +} + +std::string
ConstantZero::print() { return "zeroinitializer"; } diff --git a/src/lightir/Function.cpp b/src/lightir/Function.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ac3f828d051866634af692af813503306a23c603 --- /dev/null +++ b/src/lightir/Function.cpp @@ -0,0 +1,133 @@ +#include "Function.h" + +#include "IRprinter.h" +#include "Module.h" + +Function::Function(FunctionType *ty, const std::string &name, Module *parent) + : Value(ty, name), parent_(parent), seq_cnt_(0) { + // num_args_ = ty->getNumParams(); + parent->add_function(this); + build_args(); +} +Function::~Function() { + for (auto *arg : arguments_) + delete arg; +} +Function *Function::create(FunctionType *ty, const std::string &name, Module *parent) { + return new Function(ty, name, parent); +} + +FunctionType *Function::get_function_type() const { return static_cast(get_type()); } + +Type *Function::get_return_type() const { return get_function_type()->get_return_type(); } + +unsigned Function::get_num_of_args() const { return get_function_type()->get_num_of_args(); } + +unsigned Function::get_num_basic_blocks() const { return basic_blocks_.size(); } + +Module *Function::get_parent() const { return parent_; } + +void Function::remove(BasicBlock *bb) { + basic_blocks_.remove(bb); + for (auto pre : bb->get_pre_basic_blocks()) { + pre->remove_succ_basic_block(bb); + } + for (auto succ : bb->get_succ_basic_blocks()) { + succ->remove_pre_basic_block(bb); + } +} + +void Function::build_args() { + auto *func_ty = get_function_type(); + unsigned num_args = get_num_of_args(); + for (int i = 0; i < num_args; i++) { + arguments_.push_back(new Argument(func_ty->get_param_type(i), "", this, i)); + } +} + +void Function::add_basic_block(BasicBlock *bb) { basic_blocks_.push_back(bb); } + +void Function::set_instr_name() { + std::map seq; + for (const auto &arg : this->get_args()) { + if (seq.find(&*arg) == seq.end()) { + auto seq_num = seq.size() + seq_cnt_; + if (arg->set_name("arg" + 
std::to_string(seq_num))) { + seq.insert({&*arg, seq_num}); + } + } + } + for (auto &bb1 : basic_blocks_) { + auto bb = &bb1; + if (seq.find(bb) == seq.end()) { + auto seq_num = seq.size() + seq_cnt_; + if (bb->set_name("label" + std::to_string(seq_num))) { + seq.insert({bb, seq_num}); + } + } + for (auto &instr : bb->get_instructions()) { + if (!instr.is_void() && seq.find(&instr) == seq.end()) { + auto seq_num = seq.size() + seq_cnt_; + if (instr.set_name("op" + std::to_string(seq_num))) { + seq.insert({&instr, seq_num}); + } + } + } + } + seq_cnt_ += seq.size(); +} + +std::string Function::print() { + set_instr_name(); + std::string func_ir; + if (this->is_declaration()) { + func_ir += "declare "; + } else { + func_ir += "define "; + } + + func_ir += this->get_return_type()->print(); + func_ir += " "; + func_ir += print_as_op(this, false); + func_ir += "("; + + // print arg + if (this->is_declaration()) { + for (int i = 0; i < this->get_num_of_args(); i++) { + if (i) + func_ir += ", "; + func_ir += static_cast(this->get_type())->get_param_type(i)->print(); + } + } else { + for (auto arg = this->arg_begin(); arg != arg_end(); arg++) { + if (arg != this->arg_begin()) { + func_ir += ", "; + } + func_ir += (*arg)->print(); + } + } + func_ir += ")"; + + // print bb + if (this->is_declaration()) { + func_ir += "\n"; + } else { + func_ir += " {"; + func_ir += "\n"; + for (auto &bb1 : this->get_basic_blocks()) { + auto bb = &bb1; + func_ir += bb->print(); + } + func_ir += "}"; + } + + return func_ir; +} + +std::string Argument::print() { + std::string arg_ir; + arg_ir += this->get_type()->print(); + arg_ir += " %"; + arg_ir += this->get_name(); + return arg_ir; +} \ No newline at end of file diff --git a/src/lightir/GlobalVariable.cpp b/src/lightir/GlobalVariable.cpp new file mode 100644 index 0000000000000000000000000000000000000000..27b4c13a5b80625bccf69682e9a82d5b412a7d6e --- /dev/null +++ b/src/lightir/GlobalVariable.cpp @@ -0,0 +1,29 @@ +// +// Created by cqy on 
2020/6/29. +// +#include "GlobalVariable.h" + +#include "IRprinter.h" + +GlobalVariable::GlobalVariable(std::string name, Module *m, Type *ty, bool is_const, Constant *init) + : User(ty, name, init != nullptr), is_const_(is_const), init_val_(init) { + m->add_global_variable(this); + if (init) { + this->set_operand(0, init); + } +} // the global's only operand is its initial value (absent when init is null) + +GlobalVariable *GlobalVariable::create(std::string name, Module *m, Type *ty, bool is_const, Constant *init = nullptr) { + return new GlobalVariable(name, m, PointerType::get(ty), is_const, init); +} + +std::string GlobalVariable::print() { + std::string global_val_ir; + global_val_ir += print_as_op(this, false); + global_val_ir += " = "; + global_val_ir += (this->is_const() ? "constant " : "global "); + global_val_ir += this->get_type()->get_pointer_element_type()->print(); + global_val_ir += " "; + global_val_ir += this->get_init() ? this->get_init()->print() : std::string("zeroinitializer"); // create() allows a null initializer: print an explicit zero value instead of dereferencing null + return global_val_ir; +} \ No newline at end of file diff --git a/src/lightir/IRprinter.cpp b/src/lightir/IRprinter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da0a7342bb034899c1b49dd3ec17739f6fcf86c2 --- /dev/null +++ b/src/lightir/IRprinter.cpp @@ -0,0 +1,47 @@ +#include "IRprinter.h" + +std::string print_as_op(Value *v, bool print_ty) { + std::string op_ir; + if (print_ty) { + op_ir += v->get_type()->print(); + op_ir += " "; + } + + if (dynamic_cast(v)) { + op_ir += "@" + v->get_name(); + } else if (dynamic_cast(v)) { + op_ir += "@" + v->get_name(); + } else if (dynamic_cast(v)) { + op_ir += v->print(); + } else { + op_ir += "%" + v->get_name(); + } + + return op_ir; +} + +std::string print_cmp_type(CmpInst::CmpOp op) { + switch (op) { + case CmpInst::GE: return "sge"; break; + case CmpInst::GT: return "sgt"; break; + case CmpInst::LE: return "sle"; break; + case CmpInst::LT: return "slt"; break; + case CmpInst::EQ: return "eq"; break; + case CmpInst::NE: return "ne"; break; + default: break; + } + return "wrong cmpop"; +} + +std::string
print_fcmp_type(FCmpInst::CmpOp op) { + switch (op) { + case FCmpInst::GE: return "uge"; break; + case FCmpInst::GT: return "ugt"; break; + case FCmpInst::LE: return "ule"; break; + case FCmpInst::LT: return "ult"; break; + case FCmpInst::EQ: return "ueq"; break; + case FCmpInst::NE: return "une"; break; + default: break; + } + return "wrong fcmpop"; +} \ No newline at end of file diff --git a/src/lightir/Instruction.cpp b/src/lightir/Instruction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e4cd6cd4c361c9f5f1e322e8274998fe5f2a3fcf --- /dev/null +++ b/src/lightir/Instruction.cpp @@ -0,0 +1,521 @@ +#include "Instruction.h" + +#include "BasicBlock.h" +#include "Function.h" +#include "IRprinter.h" +#include "Module.h" +#include "Type.h" + +#include +#include +#include + +Instruction::Instruction(Type *ty, OpID id, unsigned num_ops, BasicBlock *parent) + : User(ty, "", num_ops), op_id_(id), num_ops_(num_ops), parent_(parent) { + parent_->add_instruction(this); +} + +Instruction::Instruction(Type *ty, OpID id, unsigned num_ops) + : User(ty, "", num_ops), op_id_(id), num_ops_(num_ops), parent_(nullptr) {} + +Function *Instruction::get_function() { return parent_->get_parent(); } + +Module *Instruction::get_module() { return parent_->get_module(); } + +BinaryInst::BinaryInst(Type *ty, OpID id, Value *v1, Value *v2, BasicBlock *bb) : BaseInst(ty, id, 2, bb) { + set_operand(0, v1); + set_operand(1, v2); + // assertValid(); +} + +void BinaryInst::assertValid() { + assert(get_operand(0)->get_type()->is_integer_type()); + assert(get_operand(1)->get_type()->is_integer_type()); + assert(static_cast(get_operand(0)->get_type())->get_num_bits() == + static_cast(get_operand(1)->get_type())->get_num_bits()); +} + +BinaryInst *BinaryInst::create_add(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_int32_type(m), Instruction::add, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_sub(Value *v1, Value *v2, BasicBlock *bb, Module *m) 
{ + return create(Type::get_int32_type(m), Instruction::sub, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_mul(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_int32_type(m), Instruction::mul, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_sdiv(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_int32_type(m), Instruction::sdiv, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_fadd(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_float_type(m), Instruction::fadd, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_fsub(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_float_type(m), Instruction::fsub, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_fmul(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_float_type(m), Instruction::fmul, v1, v2, bb); +} + +BinaryInst *BinaryInst::create_fdiv(Value *v1, Value *v2, BasicBlock *bb, Module *m) { + return create(Type::get_float_type(m), Instruction::fdiv, v1, v2, bb); +} + +std::string BinaryInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += ", "; + if (Type::is_eq_type(this->get_operand(0)->get_type(), this->get_operand(1)->get_type())) { + instr_ir += print_as_op(this->get_operand(1), false); + } else { + instr_ir += print_as_op(this->get_operand(1), true); + } + return instr_ir; +} + +CmpInst::CmpInst(Type *ty, CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb) + : BaseInst(ty, Instruction::cmp, 2, bb), cmp_op_(op) { + set_operand(0, lhs); + set_operand(1, rhs); + // assertValid(); +} + +void CmpInst::assertValid() { + assert(get_operand(0)->get_type()->is_integer_type()); + 
assert(get_operand(1)->get_type()->is_integer_type()); + assert(static_cast(get_operand(0)->get_type())->get_num_bits() == + static_cast(get_operand(1)->get_type())->get_num_bits()); +} + +CmpInst *CmpInst::create_cmp(CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb, Module *m) { + return create(m->get_int1_type(), op, lhs, rhs, bb); +} + +std::string CmpInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += print_cmp_type(this->cmp_op_); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += ", "; + if (Type::is_eq_type(this->get_operand(0)->get_type(), this->get_operand(1)->get_type())) { + instr_ir += print_as_op(this->get_operand(1), false); + } else { + instr_ir += print_as_op(this->get_operand(1), true); + } + return instr_ir; +} + +FCmpInst::FCmpInst(Type *ty, CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb) + : BaseInst(ty, Instruction::fcmp, 2, bb), cmp_op_(op) { + set_operand(0, lhs); + set_operand(1, rhs); + // assertValid(); +} + +void FCmpInst::assert_valid() { + assert(get_operand(0)->get_type()->is_float_type()); + assert(get_operand(1)->get_type()->is_float_type()); +} + +FCmpInst *FCmpInst::create_fcmp(CmpOp op, Value *lhs, Value *rhs, BasicBlock *bb, Module *m) { + return create(m->get_int1_type(), op, lhs, rhs, bb); +} + +std::string FCmpInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += print_fcmp_type(this->cmp_op_); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += ","; + if 
(Type::is_eq_type(this->get_operand(0)->get_type(), this->get_operand(1)->get_type())) { + instr_ir += print_as_op(this->get_operand(1), false); + } else { + instr_ir += print_as_op(this->get_operand(1), true); + } + return instr_ir; +} + +CallInst::CallInst(Function *func, std::vector args, BasicBlock *bb) + : BaseInst(func->get_return_type(), Instruction::call, args.size() + 1, bb) { + assert(func->get_num_of_args() == args.size()); + int num_ops = args.size() + 1; + set_operand(0, func); + for (int i = 1; i < num_ops; i++) { + set_operand(i, args[i - 1]); + } +} + +CallInst *CallInst::create(Function *func, std::vector args, BasicBlock *bb) { + return BaseInst::create(func, args, bb); +} + +FunctionType *CallInst::get_function_type() const { return static_cast(get_operand(0)->get_type()); } + +std::string CallInst::print() { + std::string instr_ir; + if (!this->is_void()) { + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + } + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_function_type()->get_return_type()->print(); + + instr_ir += " "; + assert(dynamic_cast(this->get_operand(0)) && "Wrong call operand function"); + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += "("; + for (int i = 1; i < this->get_num_operand(); i++) { + if (i > 1) + instr_ir += ", "; + instr_ir += this->get_operand(i)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(i), false); + } + instr_ir += ")"; + return instr_ir; +} + +BranchInst::BranchInst(Value *cond, BasicBlock *if_true, BasicBlock *if_false, BasicBlock *bb) + : BaseInst(Type::get_void_type(if_true->get_module()), Instruction::br, 3, bb) { + set_operand(0, cond); + set_operand(1, if_true); + set_operand(2, if_false); +} + +BranchInst::BranchInst(BasicBlock *if_true, BasicBlock *bb) + : BaseInst(Type::get_void_type(if_true->get_module()), Instruction::br, 1, bb) { + set_operand(0, 
if_true); +} + +BranchInst *BranchInst::create_cond_br(Value *cond, BasicBlock *if_true, BasicBlock *if_false, BasicBlock *bb) { + if_true->add_pre_basic_block(bb); + if_false->add_pre_basic_block(bb); + bb->add_succ_basic_block(if_false); + bb->add_succ_basic_block(if_true); + + return create(cond, if_true, if_false, bb); +} + +BranchInst *BranchInst::create_br(BasicBlock *if_true, BasicBlock *bb) { + if_true->add_pre_basic_block(bb); + bb->add_succ_basic_block(if_true); + + return create(if_true, bb); +} + +bool BranchInst::is_cond_br() const { return get_num_operand() == 3; } + +std::string BranchInst::print() { + std::string instr_ir; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + // instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += print_as_op(this->get_operand(0), true); + if (is_cond_br()) { + instr_ir += ", "; + instr_ir += print_as_op(this->get_operand(1), true); + instr_ir += ", "; + instr_ir += print_as_op(this->get_operand(2), true); + } + return instr_ir; +} + +ReturnInst::ReturnInst(Value *val, BasicBlock *bb) + : BaseInst(Type::get_void_type(bb->get_module()), Instruction::ret, 1, bb) { + set_operand(0, val); +} + +ReturnInst::ReturnInst(BasicBlock *bb) + : BaseInst(Type::get_void_type(bb->get_module()), Instruction::ret, 0, bb) {} + +ReturnInst *ReturnInst::create_ret(Value *val, BasicBlock *bb) { return create(val, bb); } + +ReturnInst *ReturnInst::create_void_ret(BasicBlock *bb) { return create(bb); } + +bool ReturnInst::is_void_ret() const { return get_num_operand() == 0; } + +std::string ReturnInst::print() { + std::string instr_ir; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + if (!is_void_ret()) { + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + } else { + instr_ir += "void"; + } + + return instr_ir; +} + 
+GetElementPtrInst::GetElementPtrInst(Value *ptr, std::vector idxs, BasicBlock *bb) + : BaseInst(PointerType::get(get_element_type(ptr, idxs)), + Instruction::getelementptr, + 1 + idxs.size(), + bb) { + set_operand(0, ptr); + for (int i = 0; i < idxs.size(); i++) { + set_operand(i + 1, idxs[i]); + } + element_ty_ = get_element_type(ptr, idxs); +} + +Type *GetElementPtrInst::get_element_type(Value *ptr, std::vector idxs) { + Type *ty = ptr->get_type()->get_pointer_element_type(); + assert("GetElementPtrInst ptr is wrong type" && + (ty->is_array_type() || ty->is_integer_type() || ty->is_float_type())); + if (ty->is_array_type()) { + ArrayType *arr_ty = static_cast(ty); + for (int i = 1; i < idxs.size(); i++) { + ty = arr_ty->get_element_type(); + if (i < idxs.size() - 1) { + assert(ty->is_array_type() && "Index error!"); + } + if (ty->is_array_type()) { + arr_ty = static_cast(ty); + } + } + } + return ty; +} + +Type *GetElementPtrInst::get_element_type() const { return element_ty_; } + +GetElementPtrInst *GetElementPtrInst::create_gep(Value *ptr, std::vector idxs, BasicBlock *bb) { + return create(ptr, idxs, bb); +} + +std::string GetElementPtrInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + assert(this->get_operand(0)->get_type()->is_pointer_type()); + instr_ir += this->get_operand(0)->get_type()->get_pointer_element_type()->print(); + instr_ir += ", "; + for (int i = 0; i < this->get_num_operand(); i++) { + if (i > 0) + instr_ir += ", "; + instr_ir += this->get_operand(i)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(i), false); + } + return instr_ir; +} + +StoreInst::StoreInst(Value *val, Value *ptr, BasicBlock *bb) + : BaseInst(Type::get_void_type(bb->get_module()), Instruction::store, 2, bb) { + set_operand(0, val); + set_operand(1, ptr); +} + +StoreInst 
*StoreInst::create_store(Value *val, Value *ptr, BasicBlock *bb) { return create(val, ptr, bb); } + +std::string StoreInst::print() { + std::string instr_ir; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += ", "; + instr_ir += print_as_op(this->get_operand(1), true); + return instr_ir; +} + +LoadInst::LoadInst(Type *ty, Value *ptr, BasicBlock *bb) : BaseInst(ty, Instruction::load, 1, bb) { + assert(ptr->get_type()->is_pointer_type()); + assert(ty == static_cast(ptr->get_type())->get_element_type()); + set_operand(0, ptr); +} + +LoadInst *LoadInst::create_load(Type *ty, Value *ptr, BasicBlock *bb) { return create(ty, ptr, bb); } + +Type *LoadInst::get_load_type() const { + return static_cast(get_operand(0)->get_type())->get_element_type(); +} + +std::string LoadInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + assert(this->get_operand(0)->get_type()->is_pointer_type()); + instr_ir += this->get_operand(0)->get_type()->get_pointer_element_type()->print(); + instr_ir += ","; + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), true); + return instr_ir; +} + +AllocaInst::AllocaInst(Type *ty, BasicBlock *bb) + : BaseInst(PointerType::get(ty), Instruction::alloca, 0, bb), alloca_ty_(ty) {} + +AllocaInst *AllocaInst::create_alloca(Type *ty, BasicBlock *bb) { return create(ty, bb); } + +Type *AllocaInst::get_alloca_type() const { return alloca_ty_; } + +std::string AllocaInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += 
get_alloca_type()->print(); + return instr_ir; +} + +ZextInst::ZextInst(OpID op, Value *val, Type *ty, BasicBlock *bb) : BaseInst(ty, op, 1, bb), dest_ty_(ty) { + set_operand(0, val); +} + +ZextInst *ZextInst::create_zext(Value *val, Type *ty, BasicBlock *bb) { return create(Instruction::zext, val, ty, bb); } + +Type *ZextInst::get_dest_type() const { return dest_ty_; } + +std::string ZextInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += " to "; + instr_ir += this->get_dest_type()->print(); + return instr_ir; +} + +FpToSiInst::FpToSiInst(OpID op, Value *val, Type *ty, BasicBlock *bb) + : BaseInst(ty, op, 1, bb), dest_ty_(ty) { + set_operand(0, val); +} + +FpToSiInst *FpToSiInst::create_fptosi(Value *val, Type *ty, BasicBlock *bb) { + return create(Instruction::fptosi, val, ty, bb); +} + +Type *FpToSiInst::get_dest_type() const { return dest_ty_; } + +std::string FpToSiInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += " to "; + instr_ir += this->get_dest_type()->print(); + return instr_ir; +} + +SiToFpInst::SiToFpInst(OpID op, Value *val, Type *ty, BasicBlock *bb) + : BaseInst(ty, op, 1, bb), dest_ty_(ty) { + set_operand(0, val); +} + +SiToFpInst *SiToFpInst::create_sitofp(Value *val, Type *ty, BasicBlock *bb) { + return create(Instruction::sitofp, val, ty, bb); +} + +Type *SiToFpInst::get_dest_type() const { return dest_ty_; } + +std::string SiToFpInst::print() { + 
std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + instr_ir += print_as_op(this->get_operand(0), false); + instr_ir += " to "; + instr_ir += this->get_dest_type()->print(); + return instr_ir; +} + +PhiInst::PhiInst(OpID op, std::vector vals, std::vector val_bbs, Type *ty, BasicBlock *bb) + : BaseInst(ty, op, 2 * vals.size()) { + for (int i = 0; i < vals.size(); i++) { + set_operand(2 * i, vals[i]); + set_operand(2 * i + 1, val_bbs[i]); + } + this->set_parent(bb); +} + +PhiInst *PhiInst::create_phi(Type *ty, BasicBlock *bb) { + std::vector vals; + std::vector val_bbs; + return create(Instruction::phi, vals, val_bbs, ty, bb); +} + +std::string PhiInst::print() { + std::string instr_ir; + instr_ir += "%"; + instr_ir += this->get_name(); + instr_ir += " = "; + instr_ir += this->get_module()->get_instr_op_name(this->get_instr_type()); + instr_ir += " "; + instr_ir += this->get_operand(0)->get_type()->print(); + instr_ir += " "; + for (int i = 0; i < this->get_num_operand() / 2; i++) { + if (i > 0) + instr_ir += ", "; + instr_ir += "[ "; + instr_ir += print_as_op(this->get_operand(2 * i), false); + instr_ir += ", "; + instr_ir += print_as_op(this->get_operand(2 * i + 1), false); + instr_ir += " ]"; + } + if (this->get_num_operand() / 2 < this->get_parent()->get_pre_basic_blocks().size()) { + for (auto pre_bb : this->get_parent()->get_pre_basic_blocks()) { + if (std::find(this->get_operands().begin(), this->get_operands().end(), static_cast(pre_bb)) == + this->get_operands().end()) { + // find a pre_bb is not in phi + instr_ir += ", [ undef, " + print_as_op(pre_bb, false) + " ]"; + } + } + } + return instr_ir; +} diff --git a/src/lightir/Module.cpp b/src/lightir/Module.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..88cd072466f0d74e2c729d0745fdb760e535d256 --- /dev/null +++ b/src/lightir/Module.cpp @@ -0,0 +1,100 @@ +#include "Module.h" + +#include "Function.h" +#include "GlobalVariable.h" + +#include + +Module::Module(std::string name) : module_name_(name) { + void_ty_ = std::make_unique(Type::VoidTyID, this); + label_ty_ = std::make_unique(Type::LabelTyID, this); + int1_ty_ = std::make_unique(1, this); + int32_ty_ = std::make_unique(32, this); + float32_ty_ = std::make_unique(this); + // init instr_id2string + instr_id2string_.insert({Instruction::ret, "ret"}); + instr_id2string_.insert({Instruction::br, "br"}); + + instr_id2string_.insert({Instruction::add, "add"}); + instr_id2string_.insert({Instruction::sub, "sub"}); + instr_id2string_.insert({Instruction::mul, "mul"}); + instr_id2string_.insert({Instruction::sdiv, "sdiv"}); + + instr_id2string_.insert({Instruction::fadd, "fadd"}); + instr_id2string_.insert({Instruction::fsub, "fsub"}); + instr_id2string_.insert({Instruction::fmul, "fmul"}); + instr_id2string_.insert({Instruction::fdiv, "fdiv"}); + + instr_id2string_.insert({Instruction::alloca, "alloca"}); + instr_id2string_.insert({Instruction::load, "load"}); + instr_id2string_.insert({Instruction::store, "store"}); + instr_id2string_.insert({Instruction::cmp, "icmp"}); + instr_id2string_.insert({Instruction::fcmp, "fcmp"}); + instr_id2string_.insert({Instruction::phi, "phi"}); + instr_id2string_.insert({Instruction::call, "call"}); + instr_id2string_.insert({Instruction::getelementptr, "getelementptr"}); + instr_id2string_.insert({Instruction::zext, "zext"}); + instr_id2string_.insert({Instruction::sitofp, "sitofp"}); + instr_id2string_.insert({Instruction::fptosi, "fptosi"}); +} + +Module::~Module() {} + +Type *Module::get_void_type() { return void_ty_.get(); } + +Type *Module::get_label_type() { return label_ty_.get(); } + +IntegerType *Module::get_int1_type() { return int1_ty_.get(); } + +IntegerType 
*Module::get_int32_type() { return int32_ty_.get(); } + +PointerType *Module::get_pointer_type(Type *contained) { + if (pointer_map_.find(contained) == pointer_map_.end()) { + pointer_map_[contained] = std::make_unique(contained); + } + return pointer_map_[contained].get(); +} + +ArrayType *Module::get_array_type(Type *contained, unsigned num_elements) { + if (array_map_.find({contained, num_elements}) == array_map_.end()) { + array_map_[{contained, num_elements}] = std::make_unique(contained, num_elements); + } + return array_map_[{contained, num_elements}].get(); +} +FunctionType *Module::get_function_type(Type *retty, std::vector &args) { + if (not function_map_.count({retty, args})) { + function_map_[{retty, args}] = std::make_unique(retty, args); + } + return function_map_[{retty, args}].get(); +} + +PointerType *Module::get_int32_ptr_type() { return get_pointer_type(int32_ty_.get()); } + +FloatType *Module::get_float_type() { return float32_ty_.get(); } + +PointerType *Module::get_float_ptr_type() { return get_pointer_type(float32_ty_.get()); } + +void Module::add_function(Function *f) { function_list_.push_back(f); } +llvm::ilist &Module::get_functions() { return function_list_; } +void Module::add_global_variable(GlobalVariable *g) { global_list_.push_back(g); } +llvm::ilist &Module::get_global_variable() { return global_list_; } + +void Module::set_print_name() { + for (auto &func : this->get_functions()) { + func.set_instr_name(); + } + return; +} + +std::string Module::print() { + std::string module_ir; + for (auto &global_val : this->global_list_) { + module_ir += global_val.print(); + module_ir += "\n"; + } + for (auto &func : this->function_list_) { + module_ir += func.print(); + module_ir += "\n"; + } + return module_ir; +} diff --git a/src/lightir/Type.cpp b/src/lightir/Type.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3cbc28506f78c23bb33c020e8910a11b0eda8feb --- /dev/null +++ b/src/lightir/Type.cpp @@ -0,0 +1,171 @@ 
+#include "Type.h" + +#include "Module.h" + +#include + +Type::Type(TypeID tid, Module *m) { + tid_ = tid; + m_ = m; +} + +Module *Type::get_module() { return m_; } + +bool Type::is_eq_type(Type *ty1, Type *ty2) { return ty1 == ty2; } + +Type *Type::get_void_type(Module *m) { return m->get_void_type(); } + +Type *Type::get_label_type(Module *m) { return m->get_label_type(); } + +IntegerType *Type::get_int1_type(Module *m) { return m->get_int1_type(); } + +IntegerType *Type::get_int32_type(Module *m) { return m->get_int32_type(); } + +PointerType *Type::get_pointer_type(Type *contained) { return PointerType::get(contained); } + +ArrayType *Type::get_array_type(Type *contained, unsigned num_elements) { + return ArrayType::get(contained, num_elements); +} + +PointerType *Type::get_int32_ptr_type(Module *m) { return m->get_int32_ptr_type(); } + +FloatType *Type::get_float_type(Module *m) { return m->get_float_type(); } + +PointerType *Type::get_float_ptr_type(Module *m) { return m->get_float_ptr_type(); } + +Type *Type::get_pointer_element_type() { + if (this->is_pointer_type()) + return static_cast(this)->get_element_type(); + else + return nullptr; +} + +Type *Type::get_array_element_type() { + if (this->is_array_type()) + return static_cast(this)->get_element_type(); + else + return nullptr; +} + +int Type::get_size() { + if (this->is_integer_type()) { + auto bits = static_cast(this)->get_num_bits() / 8; + return bits > 0 ? 
bits : 1; + } + if (this->is_array_type()) { + auto element_size = static_cast(this)->get_element_type()->get_size(); + auto num_elements = static_cast(this)->get_num_of_elements(); + return element_size * num_elements; + } + if (this->is_pointer_type()) { + if (this->get_pointer_element_type()->is_array_type()) { + return this->get_pointer_element_type()->get_size(); + } else { + return 4; + } + } + if (this->is_float_type()) { + return 4; + } + return 0; +} + +std::string Type::print() { + std::string type_ir; + switch (this->get_type_id()) { + case VoidTyID: type_ir += "void"; break; + case LabelTyID: type_ir += "label"; break; + case IntegerTyID: + type_ir += "i"; + type_ir += std::to_string(static_cast(this)->get_num_bits()); + break; + case FunctionTyID: + type_ir += static_cast(this)->get_return_type()->print(); + type_ir += " ("; + for (int i = 0; i < static_cast(this)->get_num_of_args(); i++) { + if (i) + type_ir += ", "; + type_ir += static_cast(this)->get_param_type(i)->print(); + } + type_ir += ")"; + break; + case PointerTyID: + type_ir += this->get_pointer_element_type()->print(); + type_ir += "*"; + break; + case ArrayTyID: + type_ir += "["; + type_ir += std::to_string(static_cast(this)->get_num_of_elements()); + type_ir += " x "; + type_ir += static_cast(this)->get_element_type()->print(); + type_ir += "]"; + break; + case FloatTyID: type_ir += "float"; break; + default: break; + } + return type_ir; +} + +IntegerType::IntegerType(unsigned num_bits, Module *m) : Type(Type::IntegerTyID, m), num_bits_(num_bits) {} + +IntegerType *IntegerType::get(unsigned num_bits, Module *m) { + if (num_bits == 1) { + return m->get_int1_type(); + } else if (num_bits == 32) { + return m->get_int32_type(); + } else { + assert("IntegerType::get has error num_bits"); + } +} + +unsigned IntegerType::get_num_bits() { return num_bits_; } + +FunctionType::FunctionType(Type *result, std::vector params) : Type(Type::FunctionTyID, nullptr) { + assert(is_valid_return_type(result) 
&& "Invalid return type for function!"); + result_ = result; + + for (auto p : params) { + assert(is_valid_argument_type(p) && "Not a valid type for function argument!"); + args_.push_back(p); + } +} + +bool FunctionType::is_valid_return_type(Type *ty) { + return ty->is_integer_type() || ty->is_void_type() || ty->is_float_type(); +} + +bool FunctionType::is_valid_argument_type(Type *ty) { + return ty->is_integer_type() || ty->is_pointer_type() || ty->is_float_type(); +} + +FunctionType *FunctionType::get(Type *result, std::vector params) { + return result->get_module()->get_function_type(result, params); +} + +unsigned FunctionType::get_num_of_args() const { return args_.size(); } + +Type *FunctionType::get_param_type(unsigned i) const { return args_[i]; } + +Type *FunctionType::get_return_type() const { return result_; } + +ArrayType::ArrayType(Type *contained, unsigned num_elements) + : Type(Type::ArrayTyID, contained->get_module()), num_elements_(num_elements) { + assert(is_valid_element_type(contained) && "Not a valid type for array element!"); + contained_ = contained; +} + +bool ArrayType::is_valid_element_type(Type *ty) { + return ty->is_integer_type() || ty->is_array_type() || ty->is_float_type(); +} + +ArrayType *ArrayType::get(Type *contained, unsigned num_elements) { + return contained->get_module()->get_array_type(contained, num_elements); +} + +PointerType::PointerType(Type *contained) : Type(Type::PointerTyID, contained->get_module()), contained_(contained) {} + +PointerType *PointerType::get(Type *contained) { return contained->get_module()->get_pointer_type(contained); } + +FloatType::FloatType(Module *m) : Type(Type::FloatTyID, m) {} + +FloatType *FloatType::get(Module *m) { return m->get_float_type(); } diff --git a/src/lightir/User.cpp b/src/lightir/User.cpp new file mode 100644 index 0000000000000000000000000000000000000000..92bb95e67cab70f82c831384843caafb3b275a51 --- /dev/null +++ b/src/lightir/User.cpp @@ -0,0 +1,43 @@ +#include "User.h" + 
+#include + +User::User(Type *ty, const std::string &name, unsigned num_ops) : Value(ty, name), num_ops_(num_ops) { + // if (num_ops_ > 0) + // operands_.reset(new std::list()); + operands_.resize(num_ops_, nullptr); +} + +std::vector &User::get_operands() { return operands_; } + +Value *User::get_operand(unsigned i) const { return operands_[i]; } + +void User::set_operand(unsigned i, Value *v) { + assert(i < num_ops_ && "set_operand out of index"); + // assert(operands_[i] == nullptr && "ith operand is not null"); + operands_[i] = v; + v->add_use(this, i); +} + +void User::add_operand(Value *v) { + operands_.push_back(v); + v->add_use(this, num_ops_); + num_ops_++; +} + +unsigned User::get_num_operand() const { return num_ops_; } + +void User::remove_use_of_ops() { + for (auto op : operands_) { + op->remove_use(this); + } +} + +void User::remove_operands(int index1, int index2) { + for (int i = index1; i <= index2; i++) { + operands_[i]->remove_use(this); + } + operands_.erase(operands_.begin() + index1, operands_.begin() + index2 + 1); + // std::cout< + +Value::Value(Type *ty, const std::string &name) : type_(ty), name_(name) {} + +void Value::add_use(Value *val, unsigned arg_no) { use_list_.push_back(Use(val, arg_no)); } + +std::string Value::get_name() const { return name_; } + +void Value::replace_all_use_with(Value *new_val) { + for (auto use : use_list_) { + auto val = dynamic_cast(use.val_); + assert(val && "new_val is not a user"); + val->set_operand(use.arg_no_, new_val); + } +} + +void Value::remove_use(Value *val) { + auto is_val = [val](const Use &use) { return use.val_ == val; }; + use_list_.remove_if(is_val); +} diff --git a/tests/2-ir-gen-warmup/CMakeLists.txt b/tests/2-ir-gen-warmup/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e70dd2644f0f3582e78b1cd75d9fd1bad08165b0 --- /dev/null +++ b/tests/2-ir-gen-warmup/CMakeLists.txt @@ -0,0 +1,42 @@ +add_subdirectory(calculator) +add_executable( + gcd_array_generator + 
ta_gcd/gcd_array_generator.cpp +) +target_link_libraries( + gcd_array_generator + IR_lib +) + +# add_executable( +# stu_assign_generator +# stu_cpp/assign_generator.cpp +# ) +# target_link_libraries( +# stu_assign_generator +# IR_lib +# ) +# add_executable( +# stu_fun_generator +# stu_cpp/fun_generator.cpp +# ) +# target_link_libraries( +# stu_fun_generator +# IR_lib +# ) +# add_executable( +# stu_if_generator +# stu_cpp/if_generator.cpp +# ) +# target_link_libraries( +# stu_if_generator +# IR_lib +# ) +# add_executable( +# stu_while_generator +# stu_cpp/while_generator.cpp +# ) +# target_link_libraries( +# stu_while_generator +# IR_lib +# ) diff --git a/tests/2-ir-gen-warmup/c_cases/assign.c b/tests/2-ir-gen-warmup/c_cases/assign.c new file mode 100644 index 0000000000000000000000000000000000000000..ba300f83040d2215484c4968f359df69da9169cf --- /dev/null +++ b/tests/2-ir-gen-warmup/c_cases/assign.c @@ -0,0 +1,6 @@ +int main(){ + int a[10]; + a[0] = 10; + a[1] = a[0] * 2; + return a[1]; +} diff --git a/tests/2-ir-gen-warmup/c_cases/fun.c b/tests/2-ir-gen-warmup/c_cases/fun.c new file mode 100644 index 0000000000000000000000000000000000000000..a0f082b493c20a42b21c3f55aaf6bfe4bd52afd1 --- /dev/null +++ b/tests/2-ir-gen-warmup/c_cases/fun.c @@ -0,0 +1,6 @@ +int callee(int a){ + return 2 * a; +} +int main(){ + return callee(110); +} diff --git a/tests/2-ir-gen-warmup/c_cases/if.c b/tests/2-ir-gen-warmup/c_cases/if.c new file mode 100644 index 0000000000000000000000000000000000000000..317ac49e70a0b121912630883d15d757113a7225 --- /dev/null +++ b/tests/2-ir-gen-warmup/c_cases/if.c @@ -0,0 +1,6 @@ +int main(){ + float a = 5.555; + if(a > 1) + return 233; + return 0; +} diff --git a/tests/2-ir-gen-warmup/c_cases/while.c b/tests/2-ir-gen-warmup/c_cases/while.c new file mode 100644 index 0000000000000000000000000000000000000000..6ba84de24aa6645269301da75ef9b1ed898f6a00 --- /dev/null +++ b/tests/2-ir-gen-warmup/c_cases/while.c @@ -0,0 +1,11 @@ +int main(){ + int a; + int i; + a 
= 10; + i = 0; + while(i < 10){ + i = i + 1; + a = a + i; + } + return a; +} diff --git a/tests/2-ir-gen-warmup/calculator/CMakeLists.txt b/tests/2-ir-gen-warmup/calculator/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2dc3e4433ea413761986b34a87fbf86262279a4d --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/CMakeLists.txt @@ -0,0 +1,21 @@ +flex_target(calc_lex calculator.l ${CMAKE_CURRENT_BINARY_DIR}/calc_lex.c) +bison_target(calc_syntax calculator.y + ${CMAKE_CURRENT_BINARY_DIR}/calc_syntax.c + DEFINES_FILE ${PROJECT_BINARY_DIR}/calculator.h) +add_flex_bison_dependency(calc_lex calc_syntax) +add_library(calc_syntax STATIC + ${BISON_calc_syntax_OUTPUTS} + ${FLEX_calc_lex_OUTPUTS} +) +add_executable( + calc + calc.cpp + calc_ast.cpp + calc_builder.cpp +) +target_link_libraries( + calc + IR_lib + calc_syntax + common +) diff --git a/tests/2-ir-gen-warmup/calculator/calc.cpp b/tests/2-ir-gen-warmup/calculator/calc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26d01a84cc5997fdc1006ea0bc7fb73d48b46924 --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calc.cpp @@ -0,0 +1,48 @@ +extern "C" { + #include "syntax_tree.h" + extern syntax_tree *parse(const char*); +} +#include +#include +#include "calc_ast.hpp" +#include "calc_builder.hpp" +using namespace std::literals::string_literals; + +int main(int argc, char *argv[]) +{ + syntax_tree *tree = NULL; + const char *input = NULL; + + if (argc >= 3) { + printf("usage: %s\n", argv[0]); + printf("usage: %s \n", argv[0]); + return 1; + } + + if (argc == 2) { + input = argv[1]; + } else { + printf("Input an arithmatic expression (press Ctrl+D in a new line after you finish the expression):\n"); + } + + tree = parse(input); + CalcAST ast(tree); + CalcBuilder builder; + auto module = builder.build(ast); + auto IR = module->print(); + + std::ofstream output_stream; + auto output_file = "result.ll"; + output_stream.open(output_file, std::ios::out); + output_stream 
<< "; ModuleID = 'calculator'\n"; + output_stream << IR; + output_stream.close(); + auto command_string = "clang -O0 -w "s + "result.ll -o result -L. -lcminus_io"; + auto ret = std::system(command_string.c_str()); + if (ret) { + printf("something went wrong!\n"); + } else { + printf("result and result.ll have been generated.\n"); + } + return ret; +} diff --git a/tests/2-ir-gen-warmup/calculator/calc_ast.cpp b/tests/2-ir-gen-warmup/calculator/calc_ast.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3d73e4afc3c7e631f8d27309b36ff490d0a914ff --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calc_ast.cpp @@ -0,0 +1,125 @@ +#include "calc_ast.hpp" +#include +#include +#include +#define _AST_NODE_ERROR_ \ + std::cerr << "Abort due to node cast error."\ + "Contact with TAs to solve your problem."\ + << std::endl;\ + std::abort(); +#define _STR_EQ(a, b) (strcmp((a), (b)) == 0) + +void CalcAST::run_visitor(CalcASTVisitor &visitor) { + root->accept(visitor); +} + +CalcAST::CalcAST(syntax_tree* s) { + if (s == nullptr) { + std::cerr << "empty input tree!" 
<< std::endl; + std::abort(); + } + auto node = transform_node_iter(s->root); + del_syntax_tree(s); + root = std::shared_ptr( + static_cast(node)); +} + +CalcASTNode * +CalcAST::transform_node_iter(syntax_tree_node *n) { + if (_STR_EQ(n->name, "input")) { + auto node = new CalcASTInput(); + auto expr_node = + static_cast( + transform_node_iter(n->children[0])); + node->expression = std::shared_ptr(expr_node); + return node; + } else if (_STR_EQ(n->name, "expression")) { + auto node = new CalcASTExpression(); + if (n->children_num == 3) { + auto add_expr_node = + static_cast( + transform_node_iter(n->children[0])); + node->expression = + std::shared_ptr(add_expr_node); + + auto op_name = n->children[1]->children[0]->name; + if (_STR_EQ(op_name, "+")) + node->op = OP_PLUS; + else if (_STR_EQ(op_name, "-")) + node->op = OP_MINUS; + + auto term_node = + static_cast( + transform_node_iter(n->children[2])); + node->term = std::shared_ptr(term_node); + } else { + auto term_node = + static_cast( + transform_node_iter(n->children[0])); + node->term = std::shared_ptr(term_node); + } + return node; + } else if (_STR_EQ(n->name, "term")) { + auto node = new CalcASTTerm(); + if (n->children_num == 3) { + auto term_node = + static_cast( + transform_node_iter(n->children[0])); + node->term = + std::shared_ptr(term_node); + + auto op_name = n->children[1]->children[0]->name; + if (_STR_EQ(op_name, "*")) + node->op = OP_MUL; + else if (_STR_EQ(op_name, "/")) + node->op = OP_DIV; + + auto factor_node = + static_cast( + transform_node_iter(n->children[2])); + node->factor = std::shared_ptr(factor_node); + } else { + auto factor_node = + static_cast( + transform_node_iter(n->children[0])); + node->factor = std::shared_ptr(factor_node); + } + return node; + } else if (_STR_EQ(n->name, "factor")) { + if (n->children_num == 3) { + return transform_node_iter(n->children[1]); + } else { + auto num_node = new CalcASTNum(); + num_node->val = std::stoi(n->children[0]->children[0]->name); + 
return num_node; + } + } else { + std::cerr << "[calc_ast]: transform failure!" << std::endl; + std::abort(); + } +} + + +void CalcASTNum::accept(CalcASTVisitor &visitor) { visitor.visit(*this); } +void CalcASTTerm::accept(CalcASTVisitor &visitor) { visitor.visit(*this); } +void CalcASTExpression::accept(CalcASTVisitor &visitor) { visitor.visit(*this); } + +void CalcASTInput::accept(CalcASTVisitor &visitor) { expression->accept(visitor); } + +void CalcASTFactor::accept(CalcASTVisitor &visitor) { + auto expr = + dynamic_cast(this); + if (expr) { + expr->accept(visitor); + return; + } + + auto num = + dynamic_cast(this); + if (num) { + num->accept(visitor); + return; + } + + _AST_NODE_ERROR_ +} diff --git a/tests/2-ir-gen-warmup/calculator/calc_ast.hpp b/tests/2-ir-gen-warmup/calculator/calc_ast.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fd3d2f538fcd7eb63102a6f90d78e0ebd25ce576 --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calc_ast.hpp @@ -0,0 +1,90 @@ +#ifndef _CALC_AST_HPP_ +#define _CALC_AST_HPP_ +extern "C" { + #include "syntax_tree.h" + extern syntax_tree *parse(const char *input); +} +#include +#include + +enum AddOp { + // + + OP_PLUS, + // - + OP_MINUS +}; + +enum MulOp { + // * + OP_MUL, + // / + OP_DIV +}; + +class CalcAST; + +struct CalcASTNode; +struct CalcASTInput; +struct CalcASTExpression; +struct CalcASTNum; +struct CalcASTTerm; +struct CalcASTFactor; + +class CalcASTVisitor; + +class CalcAST { +public: + CalcAST() = delete; + CalcAST(syntax_tree *); + CalcAST(CalcAST &&tree) { + root = tree.root; + tree.root = nullptr; + }; + CalcASTInput* get_root() { return root.get(); } + void run_visitor(CalcASTVisitor& visitor); +private: + CalcASTNode* transform_node_iter(syntax_tree_node *); + std::shared_ptr root = nullptr; +}; + +struct CalcASTNode { + virtual void accept(CalcASTVisitor &) = 0; +}; + +struct CalcASTInput: CalcASTNode { + virtual void accept(CalcASTVisitor &) override final; + std::shared_ptr expression; 
+}; + +struct CalcASTFactor: CalcASTNode { + virtual void accept(CalcASTVisitor &) override; +}; + +struct CalcASTNum: CalcASTFactor { + virtual void accept(CalcASTVisitor &) override final; + int val; +}; + +struct CalcASTExpression: CalcASTFactor { + virtual void accept(CalcASTVisitor &) override final; + std::shared_ptr expression; + AddOp op; + std::shared_ptr term; +}; + +struct CalcASTTerm : CalcASTNode { + virtual void accept(CalcASTVisitor &) override final; + std::shared_ptr term; + MulOp op; + std::shared_ptr factor; +}; + + +class CalcASTVisitor { +public: + virtual void visit(CalcASTInput &) = 0; + virtual void visit(CalcASTNum &) = 0; + virtual void visit(CalcASTExpression &) = 0; + virtual void visit(CalcASTTerm &) = 0; +}; +#endif diff --git a/tests/2-ir-gen-warmup/calculator/calc_builder.cpp b/tests/2-ir-gen-warmup/calculator/calc_builder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b5c36bf155736ba56d4327a0e3338e4e45195c6 --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calc_builder.cpp @@ -0,0 +1,69 @@ +#include "calc_builder.hpp" +std::unique_ptr +CalcBuilder::build(CalcAST &ast) { + module = std::unique_ptr(new Module("Cminus code")); + builder = new IRBuilder(nullptr, module.get()); + auto TyVoid = Type::get_void_type(module.get()); + TyInt32 = Type::get_int32_type(module.get()); + + std::vector output_params; + output_params.push_back(TyInt32); + auto output_type = FunctionType::get(TyVoid, output_params); + auto output_fun = + Function::create( + output_type, + "output", + module.get()); + auto main = Function::create(FunctionType::get(TyInt32, {}), + "main", module.get()); + auto bb = BasicBlock::create(module.get(), "entry", main); + builder->set_insert_point(bb); + ast.run_visitor(*this); + builder->create_call(output_fun, {val}); + builder->create_ret(ConstantInt::get(0, module.get())); + return std::move(module); +} +void CalcBuilder::visit(CalcASTInput &node) { + node.expression->accept(*this); +} +void 
CalcBuilder::visit(CalcASTExpression &node) { + if (node.expression == nullptr) { + node.term->accept(*this); + } else { + node.expression->accept(*this); + auto l_val = val; + node.term->accept(*this); + auto r_val = val; + switch (node.op) { + case OP_PLUS: + val = builder->create_iadd(l_val, r_val); + break; + case OP_MINUS: + val = builder->create_isub(l_val, r_val); + break; + } + } +} + +void CalcBuilder::visit(CalcASTTerm &node) { + if (node.term == nullptr) { + node.factor->accept(*this); + } else { + node.term->accept(*this); + auto l_val = val; + node.factor->accept(*this); + auto r_val = val; + switch (node.op) { + case OP_MUL: + val = builder->create_imul(l_val, r_val); + break; + case OP_DIV: + val = builder->create_isdiv(l_val, r_val); + break; + } + } +} + +void CalcBuilder::visit(CalcASTNum &node) { + val = ConstantInt::get(node.val, module.get()); +} diff --git a/tests/2-ir-gen-warmup/calculator/calc_builder.hpp b/tests/2-ir-gen-warmup/calculator/calc_builder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b4e2f5e0fb70b0f8ec9413769ffc364b92f605bd --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calc_builder.hpp @@ -0,0 +1,24 @@ +#ifndef _CALC_VISITOR_HPP_ +#define _CALC_VISITOR_HPP_ +#include "BasicBlock.h" +#include "Constant.h" +#include "Function.h" +#include "IRBuilder.h" +#include "Module.h" +#include "Type.h" +#include "calc_ast.hpp" +class CalcBuilder: public CalcASTVisitor { +public: + std::unique_ptr build(CalcAST &ast); +private: + virtual void visit(CalcASTInput &) override final; + virtual void visit(CalcASTNum &) override final; + virtual void visit(CalcASTExpression &) override final; + virtual void visit(CalcASTTerm &) override final; + + IRBuilder *builder; + Value *val; + Type *TyInt32; + std::unique_ptr module; +}; +#endif diff --git a/tests/2-ir-gen-warmup/calculator/calculator.l b/tests/2-ir-gen-warmup/calculator/calculator.l new file mode 100644 index 
0000000000000000000000000000000000000000..f5bc734ec470e74f77439c99331f925c4272352e --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calculator.l @@ -0,0 +1,36 @@ +%option noyywrap +%{ +/*****************声明和选项设置 begin*****************/ +#include +#include + +#include "syntax_tree.h" +#include "calculator.h" + +int lines; +int pos_start; +int pos_end; + +void pass_node(char *text){ + yylval.node = new_syntax_tree_node(text); +} + +/*****************声明和选项设置 end*****************/ + +%} + +%x COMMENT + +%% + +\+ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return ADD;} +\- {pos_start = pos_end; pos_end += 1; pass_node(yytext); return SUB;} +\* {pos_start = pos_end; pos_end += 1; pass_node(yytext); return MUL;} +\/ {pos_start = pos_end; pos_end += 1; pass_node(yytext); return DIV;} +\( {pos_start = pos_end; pos_end += 1; pass_node(yytext); return LPARENTHESE;} +\) {pos_start = pos_end; pos_end += 1; pass_node(yytext); return RPARENTHESE;} +[0-9]+ { pos_start = pos_end; pos_end += strlen(yytext); pass_node(yytext); return NUM; } + +\n {lines++; pos_start = 1; pos_end = 1;} +[ \t] {pos_start = pos_end; pos_end += 1;} +%% diff --git a/tests/2-ir-gen-warmup/calculator/calculator.y b/tests/2-ir-gen-warmup/calculator/calculator.y new file mode 100644 index 0000000000000000000000000000000000000000..0590f0652e9bcc3af3dc894d4cdf3f3b3a6e0f42 --- /dev/null +++ b/tests/2-ir-gen-warmup/calculator/calculator.y @@ -0,0 +1,110 @@ +%{ +#include +#include +#include +#include + +#include "syntax_tree.h" + +// external functions from lex +extern int yylex(); +extern int yyparse(); +extern int yyrestart(); +extern FILE * yyin; + +// external variables from lexical_analyzer module +extern int lines; +extern char * yytext; +extern int pos_end; +extern int pos_start; + +// Global syntax tree +syntax_tree *gt; + +// Error reporting +void yyerror(const char *s); +syntax_tree_node *node(const char *node_name, int children_num, ...); +%} + +%union { + struct _syntax_tree_node * node; 
+ char * name; +} + +%token ADD +%token SUB +%token MUL +%token DIV +%token NUM +%token LPARENTHESE +%token RPARENTHESE +%type input expression addop term mulop factor num + +%start input + +%% +input : expression {$$ = node( "input", 1, $1); gt->root = $$;} + ; +expression : expression addop term {$$ = node( "expression", 3, $1, $2, $3);} + | term {$$ = node( "expression", 1, $1);} + ; + +addop : ADD {$$ = node( "addop", 1, $1);} + | SUB {$$ = node( "addop", 1, $1);} + ; + +term : term mulop factor {$$ = node( "term", 3, $1, $2, $3);} + | factor {$$ = node( "term", 1, $1);} + ; + +mulop : MUL {$$ = node( "mulop", 1, $1);} + | DIV {$$ = node( "mulop", 1, $1);} + ; + +factor : LPARENTHESE expression RPARENTHESE {$$ = node( "factor", 3, $1, $2, $3);} + | num {$$ = node( "factor", 1, $1);} + ; + +num : NUM {$$ = node( "num", 1, $1);} +%% + +void yyerror(const char * s) { + fprintf(stderr, "error at line %d column %d: %s\n", lines, pos_start, s); +} + +syntax_tree *parse(const char *input_path) +{ + if (input_path != NULL) { + if (!(yyin = fopen(input_path, "r"))) { + fprintf(stderr, "[ERR] Open input file %s failed.\n", input_path); + exit(1); + } + } else { + yyin = stdin; + } + + lines = pos_start = pos_end = 1; + gt = new_syntax_tree(); + yyrestart(yyin); + yyparse(); + return gt; +} + +syntax_tree_node *node(const char *name, int children_num, ...) 
+{ + syntax_tree_node *p = new_syntax_tree_node(name); + syntax_tree_node *child; + if (children_num == 0) { + child = new_syntax_tree_node("epsilon"); + syntax_tree_add_child(p, child); + } else { + va_list ap; + va_start(ap, children_num); + for (int i = 0; i < children_num; ++i) { + child = va_arg(ap, syntax_tree_node *); + syntax_tree_add_child(p, child); + } + va_end(ap); + } + return p; +} diff --git a/tests/2-ir-gen-warmup/stu_cpp/assign_generator.cpp b/tests/2-ir-gen-warmup/stu_cpp/assign_generator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_cpp/fun_generator.cpp b/tests/2-ir-gen-warmup/stu_cpp/fun_generator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_cpp/if_generator.cpp b/tests/2-ir-gen-warmup/stu_cpp/if_generator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_cpp/while_generator.cpp b/tests/2-ir-gen-warmup/stu_cpp/while_generator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_ll/assign_hand.ll b/tests/2-ir-gen-warmup/stu_ll/assign_hand.ll new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_ll/fun_hand.ll b/tests/2-ir-gen-warmup/stu_ll/fun_hand.ll new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_ll/if_hand.ll b/tests/2-ir-gen-warmup/stu_ll/if_hand.ll new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/stu_ll/while_hand.ll 
b/tests/2-ir-gen-warmup/stu_ll/while_hand.ll new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/2-ir-gen-warmup/ta_gcd/gcd_array.c b/tests/2-ir-gen-warmup/ta_gcd/gcd_array.c new file mode 100644 index 0000000000000000000000000000000000000000..f747e812cbbfee3dfcb025a394543d60fd846879 --- /dev/null +++ b/tests/2-ir-gen-warmup/ta_gcd/gcd_array.c @@ -0,0 +1,27 @@ +int x[1]; +int y[1]; + +int gcd (int u, int v) { + if (v == 0) return u; + else return gcd(v, u - u / v * v); +} + +int funArray (int u[], int v[]) { + int a; + int b; + int temp; + a = u[0]; + b = v[0]; + if (a < b) { + temp = a; + a = b; + b = temp; + } + return gcd(a, b); +} + +int main(void) { + x[0] = 90; + y[0] = 18; + return funArray(x, y); +} \ No newline at end of file diff --git a/tests/2-ir-gen-warmup/ta_gcd/gcd_array_generator.cpp b/tests/2-ir-gen-warmup/ta_gcd/gcd_array_generator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8f63a8e12bfd738b18fc4363af6a68fbc66786c --- /dev/null +++ b/tests/2-ir-gen-warmup/ta_gcd/gcd_array_generator.cpp @@ -0,0 +1,170 @@ +#include "BasicBlock.h" +#include "Constant.h" +#include "Function.h" +#include "IRBuilder.h" +#include "Module.h" +#include "Type.h" + +#include +#include + +#ifdef DEBUG // 用于调试信息,大家可以在编译过程中通过" -DDEBUG"来开启这一选项 +#define DEBUG_OUTPUT std::cout << __LINE__ << std::endl; // 输出行号的简单示例 +#else +#define DEBUG_OUTPUT +#endif + +#define CONST_INT(num) ConstantInt::get(num, module) + +#define CONST_FP(num) ConstantFP::get(num, module) // 得到常数值的表示,方便后面多次用到 + +int main() { + auto module = new Module("Cminus code"); // module name是什么无关紧要 + auto builder = new IRBuilder(nullptr, module); + Type *Int32Type = Type::get_int32_type(module); + + // 全局数组,x,y + auto *arrayType = ArrayType::get(Int32Type, 1); + auto initializer = ConstantZero::get(Int32Type, module); + auto x = GlobalVariable::create("x", module, arrayType, false, initializer);// 参数解释: 
名字name,所属module,全局变量类型type, + auto y = GlobalVariable::create("y", module, arrayType, false, initializer);// 是否是常量定义(cminus中没有常量概念,应全都是false),初始化常量(ConstantZero类) + + // gcd函数 + // 函数参数类型的vector + std::vector Ints(2, Int32Type); + + //通过返回值类型与参数类型列表得到函数类型 + auto gcdFunTy = FunctionType::get(Int32Type, Ints); + + // 由函数类型得到函数 + auto gcdFun = Function::create(gcdFunTy, + "gcd", module); + + // BB的名字在生成中无所谓,但是可以方便阅读 + auto bb = BasicBlock::create(module, "entry", gcdFun); + + builder->set_insert_point(bb); // 一个BB的开始,将当前插入指令点的位置设在bb + + auto retAlloca = builder->create_alloca(Int32Type); // 在内存中分配返回值的位置 + auto uAlloca = builder->create_alloca(Int32Type); // 在内存中分配参数u的位置 + auto vAlloca = builder->create_alloca(Int32Type); // 在内存中分配参数v的位置 + + std::vector args; // 获取gcd函数的形参,通过Function中的iterator + for (auto arg = gcdFun->arg_begin(); arg != gcdFun->arg_end(); arg++) { + args.push_back(*arg); // * 号运算符是从迭代器中取出迭代器当前指向的元素 + } + + builder->create_store(args[0], uAlloca); // 将参数u store下来 + builder->create_store(args[1], vAlloca); // 将参数v store下来 + + auto vLoad = builder->create_load(vAlloca); // 将参数v load上来 + auto icmp = builder->create_icmp_eq(vLoad, CONST_INT(0)); // v和0的比较,注意ICMPEQ + + auto trueBB = BasicBlock::create(module, "trueBB", gcdFun); // true分支 + auto falseBB = BasicBlock::create(module, "falseBB", gcdFun); // false分支 + auto retBB = BasicBlock::create( + module, "", gcdFun); // return分支,提前create,以便true分支可以br + + auto br = builder->create_cond_br(icmp, trueBB, falseBB); // 条件BR + DEBUG_OUTPUT // 调试的时候故意留下来的,以醒目地提醒你这个调试用的宏定义方法 + builder->set_insert_point(trueBB); // if true; 分支的开始需要SetInsertPoint设置 + auto uLoad = builder->create_load(uAlloca); + builder->create_store(uLoad, retAlloca); + builder->create_br(retBB); // br retBB + + builder->set_insert_point(falseBB); // if false + uLoad = builder->create_load(uAlloca); + vLoad = builder->create_load(vAlloca); + auto div = builder->create_isdiv(uLoad, vLoad); // SDIV - div with S flag + auto mul = 
builder->create_imul(div, vLoad); // MUL - mul + auto sub = builder->create_isub(uLoad, mul); // the same + auto call = builder->create_call(gcdFun, {vLoad, sub}); // 创建call指令 + // {vLoad, sub} - 参数array + builder->create_store(call, retAlloca); + builder->create_br(retBB); // br retBB + + builder->set_insert_point(retBB); // ret分支 + auto retLoad = builder->create_load(retAlloca); + builder->create_ret(retLoad); + + // funArray函数 + auto Int32PtrType = Type::get_int32_ptr_type(module); // 单个参数的类型,指针 + std::vector IntPtrs(2, Int32PtrType); // 参数列表类型 + auto funArrayFunType = FunctionType::get(Int32Type, IntPtrs); // 函数类型 + auto funArrayFun = Function::create(funArrayFunType, "funArray", module); + bb = BasicBlock::create(module, "entry", funArrayFun); + builder->set_insert_point(bb); + auto upAlloca = builder->create_alloca(Int32PtrType); // u的存放 + auto vpAlloca = builder->create_alloca(Int32PtrType); // v的存放 + auto aAlloca = builder->create_alloca(Int32Type); // a的存放 + auto bAlloca = builder->create_alloca(Int32Type); // b的存放 + auto tempAlloca = builder->create_alloca(Int32Type); // temp的存放 + + std::vector args1; //获取funArrayFun函数的形参,通过Function中的iterator + for (auto arg = funArrayFun->arg_begin(); arg != funArrayFun->arg_end(); arg++) { + args1.push_back(*arg); // * 号运算符是从迭代器中取出迭代器当前指向的元素 + } + builder->create_store(args1[0], upAlloca); // 将参数u store下来 + builder->create_store(args1[1], vpAlloca); // 将参数v store下来 + + auto u0pLoad = builder->create_load(upAlloca); // 读取u + auto u0GEP = builder->create_gep(u0pLoad, {CONST_INT(0)}); // GEP: 获取u[0]地址 + auto u0Load = builder->create_load(u0GEP); // 从u[0]地址 读取u[0] + builder->create_store(u0Load, aAlloca); // 将u[0] 写入 a + auto v0pLoad = builder->create_load(vpAlloca); // 同上 + auto v0GEP = builder->create_gep(v0pLoad, {CONST_INT(0)}); + auto v0Load = builder->create_load(v0GEP); + builder->create_store(v0Load, bAlloca); + + auto aLoad = builder->create_load(aAlloca); + auto bLoad = builder->create_load(bAlloca); + icmp = 
builder->create_icmp_lt(aLoad, bLoad); + trueBB = BasicBlock::create(module, "trueBB", funArrayFun); + falseBB = BasicBlock::create(module, "falseBB", funArrayFun); + builder->create_cond_br(icmp, trueBB, falseBB); + + builder->set_insert_point(trueBB); + builder->create_store(aLoad, tempAlloca); + builder->create_store(bLoad, aAlloca); + auto tempLoad = builder->create_load(tempAlloca); + builder->create_store(tempLoad, bAlloca); + builder->create_br(falseBB); // 注意在下一个BB之前要Br一下 + + builder->set_insert_point(falseBB); + aLoad = builder->create_load(aAlloca); + bLoad = builder->create_load(bAlloca); + call = builder->create_call(gcdFun, {aLoad, bLoad}); + builder->create_ret(call); + + + // main函数 + auto mainFun = Function::create(FunctionType::get(Int32Type, {}), + "main", module); + bb = BasicBlock::create(module, "entry", mainFun); + // BasicBlock的名字在生成中无所谓,但是可以方便阅读 + builder->set_insert_point(bb); + + retAlloca = builder->create_alloca(Int32Type); + builder->create_store(CONST_INT(0), retAlloca); // 默认 ret 0 + + auto x0GEP = builder->create_gep(x, {CONST_INT(0), CONST_INT(0)}); // GEP: 这里为什么是{0, 0}呢? (实验报告相关) + builder->create_store(CONST_INT(90), x0GEP); + auto y0GEP = builder->create_gep(y, {CONST_INT(0), CONST_INT(0)}); // GEP: 这里为什么是{0, 0}呢? (实验报告相关) + builder->create_store(CONST_INT(18), y0GEP); + + x0GEP = builder->create_gep(x, {CONST_INT(0), CONST_INT(0)}); + y0GEP = builder->create_gep(y, {CONST_INT(0), CONST_INT(0)}); + call = builder->create_call(funArrayFun, {x0GEP, y0GEP}); // 为什么这里传的是{x0GEP, y0GEP}呢? + + builder->create_ret(call); + // 尽管已经有很多注释,但可能还是会遇到很多bug + // 所以强烈建议配置AutoComplete,效率会大大提高! + // 如果猜不到某个IR指令对应的C++的函数,建议把指令翻译成英语然后在method列表中搜索一下。 + // 最后,这个例子只涉及到了一些基本的指令生成, + // 对于额外的指令,包括数组,在之后的实验中可能需要大家自己搜索一下思考一下, + // 还有涉及到的C++语法,可以及时提问或者向大家提供指导哦! + // 对于这个例子里的代码风格/用法,如果有好的建议也欢迎提出! 
+ std::cout << module->print(); + delete module; + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f485ab290b0a00f2837c47414fd2f16bc493ce6f..f8a17ad98e971a14fdc09eaa5c85feace28e45d5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(parser) +add_subdirectory(2-ir-gen-warmup) add_executable(test_ast test_ast.cpp) add_executable(test_logging test_logging.cpp) target_link_libraries(test_logging common)