add register usage pass (#194)

This commit is contained in:
water111 2021-01-12 19:20:08 -05:00 committed by GitHub
parent fe693b5da2
commit 8f86f0f00e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 273 additions and 14 deletions

View file

@ -32,6 +32,7 @@ add_library(
IR2/AtomicOpBuilder.cpp IR2/AtomicOpBuilder.cpp
IR2/AtomicOpTypeAnalysis.cpp IR2/AtomicOpTypeAnalysis.cpp
IR2/Env.cpp IR2/Env.cpp
IR2/reg_usage.cpp
ObjectFile/LinkedObjectFile.cpp ObjectFile/LinkedObjectFile.cpp
ObjectFile/LinkedObjectFileCreation.cpp ObjectFile/LinkedObjectFileCreation.cpp

View file

@ -6,13 +6,13 @@
#include "CfgVtx.h" #include "CfgVtx.h"
#include "decompiler/util/DecompilerTypeSystem.h" #include "decompiler/util/DecompilerTypeSystem.h"
#include "decompiler/util/TP_Type.h" #include "decompiler/util/TP_Type.h"
// for RegSet:
#include "decompiler/IR2/reg_usage.h"
namespace decompiler { namespace decompiler {
class LinkedObjectFile; class LinkedObjectFile;
class Function; class Function;
using RegSet = std::unordered_set<Register, Register::hash>;
struct BasicBlock { struct BasicBlock {
int start_word; int start_word;
int end_word; int end_word;

View file

@ -165,6 +165,9 @@ class Function {
bool atomic_ops_attempted = false; bool atomic_ops_attempted = false;
bool atomic_ops_succeeded = false; bool atomic_ops_succeeded = false;
std::shared_ptr<FunctionAtomicOps> atomic_ops = nullptr; std::shared_ptr<FunctionAtomicOps> atomic_ops = nullptr;
bool has_reg_use = false;
RegUsageInfo reg_use;
bool has_type_info = false;
Env env; Env env;
} ir2; } ir2;

View file

@ -1,4 +1,4 @@
#include "TypeAnalysis.h" #include "decompiler/Function/Function.h"
#include "decompiler/IR/IR.h" #include "decompiler/IR/IR.h"
#include "third-party/fmt/core.h" #include "third-party/fmt/core.h"
#include "decompiler/config.h" #include "decompiler/config.h"

View file

@ -1,2 +0,0 @@
#pragma once
#include "Function.h"

View file

@ -54,18 +54,17 @@ std::string AtomicOp::reg_type_info_as_string(const TypeState& init_types,
auto read_mask = regs_to_gpr_mask(m_read_regs); auto read_mask = regs_to_gpr_mask(m_read_regs);
auto write_mask = regs_to_gpr_mask(m_write_regs); auto write_mask = regs_to_gpr_mask(m_write_regs);
auto clobber_mask = regs_to_gpr_mask(m_clobber_regs);
result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask), result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask),
end_types.print_gpr_masked(write_mask)); end_types.print_gpr_masked(write_mask));
// auto clobber_mask = regs_to_gpr_mask(m_clobber_regs);
if (clobber_mask) { // if (clobber_mask) {
result += "cl: "; // result += "cl: ";
for (auto& reg : m_clobber_regs) { // for (auto& reg : m_clobber_regs) {
result += reg.to_string(); // result += reg.to_string();
result += ' '; // result += ' ';
} // }
} // }
return result; return result;
} }

View file

@ -0,0 +1,192 @@
#include "reg_usage.h"
#include "decompiler/Function/Function.h"
namespace decompiler {
/*!
 * Create usage info with one default (empty) entry per basic block and per atomic op.
 */
RegUsageInfo::RegUsageInfo(int n_blocks, int n_ops) : block(n_blocks), op(n_ops) {}
namespace {
bool in_set(RegSet& set, const Register& obj) {
return set.find(obj) != set.end();
}
/*!
 * Phase 1: block-local summary for a single basic block.
 *
 * Walks the atomic ops of block block_id from the last op to the first. For each op:
 *   live = registers read by the op
 *   dead = registers written by the op that are not also read by it
 * and folds the op into the summary of the block:
 *   use  = live | (use  & ~dead)
 *   defs = dead | (defs & ~live)
 *
 * The per-op live/dead sets written here are only the op's own contribution; phase3 later
 * replaces live with the registers that are live after the op.
 *
 * NOTE(review): only read_regs/write_regs are consulted, clobbered registers are not
 * considered here - confirm this is intended.
 *
 * @param ops      atomic ops of the function, plus the block -> op index ranges
 * @param block_id index of the block to summarize
 * @param out      usage info to fill in (op entries of this block and block entry block_id)
 */
void phase1(const FunctionAtomicOps& ops, int block_id, RegUsageInfo* out) {
  int end_op = ops.block_id_to_end_atomic_op.at(block_id);
  int start_op = ops.block_id_to_first_atomic_op.at(block_id);

  // the block summary is the same object for every op in the block, look it up once.
  auto& block = out->block.at(block_id);

  for (int i = end_op; i-- > start_op;) {
    const auto& instr = ops.ops.at(i);
    auto& op_info = out->op.at(i);
    auto& lv = op_info.live;
    auto& dd = op_info.dead;

    // everything the op reads is live going into it.
    // bind by reference so the register list is not copied for every op.
    const auto& read = instr->read_regs();
    lv.clear();
    for (const auto& x : read) {
      lv.insert(x);
    }

    // kill things which are overwritten (unless the op also reads them)
    dd.clear();
    const auto& write = instr->write_regs();
    for (const auto& x : write) {
      if (lv.find(x) == lv.end()) {
        dd.insert(x);
      }
    }

    // b.use = i.live | (b.use & !i.dead), updated in place.
    for (const auto& x : dd) {
      block.use.erase(x);
    }
    block.use.insert(lv.begin(), lv.end());

    // b.defs = i.dead | (b.defs & !i.live), updated in place.
    for (const auto& x : lv) {
      block.defs.erase(x);
    }
    block.defs.insert(dd.begin(), dd.end());
  }
}
bool phase2(const std::vector<BasicBlock>& blocks, int block_id, RegUsageInfo* info) {
bool changed = false;
auto& block_info = info->block.at(block_id);
const auto& block_obj = blocks.at(block_id);
auto out = block_info.defs; // copy
for (auto s : {block_obj.succ_branch, block_obj.succ_ft}) {
if (s == -1) {
continue;
}
for (auto in : info->block.at(s).input) {
out.insert(in);
}
}
RegSet in = block_info.use;
for (auto x : out) {
if (!in_set(block_info.defs, x)) {
in.insert(x);
}
}
if (in != block_info.input || out != block_info.output) {
changed = true;
block_info.input = in;
block_info.output = out;
}
return changed;
}
void phase3(const FunctionAtomicOps& ops,
const std::vector<BasicBlock>& blocks,
int block_id,
RegUsageInfo* info) {
RegSet live_local;
const auto& block_obj = blocks.at(block_id);
for (auto s : {block_obj.succ_branch, block_obj.succ_ft}) {
if (s == -1) {
continue;
}
for (auto i : info->block.at(s).input) {
live_local.insert(i);
}
}
int end_op = ops.block_id_to_end_atomic_op.at(block_id);
int start_op = ops.block_id_to_first_atomic_op.at(block_id);
for (int i = end_op; i-- > start_op;) {
auto& lv = info->op.at(i).live;
auto& dd = info->op.at(i).dead;
RegSet new_live = lv;
for (auto x : live_local) {
if (!in_set(dd, x)) {
new_live.insert(x);
}
}
lv = live_local;
live_local = new_live;
}
}
} // namespace
/*!
 * Compute register usage info for every basic block and atomic op of a function.
 *
 * Runs phase1 on every block, repeats phase2 over all blocks until no block changes, then
 * runs phase3 on every block. Finally, for each op it records:
 *  - consumes: registers read by the op whose incoming value is gone afterwards, either
 *    because the register is not live after the op or because the op also writes it.
 *  - written_and_unused: registers written by the op that are not live after it.
 *
 * function.ir2.atomic_ops is dereferenced without a check, so it must be set.
 */
RegUsageInfo analyze_ir2_register_usage(const Function& function) {
  const auto& bbs = function.basic_blocks;
  const auto& atomic_ops = function.ir2.atomic_ops;
  const int n_blocks = int(bbs.size());
  const int n_ops = int(atomic_ops->ops.size());

  RegUsageInfo result(n_blocks, n_ops);

  // block-local use/def summaries
  for (int block_idx = 0; block_idx < n_blocks; block_idx++) {
    phase1(*atomic_ops, block_idx, &result);
  }

  // propagate between blocks until nothing changes. every block is visited on every sweep.
  for (bool changed = true; changed;) {
    changed = false;
    for (int block_idx = 0; block_idx < n_blocks; block_idx++) {
      changed = phase2(bbs, block_idx, &result) || changed;
    }
  }

  // per-op live-out sets
  for (int block_idx = 0; block_idx < n_blocks; block_idx++) {
    phase3(*atomic_ops, bbs, block_idx, &result);
  }

  // we want to know if an op "consumes" a register.
  // this means the value of the register coming in is:
  // A. read by the operation
  // B. dead after the operation.
  for (int op_idx = 0; op_idx < n_ops; op_idx++) {
    const auto& op = atomic_ops->ops.at(op_idx);
    auto& usage = result.op.at(op_idx);

    for (auto reg : op->read_regs()) {
      if (usage.live.find(reg) == usage.live.end()) {
        // not live out, so this op is the last user of the value.
        usage.consumes.insert(reg);
        continue;
      }
      // the register is live out, but if we also write it, the old value is still consumed.
      for (auto wr : op->write_regs()) {
        if (wr == reg) {
          usage.consumes.insert(reg);
        }
      }
    }

    // also useful to know: written, but the result is never used.
    for (auto reg : op->write_regs()) {
      if (usage.live.find(reg) == usage.live.end()) {
        usage.written_and_unused.insert(reg);
      }
    }
  }

  return result;
}
} // namespace decompiler

View file

@ -0,0 +1,30 @@
#pragma once
#include <vector>
#include <unordered_set>
#include "decompiler/Disasm/Register.h"
namespace decompiler {
class Function;

/*! Unordered set of registers, hashed with Register::hash. */
using RegSet = std::unordered_set<Register, Register::hash>;

/*!
 * Register usage information for a single function, as computed by
 * analyze_ir2_register_usage.
 */
struct RegUsageInfo {
  /*! Sets for one basic block. */
  struct PerBlock {
    RegSet use;     // read in the block before being overwritten in the block
    RegSet defs;    // written in the block (and not read again earlier in the block)
    RegSet input;   // use, plus everything in output that is not in defs
    RegSet output;  // defs, plus the input sets of the successor blocks
  };

  /*! Sets for one atomic op. */
  struct PerOp {
    RegSet live;                // live after the op
    RegSet dead;                // written by the op and not read by it
    RegSet consumes;            // read by the op, and not live after it or overwritten by it
    RegSet written_and_unused;  // written by the op and not live after it
  };

  std::vector<PerBlock> block;  // indexed by basic block id
  std::vector<PerOp> op;        // indexed by atomic op id

  RegUsageInfo() = default;

  /*! Create with n_blocks empty block entries and n_ops empty op entries. */
  RegUsageInfo(int n_blocks, int n_ops);
};

/*! Run the register usage analysis on the atomic ops of the given function. */
RegUsageInfo analyze_ir2_register_usage(const Function& function);
}  // namespace decompiler

View file

@ -71,6 +71,7 @@ class ObjectFileDB {
void ir2_basic_block_pass(); void ir2_basic_block_pass();
void ir2_atomic_op_pass(); void ir2_atomic_op_pass();
void ir2_type_analysis_pass(); void ir2_type_analysis_pass();
void ir2_register_usage_pass();
void ir2_write_results(const std::string& output_dir); void ir2_write_results(const std::string& output_dir);
std::string ir2_to_file(ObjectFileData& data); std::string ir2_to_file(ObjectFileData& data);
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);

View file

@ -8,6 +8,7 @@
#include "common/util/Timer.h" #include "common/util/Timer.h"
#include "common/util/FileUtil.h" #include "common/util/FileUtil.h"
#include "decompiler/Function/TypeInspector.h" #include "decompiler/Function/TypeInspector.h"
#include "decompiler/IR2/reg_usage.h"
namespace decompiler { namespace decompiler {
@ -26,6 +27,8 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) {
ir2_atomic_op_pass(); ir2_atomic_op_pass();
lg::info("Running type analysis..."); lg::info("Running type analysis...");
ir2_type_analysis_pass(); ir2_type_analysis_pass();
lg::info("Register usage analysis...");
ir2_register_usage_pass();
lg::info("Writing results..."); lg::info("Writing results...");
ir2_write_results(output_dir); ir2_write_results(output_dir);
} }
@ -239,6 +242,7 @@ void ObjectFileDB::ir2_atomic_op_pass() {
* Analyze registers and determine the type in each register at each instruction. * Analyze registers and determine the type in each register at each instruction.
* - Figure out the type of each function, from configs. * - Figure out the type of each function, from configs.
* - Propagate types. * - Propagate types.
* - NOTE: this will update register usage info more accurately for functions.
*/ */
void ObjectFileDB::ir2_type_analysis_pass() { void ObjectFileDB::ir2_type_analysis_pass() {
Timer timer; Timer timer;
@ -259,6 +263,7 @@ void ObjectFileDB::ir2_type_analysis_pass() {
auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()]; auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()];
if (func.run_type_analysis_ir2(ts, dts, data.linked_data, hints)) { if (func.run_type_analysis_ir2(ts, dts, data.linked_data, hints)) {
successful_functions++; successful_functions++;
func.ir2.has_type_info = true;
} else { } else {
func.warnings.append(";; Type analysis failed\n"); func.warnings.append(";; Type analysis failed\n");
} }
@ -273,6 +278,25 @@ void ObjectFileDB::ir2_type_analysis_pass() {
attempted_functions, non_asm_functions, total_functions, timer.getMs()); attempted_functions, non_asm_functions, total_functions, timer.getMs());
} }
/*!
 * Run register usage analysis on every function that is not suspected to be assembly and
 * whose atomic op conversion succeeded. Stores the result in func.ir2.reg_use, sets
 * func.ir2.has_reg_use, and logs how many functions were analyzed and how long it took.
 */
void ObjectFileDB::ir2_register_usage_pass() {
  Timer timer;
  int total_funcs = 0;
  int analyzed_funcs = 0;

  for_each_function_def_order(
      [&](Function& func, int /*segment_id*/, ObjectFileData& /*data*/) {
        ++total_funcs;
        if (func.suspected_asm || !func.ir2.atomic_ops_succeeded) {
          // nothing to analyze for this function.
          return;
        }
        ++analyzed_funcs;
        func.ir2.reg_use = analyze_ir2_register_usage(func);
        func.ir2.has_reg_use = true;
      });

  lg::info("{}/{} functions had register usage analyzed in {:.2f} ms", analyzed_funcs, total_funcs,
           timer.getMs());
}
void ObjectFileDB::ir2_write_results(const std::string& output_dir) { void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
Timer timer; Timer timer;
lg::info("Writing IR2 results to file..."); lg::info("Writing IR2 results to file...");
@ -457,6 +481,17 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function&
line, printed_comment, line, printed_comment,
op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50); op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50);
} }
if (func.ir2.has_reg_use) {
std::string regs;
for (auto r : func.ir2.reg_use.op.at(op_id).consumes) {
regs += r.to_charp();
regs += ' ';
}
if (!regs.empty()) {
append_commented(line, printed_comment, "cs: " + regs, 50);
}
}
} }
auto& instr = func.instructions.at(instr_id); auto& instr = func.instructions.at(instr_id);
// print linked strings // print linked strings