[Decompiler] IR2 form implementation (#197)

* begin ir2 form implementation

* temp

* small fixes

* fix test
This commit is contained in:
water111 2021-01-17 18:08:18 -05:00 committed by GitHub
parent 1071ff6003
commit d6bbca5620
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 2516 additions and 211 deletions

View file

@ -28,10 +28,13 @@ add_library(
IR/IR_ExpressionStack.cpp
IR/IR_TypeAnalysis.cpp
IR2/atomic_op_builder.cpp
IR2/AtomicOp.cpp
IR2/AtomicOpBuilder.cpp
IR2/AtomicOpForm.cpp
IR2/AtomicOpTypeAnalysis.cpp
IR2/cfg_builder.cpp
IR2/Env.cpp
IR2/Form.cpp
IR2/reg_usage.cpp
IR2/variable_naming.cpp

View file

@ -134,7 +134,7 @@ std::string CfgVtx::links_to_string() {
/// VERTICES
/////////////////////////////////////////
std::string BlockVtx::to_string() {
std::string BlockVtx::to_string() const {
if (is_early_exit_block) {
return "Block (EA) " + std::to_string(block_id);
} else {
@ -142,11 +142,11 @@ std::string BlockVtx::to_string() {
}
}
goos::Object BlockVtx::to_form() {
goos::Object BlockVtx::to_form() const {
return pretty_print::to_symbol("b" + std::to_string(block_id));
}
std::string SequenceVtx::to_string() {
std::string SequenceVtx::to_string() const {
assert(!seq.empty());
// todo - this is not a great way to print it. Maybe sequences should have an ID or name?
std::string result =
@ -154,7 +154,7 @@ std::string SequenceVtx::to_string() {
return result;
}
goos::Object SequenceVtx::to_form() {
goos::Object SequenceVtx::to_form() const {
std::vector<goos::Object> forms;
forms.push_back(pretty_print::to_symbol("seq"));
for (auto* x : seq) {
@ -163,27 +163,27 @@ goos::Object SequenceVtx::to_form() {
return pretty_print::build_list(forms);
}
std::string EntryVtx::to_string() {
std::string EntryVtx::to_string() const {
return "ENTRY";
}
goos::Object EntryVtx::to_form() {
goos::Object EntryVtx::to_form() const {
return pretty_print::to_symbol("entry");
}
std::string ExitVtx::to_string() {
std::string ExitVtx::to_string() const {
return "EXIT";
}
goos::Object ExitVtx::to_form() {
goos::Object ExitVtx::to_form() const {
return pretty_print::to_symbol("exit");
}
std::string CondWithElse::to_string() {
std::string CondWithElse::to_string() const {
return "CONDWE" + std::to_string(uid);
}
goos::Object CondWithElse::to_form() {
goos::Object CondWithElse::to_form() const {
std::vector<goos::Object> forms;
forms.push_back(pretty_print::to_symbol("cond"));
for (const auto& x : entries) {
@ -195,11 +195,11 @@ goos::Object CondWithElse::to_form() {
return pretty_print::build_list(forms);
}
std::string CondNoElse::to_string() {
std::string CondNoElse::to_string() const {
return "CONDNE" + std::to_string(uid);
}
goos::Object CondNoElse::to_form() {
goos::Object CondNoElse::to_form() const {
std::vector<goos::Object> forms;
forms.push_back(pretty_print::to_symbol("cond"));
for (const auto& x : entries) {
@ -209,49 +209,49 @@ goos::Object CondNoElse::to_form() {
return pretty_print::build_list(forms);
}
std::string WhileLoop::to_string() {
std::string WhileLoop::to_string() const {
return "WHL" + std::to_string(uid);
}
goos::Object WhileLoop::to_form() {
goos::Object WhileLoop::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("while"), condition->to_form(),
body->to_form()};
return pretty_print::build_list(forms);
}
std::string UntilLoop::to_string() {
std::string UntilLoop::to_string() const {
return "UNTL" + std::to_string(uid);
}
goos::Object UntilLoop::to_form() {
goos::Object UntilLoop::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("until"), condition->to_form(),
body->to_form()};
return pretty_print::build_list(forms);
}
std::string UntilLoop_single::to_string() {
std::string UntilLoop_single::to_string() const {
return "UNTLS" + std::to_string(uid);
}
goos::Object UntilLoop_single::to_form() {
goos::Object UntilLoop_single::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("until1"), block->to_form()};
return pretty_print::build_list(forms);
}
std::string InfiniteLoopBlock::to_string() {
std::string InfiniteLoopBlock::to_string() const {
return "INFL" + std::to_string(uid);
}
goos::Object InfiniteLoopBlock::to_form() {
goos::Object InfiniteLoopBlock::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("inf-loop"), block->to_form()};
return pretty_print::build_list(forms);
}
std::string ShortCircuit::to_string() {
std::string ShortCircuit::to_string() const {
return "SC" + std::to_string(uid);
}
goos::Object ShortCircuit::to_form() {
goos::Object ShortCircuit::to_form() const {
std::vector<goos::Object> forms;
forms.push_back(pretty_print::to_symbol("sc"));
for (const auto& x : entries) {
@ -260,21 +260,21 @@ goos::Object ShortCircuit::to_form() {
return pretty_print::build_list(forms);
}
std::string GotoEnd::to_string() {
std::string GotoEnd::to_string() const {
return "goto_end" + std::to_string(uid);
}
goos::Object GotoEnd::to_form() {
goos::Object GotoEnd::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("return-from-function"),
body->to_form(), unreachable_block->to_form()};
return pretty_print::build_list(forms);
}
std::string Break::to_string() {
std::string Break::to_string() const {
return "goto" + std::to_string(uid);
}
goos::Object Break::to_form() {
goos::Object Break::to_form() const {
std::vector<goos::Object> forms = {pretty_print::to_symbol("break"),
pretty_print::to_symbol(std::to_string(dest_block)),
body->to_form(), unreachable_block->to_form()};

View file

@ -65,8 +65,8 @@ void replace_exactly_one_in(std::vector<T>& v, T old, T replace) {
*/
class CfgVtx {
public:
virtual std::string to_string() = 0; // convert to a single line string for debugging
virtual goos::Object to_form() = 0; // recursive print as LISP form.
virtual std::string to_string() const = 0; // convert to a single line string for debugging
virtual goos::Object to_form() const = 0; // recursive print as LISP form.
virtual ~CfgVtx() = default;
CfgVtx* parent = nullptr; // parent structure, or nullptr if top level
@ -132,8 +132,8 @@ class CfgVtx {
class EntryVtx : public CfgVtx {
public:
EntryVtx() = default;
goos::Object to_form() override;
std::string to_string() override;
goos::Object to_form() const override;
std::string to_string() const override;
};
/*!
@ -141,8 +141,8 @@ class EntryVtx : public CfgVtx {
*/
class ExitVtx : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
};
/*!
@ -151,8 +151,8 @@ class ExitVtx : public CfgVtx {
class BlockVtx : public CfgVtx {
public:
explicit BlockVtx(int id) : block_id(id) {}
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
int block_id = -1; // which block are we?
bool is_early_exit_block = false; // are we an empty block at the end for early exits to jump to?
};
@ -163,8 +163,8 @@ class BlockVtx : public CfgVtx {
*/
class SequenceVtx : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
std::vector<CfgVtx*> seq;
};
@ -175,8 +175,8 @@ class SequenceVtx : public CfgVtx {
*/
class CondWithElse : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
struct Entry {
Entry() = default;
@ -196,8 +196,8 @@ class CondWithElse : public CfgVtx {
*/
class CondNoElse : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
struct Entry {
Entry() = default;
@ -211,8 +211,8 @@ class CondNoElse : public CfgVtx {
class WhileLoop : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
CfgVtx* condition = nullptr;
CfgVtx* body = nullptr;
@ -220,8 +220,8 @@ class WhileLoop : public CfgVtx {
class UntilLoop : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
CfgVtx* condition = nullptr;
CfgVtx* body = nullptr;
@ -229,38 +229,38 @@ class UntilLoop : public CfgVtx {
class UntilLoop_single : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
CfgVtx* block = nullptr;
};
class ShortCircuit : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
std::vector<CfgVtx*> entries;
};
class InfiniteLoopBlock : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
CfgVtx* block;
};
class GotoEnd : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
CfgVtx* body = nullptr;
CfgVtx* unreachable_block = nullptr;
};
class Break : public CfgVtx {
public:
std::string to_string() override;
goos::Object to_form() override;
std::string to_string() const override;
goos::Object to_form() const override;
int dest_block = -1;
CfgVtx* body = nullptr;
CfgVtx* unreachable_block = nullptr;

View file

@ -8,13 +8,14 @@
#include <unordered_map>
#include <stdexcept>
#include <unordered_set>
#include "decompiler/IR2/AtomicOpBuilder.h"
#include "decompiler/IR2/atomic_op_builder.h"
#include "decompiler/Disasm/Instruction.h"
#include "decompiler/Disasm/Register.h"
#include "BasicBlocks.h"
#include "CfgVtx.h"
#include "common/type_system/TypeSpec.h"
#include "decompiler/config.h"
#include "decompiler/IR2/Form.h"
namespace decompiler {
class DecompilerTypeSystem;
@ -169,6 +170,8 @@ class Function {
RegUsageInfo reg_use;
bool has_type_info = false;
Env env;
FormPool form_pool;
Form* top_form = nullptr;
} ir2;
private:

View file

@ -254,6 +254,7 @@ std::string get_simple_expression_op_name(SimpleExpression::Kind kind) {
assert(false);
}
}
} // namespace
int get_simple_expression_arg_count(SimpleExpression::Kind kind) {
switch (kind) {
@ -302,7 +303,6 @@ int get_simple_expression_arg_count(SimpleExpression::Kind kind) {
assert(false);
}
}
} // namespace
SimpleExpression::SimpleExpression(Kind kind, const SimpleAtom& arg0) : n_args(1) {
m_args[0] = arg0;
@ -372,10 +372,6 @@ bool SetVarOp::operator==(const AtomicOp& other) const {
return m_dst == po->m_dst && m_src == po->m_src;
}
bool SetVarOp::is_variable_set() const {
return true;
}
bool SetVarOp::is_sequence_point() const {
if (m_src.is_identity()) {
auto& atom = m_src.get_arg(0);
@ -394,14 +390,6 @@ Variable SetVarOp::get_set_destination() const {
return m_dst;
}
std::unique_ptr<Expr> SetVarOp::get_set_source_as_expr() const {
throw std::runtime_error("get_set_source_as_expr NYI for SetVarOp");
}
std::unique_ptr<Expr> SetVarOp::get_as_expr() const {
throw std::runtime_error("get_as_expr NYI for SetVarOp");
}
void SetVarOp::update_register_info() {
m_write_regs.push_back(m_dst.reg());
m_src.get_regs(&m_read_regs);
@ -474,10 +462,6 @@ bool AsmOp::operator==(const AtomicOp& other) const {
(m_src[1] == po->m_src[1]) && (m_src[2] == po->m_src[2]);
}
bool AsmOp::is_variable_set() const {
return false;
}
bool AsmOp::is_sequence_point() const {
return true;
}
@ -486,14 +470,6 @@ Variable AsmOp::get_set_destination() const {
throw std::runtime_error("AsmOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> AsmOp::get_set_source_as_expr() const {
throw std::runtime_error("AsmOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> AsmOp::get_as_expr() const {
throw std::runtime_error("AsmOp::get_as_expr is not implemented.");
}
void AsmOp::update_register_info() {
if (m_dst.has_value()) {
m_write_regs.push_back(m_dst->reg());
@ -510,7 +486,6 @@ void AsmOp::update_register_info() {
// Condition
/////////////////////////////
namespace {
std::string get_condition_kind_name(IR2_Condition::Kind kind) {
switch (kind) {
case IR2_Condition::Kind::NOT_EQUAL:
@ -694,7 +669,6 @@ IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind) {
assert(false);
}
}
} // namespace
IR2_Condition::IR2_Condition(Kind kind) : m_kind(kind) {
assert(get_condition_num_args(m_kind) == 0);
@ -774,10 +748,6 @@ bool SetVarConditionOp::operator==(const AtomicOp& other) const {
return m_dst == po->m_dst && m_condition == po->m_condition;
}
bool SetVarConditionOp::is_variable_set() const {
return true;
}
bool SetVarConditionOp::is_sequence_point() const {
return true;
}
@ -786,14 +756,6 @@ Variable SetVarConditionOp::get_set_destination() const {
return m_dst;
}
std::unique_ptr<Expr> SetVarConditionOp::get_set_source_as_expr() const {
throw std::runtime_error("SetVarConditionOp::get_source_as_expr is not yet implemented.");
}
std::unique_ptr<Expr> SetVarConditionOp::get_as_expr() const {
throw std::runtime_error("SetVarConditionOp::get_as_expr is not yet implemented.");
}
void SetVarConditionOp::update_register_info() {
m_write_regs.push_back(m_dst.reg());
m_condition.get_regs(&m_read_regs);
@ -849,10 +811,6 @@ bool StoreOp::operator==(const AtomicOp& other) const {
return m_addr == po->m_addr && m_value == po->m_value;
}
bool StoreOp::is_variable_set() const {
return false;
}
bool StoreOp::is_sequence_point() const {
return true;
}
@ -861,14 +819,6 @@ Variable StoreOp::get_set_destination() const {
throw std::runtime_error("StoreOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> StoreOp::get_set_source_as_expr() const {
throw std::runtime_error("StoreOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> StoreOp::get_as_expr() const {
throw std::runtime_error("StoreOp::get_as_expr is not yet implemented");
}
void StoreOp::update_register_info() {
m_addr.get_regs(&m_read_regs);
m_value.get_regs(&m_read_regs);
@ -939,10 +889,6 @@ bool LoadVarOp::operator==(const AtomicOp& other) const {
return m_dst == po->m_dst && m_src == po->m_src;
}
bool LoadVarOp::is_variable_set() const {
return true;
}
bool LoadVarOp::is_sequence_point() const {
return true;
}
@ -951,14 +897,6 @@ Variable LoadVarOp::get_set_destination() const {
return m_dst;
}
std::unique_ptr<Expr> LoadVarOp::get_set_source_as_expr() const {
throw std::runtime_error("LoadVarOp::get_set_source_as_expr is not yet implemented");
}
std::unique_ptr<Expr> LoadVarOp::get_as_expr() const {
throw std::runtime_error("LoadVarOp::get_as_expr is not yet implemented");
}
void LoadVarOp::update_register_info() {
m_src.get_regs(&m_read_regs);
m_write_regs.push_back(m_dst.reg());
@ -1113,10 +1051,6 @@ bool BranchOp::operator==(const AtomicOp& other) const {
m_branch_delay == po->m_branch_delay;
}
bool BranchOp::is_variable_set() const {
return false;
}
bool BranchOp::is_sequence_point() const {
return true;
}
@ -1125,14 +1059,6 @@ Variable BranchOp::get_set_destination() const {
throw std::runtime_error("BranchOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> BranchOp::get_set_source_as_expr() const {
throw std::runtime_error("BranchOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> BranchOp::get_as_expr() const {
throw std::runtime_error("BranchOp::get_as_expr is not yet implemented");
}
void BranchOp::update_register_info() {
m_condition.get_regs(&m_read_regs);
m_branch_delay.get_regs(&m_write_regs, &m_read_regs);
@ -1172,10 +1098,6 @@ bool SpecialOp::operator==(const AtomicOp& other) const {
return m_kind == po->m_kind;
}
bool SpecialOp::is_variable_set() const {
return false;
}
bool SpecialOp::is_sequence_point() const {
return true;
}
@ -1184,14 +1106,6 @@ Variable SpecialOp::get_set_destination() const {
throw std::runtime_error("SpecialOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> SpecialOp::get_set_source_as_expr() const {
throw std::runtime_error("SpecialOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> SpecialOp::get_as_expr() const {
throw std::runtime_error("SpecialOp::get_as_expr not yet implemented");
}
void SpecialOp::update_register_info() {
switch (m_kind) {
case Kind::NOP:
@ -1232,10 +1146,6 @@ bool CallOp::operator==(const AtomicOp& other) const {
return true;
}
bool CallOp::is_variable_set() const {
return false;
}
bool CallOp::is_sequence_point() const {
return true;
}
@ -1244,17 +1154,13 @@ Variable CallOp::get_set_destination() const {
throw std::runtime_error("CallOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> CallOp::get_set_source_as_expr() const {
throw std::runtime_error("CallOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> CallOp::get_as_expr() const {
throw std::runtime_error("CallOp::get_as_expr not yet implemented");
}
void CallOp::update_register_info() {
// throw std::runtime_error("CallOp::update_register_info cannot be done until types are known");
m_read_regs.push_back(Register(Reg::GPR, Reg::T9));
// if the type analysis succeeds, it will remove this if the function doesn't return a value.
// but, in the case we want to keep running without type information, we may need a
// renamed variable here, so we add this.
m_write_regs.push_back(Register(Reg::GPR, Reg::V0));
clobber_temps();
}
@ -1282,10 +1188,6 @@ bool ConditionalMoveFalseOp::operator==(const AtomicOp& other) const {
return m_dst == po->m_dst && m_src == po->m_src && m_on_zero == po->m_on_zero;
}
bool ConditionalMoveFalseOp::is_variable_set() const {
return false;
}
bool ConditionalMoveFalseOp::is_sequence_point() const {
return true;
}
@ -1294,14 +1196,6 @@ Variable ConditionalMoveFalseOp::get_set_destination() const {
throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> ConditionalMoveFalseOp::get_set_source_as_expr() const {
throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation");
}
std::unique_ptr<Expr> ConditionalMoveFalseOp::get_as_expr() const {
throw std::runtime_error("ConditionalMoveFalseOp::get_as_expr is not yet implemented");
}
void ConditionalMoveFalseOp::update_register_info() {
m_write_regs.push_back(m_dst.reg());
m_read_regs.push_back(m_src.reg());

View file

@ -11,7 +11,9 @@
#include "Env.h"
namespace decompiler {
class Expr;
class FormElement;
class ConditionElement;
class FormPool;
class DecompilerTypeSystem;
/*!
@ -94,10 +96,6 @@ class AtomicOp {
virtual bool operator==(const AtomicOp& other) const = 0;
bool operator!=(const AtomicOp& other) const;
// determine if this is a (set! <var> thing) form. These will be handled differently in expression
// building.
virtual bool is_variable_set() const = 0;
// determine if this is a GOAL "sequence point".
// non-sequence point instructions may be out of order from the point of view of the expression
// stack.
@ -106,13 +104,9 @@ class AtomicOp {
// get the variable being set by this operation. Only call this on ops that actually set a
// variable; the other ops throw std::runtime_error.
virtual Variable get_set_destination() const = 0;
// get the value of the variable being set, as an expression. Only call this if is_variable_set
// returns true.
virtual std::unique_ptr<Expr> get_set_source_as_expr() const = 0;
// convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be
// undesirable when expression stacking.
virtual std::unique_ptr<Expr> get_as_expr() const = 0;
virtual FormElement* get_as_form(FormPool& pool) const = 0;
// figure out what registers are read and written in this AtomicOp and update read_regs,
// write_regs, and clobber_regs. It's expected that these have duplicates if a register appears
@ -122,6 +116,7 @@ class AtomicOp {
TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts);
int op_id() const { return m_my_idx; }
const std::vector<Register>& read_regs() { return m_read_regs; }
const std::vector<Register>& write_regs() { return m_write_regs; }
const std::vector<Register>& clobber_regs() { return m_clobber_regs; }
@ -196,6 +191,10 @@ class SimpleAtom {
void get_regs(std::vector<Register>* out) const;
SimpleExpression as_expr() const;
TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const;
/*!
 * Get the string stored in this atom.
 * Asserts that the atom is a symbol pointer or a symbol value.
 */
const std::string& get_str() const {
  assert(is_sym_val() || is_sym_ptr());
  return m_string;
}
private:
Kind m_kind = Kind::INVALID;
@ -282,6 +281,8 @@ class SimpleExpression {
s8 n_args = -1;
};
int get_simple_expression_arg_count(SimpleExpression::Kind kind);
/*!
* Set a variable equal to a Simple Expression
*/
@ -294,11 +295,9 @@ class SetVarOp : public AtomicOp {
virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -321,11 +320,9 @@ class AsmOp : public AtomicOp {
AsmOp(Instruction instr, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -392,12 +389,19 @@ class IR2_Condition {
bool operator!=(const IR2_Condition& other) const { return !((*this) == other); }
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
void get_regs(std::vector<Register>* out) const;
Kind kind() const { return m_kind; }
const SimpleAtom& src(int i) const { return m_src[i]; }
ConditionElement* get_as_form(FormPool& pool) const;
private:
Kind m_kind = Kind::INVALID;
SimpleAtom m_src[2];
};
std::string get_condition_kind_name(IR2_Condition::Kind kind);
int get_condition_num_args(IR2_Condition::Kind kind);
IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind);
/*!
* Set a variable to a GOAL boolean, based off of a condition.
*/
@ -406,11 +410,9 @@ class SetVarConditionOp : public AtomicOp {
SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
void invert() { m_condition.invert(); }
TypeState propagate_types_internal(const TypeState& input,
@ -432,11 +434,9 @@ class StoreOp : public AtomicOp {
StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom value, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -459,11 +459,9 @@ class LoadVarOp : public AtomicOp {
LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -509,6 +507,12 @@ class IR2_BranchDelay {
TypeState propagate_types(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) const;
Kind kind() const { return m_kind; }
/*!
 * Get one of the variables used by this branch delay.
 * Asserts that idx is in [0, 3) and that the selected slot actually holds a variable.
 */
const Variable& var(int idx) const {
  // m_var has exactly three slots; a negative index would also read out of bounds.
  assert(idx >= 0 && idx < 3);
  assert(m_var[idx].has_value());
  return m_var[idx].value();
}
private:
std::optional<Variable> m_var[3];
@ -528,15 +532,16 @@ class BranchOp : public AtomicOp {
int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
const IR2_BranchDelay& branch_delay() const { return m_branch_delay; }
const IR2_Condition& condition() const { return m_condition; }
bool likely() const { return m_likely; }
private:
bool m_likely = false;
@ -561,11 +566,9 @@ class SpecialOp : public AtomicOp {
SpecialOp(Kind kind, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -584,11 +587,9 @@ class CallOp : public AtomicOp {
CallOp(int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
@ -616,11 +617,9 @@ class ConditionalMoveFalseOp : public AtomicOp {
ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx);
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
bool operator==(const AtomicOp& other) const override;
bool is_variable_set() const override;
bool is_sequence_point() const override;
Variable get_set_destination() const override;
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
FormElement* get_as_form(FormPool& pool) const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,

View file

@ -0,0 +1,68 @@
#include "AtomicOp.h"
#include "Form.h"
namespace decompiler {
/*!
 * Build a ConditionElement for this condition.
 * Each source atom used by the condition kind is wrapped in its own single-element Form.
 * Sources that the kind does not use are passed as nullptr.
 */
ConditionElement* IR2_Condition::get_as_form(FormPool& pool) const {
  constexpr int kMaxSources = 2;  // same as the size of m_src
  const int n_sources = get_condition_num_args(m_kind);
  // both sources[] and m_src[] only hold two entries, so reject a bad count before indexing.
  assert(n_sources >= 0 && n_sources <= kMaxSources);

  Form* sources[kMaxSources] = {nullptr, nullptr};
  for (int i = 0; i < n_sources; i++) {
    sources[i] = pool.alloc_single_element_form<SimpleAtomElement>(nullptr, m_src[i]);
  }
  return pool.alloc_element<ConditionElement>(m_kind, sources[0], sources[1]);
}
/*!
 * Convert to a SetVarElement: the source expression is wrapped in a single-element Form and
 * the sequence point flag is taken from this op.
 */
FormElement* SetVarOp::get_as_form(FormPool& pool) const {
  Form* value_form = pool.alloc_single_element_form<SimpleExpressionElement>(nullptr, m_src);
  return pool.alloc_element<SetVarElement>(m_dst, value_form, is_sequence_point());
}
/*!
 * Convert to a form element by wrapping this op in an AtomicOpElement.
 */
FormElement* AsmOp::get_as_form(FormPool& pool) const {
  FormElement* wrapped = pool.alloc_element<AtomicOpElement>(this);
  return wrapped;
}
/*!
 * Convert to a SetVarElement whose value is the condition, converted to a ConditionElement
 * and wrapped in a Form.
 */
FormElement* SetVarConditionOp::get_as_form(FormPool& pool) const {
  ConditionElement* condition = m_condition.get_as_form(pool);
  Form* value_form = pool.alloc_single_form(nullptr, condition);
  return pool.alloc_element<SetVarElement>(m_dst, value_form, is_sequence_point());
}
/*!
 * Convert to a form element by wrapping this op in a StoreElement.
 */
FormElement* StoreOp::get_as_form(FormPool& pool) const {
  FormElement* store = pool.alloc_element<StoreElement>(this);
  return store;
}
/*!
 * Convert to (set! dst <load>): the source expression is wrapped in a Form, which becomes the
 * input of a LoadSourceElement carrying the load size and kind.
 * The resulting SetVarElement is always created as a sequence point.
 */
FormElement* LoadVarOp::get_as_form(FormPool& pool) const {
  Form* source_form = pool.alloc_single_element_form<SimpleExpressionElement>(nullptr, m_src);
  Form* load_form =
      pool.alloc_single_element_form<LoadSourceElement>(nullptr, source_form, m_size, m_kind);
  return pool.alloc_element<SetVarElement>(m_dst, load_form, true);
}
/*!
 * Convert to a form element by wrapping this op in a BranchElement.
 */
FormElement* BranchOp::get_as_form(FormPool& pool) const {
  FormElement* branch = pool.alloc_element<BranchElement>(this);
  return branch;
}
/*!
 * Convert to a form element by wrapping this op in an AtomicOpElement.
 */
FormElement* SpecialOp::get_as_form(FormPool& pool) const {
  FormElement* wrapped = pool.alloc_element<AtomicOpElement>(this);
  return wrapped;
}
/*!
 * Convert this call to a form element.
 * - call type set and no written registers: the bare FunctionCallElement.
 * - exactly one written register, or call type not set: (set! v0 <call>).
 * - call type set and more than one written register: not supported, throws std::runtime_error.
 */
FormElement* CallOp::get_as_form(FormPool& pool) const {
  auto call = pool.alloc_element<FunctionCallElement>(this);

  if (m_call_type_set && m_write_regs.empty()) {
    return call;
  }

  if (m_write_regs.size() == 1 || !m_call_type_set) {
    // this is a little scary in the case that type analysis doesn't run and relies on the fact
    // that CallOp falls back to writing v0 in the case where the function type isn't known.
    Variable out_var(VariableMode::WRITE, Register(Reg::GPR, Reg::V0), m_my_idx);
    return pool.alloc_element<SetVarElement>(out_var, pool.alloc_single_form(nullptr, call), true);
  }

  // the old message named get_as_expr, which no longer exists.
  throw std::runtime_error(
      "CallOp::get_as_form not yet implemented for calls writing multiple registers");
}
/*!
 * Convert to a ConditionalMoveFalseElement. The source variable is turned into a SimpleAtom
 * and wrapped in a single-element Form.
 */
FormElement* ConditionalMoveFalseOp::get_as_form(FormPool& pool) const {
  const SimpleAtom source_atom = SimpleAtom::make_var(m_src);
  Form* source_form = pool.alloc_single_element_form<SimpleAtomElement>(nullptr, source_atom);
  return pool.alloc_element<ConditionalMoveFalseElement>(m_dst, source_form, m_on_zero);
}
} // namespace decompiler

View file

@ -748,6 +748,11 @@ TypeState CallOp::propagate_types_internal(const TypeState& input,
m_read_regs.emplace_back(Reg::GPR, arg_regs[i]);
}
m_write_regs.clear();
if (in_type.last_arg() != TypeSpec("none")) {
m_write_regs.emplace_back(Reg::GPR, Reg::V0);
}
return end_types;
}

635
decompiler/IR2/Form.cpp Normal file
View file

@ -0,0 +1,635 @@
#include "Form.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "common/goos/PrettyPrinter.h"
namespace decompiler {
///////////////////
// FormPool
///////////////////
/*!
 * Delete every pointer stored in m_forms, then every pointer stored in m_elements.
 */
FormPool::~FormPool() {
  for (auto* form : m_forms) {
    delete form;
  }

  for (auto* element : m_elements) {
    delete element;
  }
}
///////////////////
// Form
//////////////////
/*!
 * Convert to a printable object. A Form with a single element prints as that element;
 * otherwise the elements are wrapped in a (begin ...) list.
 * Asserts that the Form is not empty.
 */
goos::Object Form::to_form(const Env& env) const {
  assert(!m_elements.empty());

  if (m_elements.size() == 1) {
    return m_elements.front()->to_form(env);
  }

  std::vector<goos::Object> list_items = {pretty_print::to_symbol("begin")};
  inline_forms(list_items, env);  // appends one object per element, in order
  return pretty_print::build_list(list_items);
}
/*!
 * Append the printed form of each element, in order, to the end of forms.
 */
void Form::inline_forms(std::vector<goos::Object>& forms, const Env& env) const {
  for (const auto& element : m_elements) {
    forms.push_back(element->to_form(env));
  }
}
void Form::apply(const std::function<void(FormElement*)>& f) {
for (auto& x : m_elements) {
x->apply(f);
}
}
void Form::apply_form(const std::function<void(Form*)>& f) {
f(this);
for (auto& x : m_elements) {
x->apply_form(f);
}
}
/////////////////////////////
// SimpleExpressionElement
/////////////////////////////
/*!
 * Construct from a SimpleExpression. The expression is copied into this element.
 */
SimpleExpressionElement::SimpleExpressionElement(const SimpleExpression& expr) : m_expr(expr) {}
/*!
 * Print by delegating to the stored expression, passing the labels of the env's file.
 */
goos::Object SimpleExpressionElement::to_form(const Env& env) const {
return m_expr.to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void SimpleExpressionElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void SimpleExpressionElement::apply_form(const std::function<void(Form*)>&) {}
/*!
 * Always throws std::runtime_error: callers are not expected to ask this question of a
 * SimpleExpressionElement.
 */
bool SimpleExpressionElement::is_sequence_point() const {
throw std::runtime_error("Should not check if a SimpleExpressionElement is a sequence point");
}
/////////////////////////////
// SetVarElement
/////////////////////////////
/*!
 * Create a set! of var to value and record this element as the parent of the value form.
 * value must not be null: it is dereferenced here.
 */
SetVarElement::SetVarElement(const Variable& var, Form* value, bool is_sequence_point)
: m_dst(var), m_src(value), m_is_sequence_point(is_sequence_point) {
value->parent_element = this;
}
/*!
 * Print as (set! <dst> <src>).
 */
goos::Object SetVarElement::to_form(const Env& env) const {
return pretty_print::build_list("set!", m_dst.to_string(&env), m_src->to_form(env));
}
/*!
 * Call f on this element, then apply f to the source form.
 */
void SetVarElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
m_src->apply(f);
}
/*!
 * Forward to the source form's apply_form.
 */
void SetVarElement::apply_form(const std::function<void(Form*)>& f) {
m_src->apply_form(f);
}
/*!
 * Returns the is_sequence_point flag given at construction.
 */
bool SetVarElement::is_sequence_point() const {
return m_is_sequence_point;
}
/////////////////////////////
// AtomicOpElement
/////////////////////////////
/*!
 * Wrap an AtomicOp. Only the pointer is stored; this element does not copy the op.
 */
AtomicOpElement::AtomicOpElement(const AtomicOp* op) : m_op(op) {}
/*!
 * Print by delegating to the wrapped op, passing the labels of the env's file.
 */
goos::Object AtomicOpElement::to_form(const Env& env) const {
return m_op->to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void AtomicOpElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void AtomicOpElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// ConditionElement
/////////////////////////////
/*!
 * Create a condition of the given kind with up to two source forms (either may be null).
 * Each non-null source has its parent element set to this condition.
 */
ConditionElement::ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1)
    : m_kind(kind) {
  m_src[0] = src0;
  m_src[1] = src1;
  for (Form* src : m_src) {
    if (src) {
      src->parent_element = this;
    }
  }
}

/*!
 * Print as (<kind-name> <args...>), or as just the kind name when there are no arguments.
 */
goos::Object ConditionElement::to_form(const Env& env) const {
  std::vector<goos::Object> parts;
  parts.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind)));
  for (int arg_idx = 0; arg_idx < get_condition_num_args(m_kind); arg_idx++) {
    parts.push_back(m_src[arg_idx]->to_form(env));
  }

  if (parts.size() <= 1) {
    return parts.front();
  }
  return pretty_print::build_list(parts);
}

/*!
 * Call f on this element, then apply f to each non-null source form.
 */
void ConditionElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  for (Form* src : m_src) {
    if (src) {
      src->apply(f);
    }
  }
}

/*!
 * Forward apply_form to each non-null source form.
 */
void ConditionElement::apply_form(const std::function<void(Form*)>& f) {
  for (Form* src : m_src) {
    if (src) {
      src->apply_form(f);
    }
  }
}

/*!
 * Replace the condition kind with its opposite.
 */
void ConditionElement::invert() {
  m_kind = get_condition_opposite(m_kind);
}
/////////////////////////////
// StoreElement
/////////////////////////////
/*!
 * Wrap a StoreOp. Only the pointer is stored; this element does not copy the op.
 */
StoreElement::StoreElement(const StoreOp* op) : m_op(op) {}
/*!
 * Print by delegating to the wrapped op, passing the labels of the env's file.
 */
goos::Object StoreElement::to_form(const Env& env) const {
return m_op->to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void StoreElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void StoreElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// LoadSourceElement
/////////////////////////////
/*!
 * Create a load of the given size and kind from the address form.
 * addr must not be null: it is dereferenced here to set its parent element.
 */
LoadSourceElement::LoadSourceElement(Form* addr, int size, LoadVarOp::Kind kind)
: m_addr(addr), m_size(size), m_kind(kind) {
m_addr->parent_element = this;
}
/*!
 * Print as (<load-op> <address>), where the load op name is chosen from the kind and size:
 *  - FLOAT:    l.f (size is asserted to be 4)
 *  - UNSIGNED: l.bu / l.hu / l.wu / l.d for sizes 1 / 2 / 4 / 8
 *  - SIGNED:   l.b / l.h / l.w for sizes 1 / 2 / 4
 * Any other combination asserts, and throws std::runtime_error if asserts are compiled out.
 * Previously that case ran off the end of this non-void function, which is undefined behavior.
 */
goos::Object LoadSourceElement::to_form(const Env& env) const {
  const char* load_name = nullptr;
  switch (m_kind) {
    case LoadVarOp::Kind::FLOAT:
      assert(m_size == 4);
      load_name = "l.f";
      break;
    case LoadVarOp::Kind::UNSIGNED:
      switch (m_size) {
        case 1:
          load_name = "l.bu";
          break;
        case 2:
          load_name = "l.hu";
          break;
        case 4:
          load_name = "l.wu";
          break;
        case 8:
          load_name = "l.d";
          break;
        default:
          break;
      }
      break;
    case LoadVarOp::Kind::SIGNED:
      switch (m_size) {
        case 1:
          load_name = "l.b";
          break;
        case 2:
          load_name = "l.h";
          break;
        case 4:
          load_name = "l.w";
          break;
        default:
          break;
      }
      break;
    default:
      break;
  }

  if (!load_name) {
    // keep the debug-build assert, but never fall off the end in release builds.
    assert(false);
    throw std::runtime_error("LoadSourceElement::to_form: unsupported load kind/size");
  }
  return pretty_print::build_list(load_name, m_addr->to_form(env));
}
/*!
 * Call f on this element, then apply f to the address form.
 */
void LoadSourceElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
m_addr->apply(f);
}
/*!
 * Forward to the address form's apply_form.
 */
void LoadSourceElement::apply_form(const std::function<void(Form*)>& f) {
m_addr->apply_form(f);
}
/////////////////////////////
// SimpleAtomElement
/////////////////////////////
/*!
 * Construct from a SimpleAtom. The atom is copied into this element.
 */
SimpleAtomElement::SimpleAtomElement(const SimpleAtom& atom) : m_atom(atom) {}
/*!
 * Print by delegating to the stored atom, passing the labels of the env's file.
 */
goos::Object SimpleAtomElement::to_form(const Env& env) const {
return m_atom.to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void SimpleAtomElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void SimpleAtomElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// FunctionCallElement
/////////////////////////////
/*!
 * Wrap a CallOp. Only the pointer is stored; this element does not copy the op.
 */
FunctionCallElement::FunctionCallElement(const CallOp* op) : m_op(op) {}
/*!
 * Print by delegating to the wrapped op, passing the labels of the env's file.
 */
goos::Object FunctionCallElement::to_form(const Env& env) const {
return m_op->to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void FunctionCallElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void FunctionCallElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// BranchElement
/////////////////////////////
/*!
 * Wrap a BranchOp. Only the pointer is stored; this element does not copy the op.
 */
BranchElement::BranchElement(const BranchOp* op) : m_op(op) {}
/*!
 * Print by delegating to the wrapped op, passing the labels of the env's file.
 */
goos::Object BranchElement::to_form(const Env& env) const {
return m_op->to_form(env.file->labels, &env);
}
/*!
 * Call f on this element only.
 */
void BranchElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
}
/*!
 * No-op: this element holds no child Forms.
 */
void BranchElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// ReturnElement
/////////////////////////////
/*!
 * Print as (return (<return-code>) (<dead-code>)). Each of the two forms is printed with
 * Form::to_form and then wrapped in its own list.
 */
goos::Object ReturnElement::to_form(const Env& env) const {
  std::vector<goos::Object> parts;
  parts.push_back(pretty_print::to_symbol("return"));
  for (const Form* code : {return_code, dead_code}) {
    parts.push_back(pretty_print::build_list(code->to_form(env)));
  }
  return pretty_print::build_list(parts);
}

/*!
 * Call f on this element, then apply f to the return code and then the dead code.
 */
void ReturnElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  return_code->apply(f);
  dead_code->apply(f);
}

/*!
 * Forward apply_form to the return code and then the dead code.
 */
void ReturnElement::apply_form(const std::function<void(Form*)>& f) {
  return_code->apply_form(f);
  dead_code->apply_form(f);
}
/////////////////////////////
// BreakElement
/////////////////////////////
/*!
 * Print as (break (<return-code>) (<dead-code>)). Each of the two forms is printed with
 * Form::to_form and then wrapped in its own list.
 */
goos::Object BreakElement::to_form(const Env& env) const {
  std::vector<goos::Object> parts;
  parts.push_back(pretty_print::to_symbol("break"));
  for (const Form* code : {return_code, dead_code}) {
    parts.push_back(pretty_print::build_list(code->to_form(env)));
  }
  return pretty_print::build_list(parts);
}

/*!
 * Call f on this element, then apply f to the return code and then the dead code.
 */
void BreakElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  return_code->apply(f);
  dead_code->apply(f);
}

/*!
 * Forward apply_form to the return code and then the dead code.
 */
void BreakElement::apply_form(const std::function<void(Form*)>& f) {
  return_code->apply_form(f);
  dead_code->apply_form(f);
}
/////////////////////////////
// CondWithElseElement
/////////////////////////////
/*!
 * Print as either (if <cond> <body> <else>) or (cond (<cond> <body...>)... (else <body...>)).
 */
goos::Object CondWithElseElement::to_form(const Env& env) const {
  // for now we only turn it into an if statement if both cases won't require a begin at the top
  // level. I think it is more common to write these as a two-case cond instead of an if with begin.
  const bool print_as_if = entries.size() == 1 && entries.front().body->is_single_element() &&
                           else_ir->is_single_element();

  std::vector<goos::Object> result;
  if (print_as_if) {
    const Entry& only = entries.front();
    result.push_back(pretty_print::to_symbol("if"));
    result.push_back(only.condition->to_form(env));
    result.push_back(only.body->to_form(env));
    result.push_back(else_ir->to_form(env));
    return pretty_print::build_list(result);
  }

  result.push_back(pretty_print::to_symbol("cond"));
  for (const Entry& clause : entries) {
    std::vector<goos::Object> clause_forms;
    clause_forms.push_back(clause.condition->to_form(env));
    clause.body->inline_forms(clause_forms, env);
    result.push_back(pretty_print::build_list(clause_forms));
  }

  std::vector<goos::Object> else_forms;
  else_forms.push_back(pretty_print::to_symbol("else"));
  else_ir->inline_forms(else_forms, env);
  result.push_back(pretty_print::build_list(else_forms));
  return pretty_print::build_list(result);
}

/*!
 * Call f on this element, then apply f to each entry's condition and body, then to the else.
 */
void CondWithElseElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  for (Entry& clause : entries) {
    clause.condition->apply(f);
    clause.body->apply(f);
  }
  else_ir->apply(f);
}

/*!
 * Forward apply_form to each entry's condition and body, then to the else.
 */
void CondWithElseElement::apply_form(const std::function<void(Form*)>& f) {
  for (Entry& clause : entries) {
    clause.condition->apply_form(f);
    clause.body->apply_form(f);
  }
  else_ir->apply_form(f);
}
/////////////////////////////
// EmptyElement
/////////////////////////////
/*!
 * Print as (empty). The env is not needed, so the parameter is left unnamed (like the unused
 * parameters of the no-op apply_form functions in this file) to avoid an unused-parameter
 * warning.
 */
goos::Object EmptyElement::to_form(const Env&) const {
  return pretty_print::build_list("empty");
}

/*!
 * Call f on this element only.
 */
void EmptyElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
}

/*!
 * No-op: this element holds no child Forms.
 */
void EmptyElement::apply_form(const std::function<void(Form*)>&) {}
/////////////////////////////
// WhileElement
/////////////////////////////
/*!
 * Print as (while <condition> <body forms...>). The body is inlined, not wrapped in a begin.
 */
goos::Object WhileElement::to_form(const Env& env) const {
  std::vector<goos::Object> forms;
  forms.push_back(pretty_print::to_symbol("while"));
  forms.push_back(condition->to_form(env));
  body->inline_forms(forms, env);
  return pretty_print::build_list(forms);
}

/*!
 * Call f on this element, then apply f to the body and then the condition.
 */
void WhileElement::apply(const std::function<void(FormElement*)>& f) {
  // note - this is done in program order, rather than print order. Not sure if this makes sense.
  f(this);
  body->apply(f);
  condition->apply(f);
}

/*!
 * Forward apply_form to the body and then the condition.
 */
void WhileElement::apply_form(const std::function<void(Form*)>& f) {
  body->apply_form(f);
  condition->apply_form(f);
}
/////////////////////////////
// UntilElement
/////////////////////////////
/*!
 * Print as (until <condition> <body forms...>). The body is inlined, not wrapped in a begin.
 */
goos::Object UntilElement::to_form(const Env& env) const {
  std::vector<goos::Object> forms;
  forms.push_back(pretty_print::to_symbol("until"));
  forms.push_back(condition->to_form(env));
  body->inline_forms(forms, env);
  return pretty_print::build_list(forms);
}

/*!
 * Call f on this element, then apply f to the body and then the condition.
 */
void UntilElement::apply(const std::function<void(FormElement*)>& f) {
  // note - this is done in program order, rather than print order. Not sure if this makes sense.
  f(this);
  body->apply(f);
  condition->apply(f);
}

/*!
 * Forward apply_form to the body and then the condition.
 */
void UntilElement::apply_form(const std::function<void(Form*)>& f) {
  body->apply_form(f);
  condition->apply_form(f);
}
/////////////////////////////
// ShortCircuitElement
/////////////////////////////
/*!
 * Call f on this element, then apply f to the condition of each entry.
 * The "output" forms of the entries are currently not visited.
 */
void ShortCircuitElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  for (Entry& entry : entries) {
    entry.condition->apply(f);
    //    if (x.output) {
    //      // not sure about this...
    //      x.output->apply(f);
    //    }
  }
}

/*!
 * Forward apply_form to the condition of each entry.
 * The "output" forms of the entries are currently not visited.
 */
void ShortCircuitElement::apply_form(const std::function<void(Form*)>& f) {
  for (Entry& entry : entries) {
    entry.condition->apply_form(f);
    //    if (x.output) {
    //      // not sure about this...
    //      x.output->apply(f);
    //    }
  }
}

/*!
 * Print as (and ...), (or ...) or (unknown-sc ...) followed by the condition of each entry.
 */
goos::Object ShortCircuitElement::to_form(const Env& env) const {
  const char* head = nullptr;
  switch (kind) {
    case UNKNOWN:
      head = "unknown-sc";
      break;
    case AND:
      head = "and";
      break;
    case OR:
      head = "or";
      break;
    default:
      assert(false);
  }

  std::vector<goos::Object> parts;
  if (head) {
    parts.push_back(pretty_print::to_symbol(head));
  }
  for (const Entry& entry : entries) {
    parts.push_back(entry.condition->to_form(env));
  }
  return pretty_print::build_list(parts);
}
/////////////////////////////
// CondNoElseElement
/////////////////////////////
/*!
 * Print a cond with no else case:
 *  - one entry with a single-element body:  (if <cond> <body>)
 *  - one entry with a multi-element body:   (when <cond> <body forms...>)
 *  - otherwise:                             (cond (<cond> <body forms...>) ...)
 */
goos::Object CondNoElseElement::to_form(const Env& env) const {
  if (entries.size() == 1 && entries.front().body->is_single_element()) {
    // print as an if statement if we can put the body in a single form.
    std::vector<goos::Object> list;
    list.push_back(pretty_print::to_symbol("if"));
    list.push_back(entries.front().condition->to_form(env));
    list.push_back(entries.front().body->to_form(env));
    return pretty_print::build_list(list);
  } else if (entries.size() == 1) {
    // turn into a when if the body requires multiple forms
    // todo check to see if the condition starts with a NOT and this can be simplified to an
    // unless.
    std::vector<goos::Object> list;
    list.push_back(pretty_print::to_symbol("when"));
    list.push_back(entries.front().condition->to_form(env));
    entries.front().body->inline_forms(list, env);
    return pretty_print::build_list(list);
  } else {
    std::vector<goos::Object> list;
    list.push_back(pretty_print::to_symbol("cond"));
    for (auto& e : entries) {
      std::vector<goos::Object> entry;
      entry.push_back(e.condition->to_form(env));
      // each clause gets its own body. This used to inline the first entry's body into the
      // outer list, which dropped every clause body and duplicated the first one.
      e.body->inline_forms(entry, env);
      list.push_back(pretty_print::build_list(entry));
    }
    return pretty_print::build_list(list);
  }
}
/*!
 * Call f on this element, then apply f to each entry's condition and body.
 */
void CondNoElseElement::apply(const std::function<void(FormElement*)>& f) {
  f(this);
  for (Entry& clause : entries) {
    clause.condition->apply(f);
    clause.body->apply(f);
  }
}

/*!
 * Forward apply_form to each entry's condition and body.
 */
void CondNoElseElement::apply_form(const std::function<void(Form*)>& f) {
  for (Entry& clause : entries) {
    clause.condition->apply_form(f);
    clause.body->apply_form(f);
  }
}
/////////////////////////////
// AbsElement
/////////////////////////////
/*!
 * Create an abs of the source form and record this element as the source's parent.
 * _source must not be null: it is dereferenced here.
 */
AbsElement::AbsElement(Form* _source) : source(_source) {
source->parent_element = this;
}
/*!
 * Print as (abs <source>).
 */
goos::Object AbsElement::to_form(const Env& env) const {
return pretty_print::build_list("abs", source->to_form(env));
}
/*!
 * Call f on this element, then apply f to the source form.
 */
void AbsElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
source->apply(f);
}
/*!
 * Forward to the source form's apply_form.
 */
void AbsElement::apply_form(const std::function<void(Form*)>& f) {
source->apply_form(f);
}
/////////////////////////////
// AshElement
/////////////////////////////
/*!
 * Create an arithmetic shift of _value by _shift_amount. Both forms must be non-null: they
 * are dereferenced here to set their parent element. _clobber is stored but not used in
 * this file.
 */
AshElement::AshElement(Form* _shift_amount,
Form* _value,
std::optional<Variable> _clobber,
bool _is_signed)
: shift_amount(_shift_amount), value(_value), clobber(_clobber), is_signed(_is_signed) {
_shift_amount->parent_element = this;
_value->parent_element = this;
}
/*!
 * Print as (ash.si <value> <shift-amount>) when signed, (ash.ui <value> <shift-amount>)
 * otherwise. Note that the value is printed before the shift amount.
 */
goos::Object AshElement::to_form(const Env& env) const {
return pretty_print::build_list(pretty_print::to_symbol(is_signed ? "ash.si" : "ash.ui"),
value->to_form(env), shift_amount->to_form(env));
}
/*!
 * Call f on this element, then apply f to the shift amount and then the value.
 */
void AshElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
shift_amount->apply(f);
value->apply(f);
}
/*!
 * Forward apply_form to the shift amount and then the value.
 */
void AshElement::apply_form(const std::function<void(Form*)>& f) {
shift_amount->apply_form(f);
value->apply_form(f);
}
/////////////////////////////
// TypeOfElement
/////////////////////////////
/*!
 * Create a type-of of the value form and record this element as the value's parent.
 * _value must not be null: it is dereferenced here. _clobber is stored but not used in
 * this file.
 */
TypeOfElement::TypeOfElement(Form* _value, std::optional<Variable> _clobber)
: value(_value), clobber(_clobber) {
value->parent_element = this;
}
/*!
 * Print as (type-of <value>).
 */
goos::Object TypeOfElement::to_form(const Env& env) const {
return pretty_print::build_list("type-of", value->to_form(env));
}
/*!
 * Call f on this element, then apply f to the value form.
 */
void TypeOfElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
value->apply(f);
}
/*!
 * Forward to the value form's apply_form.
 */
void TypeOfElement::apply_form(const std::function<void(Form*)>& f) {
value->apply_form(f);
}
/////////////////////////////
// ConditionalMoveFalseElement
/////////////////////////////
/*!
 * Create a conditional move of #f into _dest, controlled by _source.
 * _source must not be null: it is dereferenced here to set its parent element.
 */
ConditionalMoveFalseElement::ConditionalMoveFalseElement(Variable _dest,
Form* _source,
bool _on_zero)
: dest(_dest), source(_source), on_zero(_on_zero) {
source->parent_element = this;
}
/*!
 * Print as (cmove-#f-zero <dest> <source>) when on_zero is set, and as
 * (cmove-#f-nonzero <dest> <source>) otherwise.
 */
goos::Object ConditionalMoveFalseElement::to_form(const Env& env) const {
return pretty_print::build_list(on_zero ? "cmove-#f-zero" : "cmove-#f-nonzero",
dest.to_string(&env), source->to_form(env));
}
/*!
 * Call f on this element, then apply f to the source form.
 */
void ConditionalMoveFalseElement::apply(const std::function<void(FormElement*)>& f) {
f(this);
source->apply(f);
}
/*!
 * Forward to the source form's apply_form.
 */
void ConditionalMoveFalseElement::apply_form(const std::function<void(Form*)>& f) {
source->apply_form(f);
}
} // namespace decompiler

432
decompiler/IR2/Form.h Normal file
View file

@ -0,0 +1,432 @@
#pragma once
#include <vector>
#include <unordered_set>
#include <memory>
#include <functional>
#include "decompiler/Disasm/Register.h"
#include "decompiler/IR2/AtomicOp.h"
#include "common/goos/Object.h"
namespace decompiler {
class Form;
class Env;
/*!
 * A "FormElement" represents a single LISP form that's not a begin.
 * This is an abstract base class that all types of forms should be based on.
 */
class FormElement {
public:
// The Form that contains this element. Set by Form's constructors, nullptr otherwise.
Form* parent_form = nullptr;
// Convert this element to a goos::Object so it can be pretty printed.
virtual goos::Object to_form(const Env& env) const = 0;
virtual ~FormElement() = default;
// Call f on this element. Subclasses that hold child Forms also forward f to them.
virtual void apply(const std::function<void(FormElement*)>& f) = 0;
// Forward f to the child Forms of this element. Elements without child Forms do nothing.
virtual void apply_form(const std::function<void(Form*)>& f) = 0;
// True by default. Subclasses may override.
virtual bool is_sequence_point() const { return true; }
protected:
friend class Form;
};
/*!
 * A SimpleExpressionElement is a form which has the value of a SimpleExpression.
 * Like a SimpleExpression, it has no side effects.
 * It owns a copy of the expression and has no child Forms.
 */
class SimpleExpressionElement : public FormElement {
public:
explicit SimpleExpressionElement(const SimpleExpression& expr);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
// Always throws: this should not be asked of a SimpleExpressionElement.
bool is_sequence_point() const override;
// The wrapped expression.
const SimpleExpression& expr() const { return m_expr; }
private:
SimpleExpression m_expr;
};
/*!
 * Represents storing a value into memory.
 * Because a value can be propagated "into" the source value, this will have to be special cased
 * in expression propagation.
 * Currently this just wraps a pointer to the StoreOp and prints it.
 */
class StoreElement : public FormElement {
public:
explicit StoreElement(const StoreOp* op);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
private:
// todo - we may eventually want to use a different representation for more
// complicated store paths.
// Not owned by this element.
const StoreOp* m_op;
};
/*!
 * Representing a value loaded from memory.
 * Unclear if this should have some common base with store?
 * Holds the address as a child Form, plus the size and kind of the load.
 */
class LoadSourceElement : public FormElement {
public:
// addr must be non-null. Its parent element is set to the new element.
LoadSourceElement(Form* addr, int size, LoadVarOp::Kind kind);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
// Size of the load. to_form handles 1, 2, 4 and (unsigned only) 8.
int size() const { return m_size; }
LoadVarOp::Kind kind() const { return m_kind; }
// The form for the address being loaded from.
const Form* location() const { return m_addr; }
private:
Form* m_addr = nullptr;
int m_size = -1;
LoadVarOp::Kind m_kind;
};
/*!
 * A form which is just a single SimpleAtom. It owns a copy of the atom and has no child
 * Forms.
 */
class SimpleAtomElement : public FormElement {
public:
explicit SimpleAtomElement(const SimpleAtom& var);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
private:
SimpleAtom m_atom;
};
/*!
 * Set a variable to a Form. This is the set! form to be used for expression building.
 */
class SetVarElement : public FormElement {
public:
// value must be non-null. Its parent element is set to the new element.
SetVarElement(const Variable& var, Form* value, bool is_sequence_point);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
// Returns the flag given to the constructor.
bool is_sequence_point() const override;
// The variable being set.
const Variable& dst() const { return m_dst; }
// The form for the value being stored in the variable.
const Form* src() const { return m_src; }
private:
Variable m_dst;
Form* m_src = nullptr;
bool m_is_sequence_point = true;
};
/*!
 * A form which wraps a pointer to an AtomicOp and prints it using the op's own to_form.
 * It has no child Forms.
 */
class AtomicOpElement : public FormElement {
public:
explicit AtomicOpElement(const AtomicOp* op);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
private:
// Not owned by this element.
const AtomicOp* m_op;
};
/*!
 * A condition of some IR2_Condition::Kind with up to two source Forms.
 */
class ConditionElement : public FormElement {
public:
// src0 and src1 may be null. Non-null sources get their parent element set.
ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
// Replace the kind with its opposite kind.
void invert();
private:
IR2_Condition::Kind m_kind;
// to_form dereferences the first get_condition_num_args(m_kind) entries without a null check.
Form* m_src[2] = {nullptr, nullptr};
};
/*!
 * A form which wraps a pointer to a CallOp and prints it using the op's own to_form.
 * It has no child Forms.
 */
class FunctionCallElement : public FormElement {
public:
explicit FunctionCallElement(const CallOp* op);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
private:
// Not owned by this element.
const CallOp* m_op;
};
/*!
 * A form which wraps a pointer to a BranchOp and prints it using the op's own to_form.
 * It has no child Forms.
 */
class BranchElement : public FormElement {
public:
explicit BranchElement(const BranchOp* op);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
// The wrapped branch op.
const BranchOp* op() const { return m_op; }
private:
// Not owned by this element.
const BranchOp* m_op;
};
/*!
 * A return, printed as (return (<return_code>) (<dead_code>)).
 * Both forms are dereferenced unconditionally by to_form/apply/apply_form, so they must be
 * non-null by then. Unlike most other elements, the constructor does not set the
 * parent_element of its child forms.
 */
class ReturnElement : public FormElement {
public:
Form* return_code = nullptr;
Form* dead_code = nullptr;
ReturnElement(Form* _return_code, Form* _dead_code)
: return_code(_return_code), dead_code(_dead_code) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A break, printed as (break (<return_code>) (<dead_code>)).
 * Both forms are dereferenced unconditionally by to_form/apply/apply_form, so they must be
 * non-null by then. Unlike most other elements, the constructor does not set the
 * parent_element of its child forms.
 */
class BreakElement : public FormElement {
public:
Form* return_code = nullptr;
Form* dead_code = nullptr;
BreakElement(Form* _return_code, Form* _dead_code)
: return_code(_return_code), dead_code(_dead_code) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A cond with an else case. Printed as an if when there is a single entry and both the entry
 * body and the else are single elements, and as a cond otherwise.
 */
class CondWithElseElement : public FormElement {
public:
// One (condition body...) case of the cond.
struct Entry {
Form* condition = nullptr;
Form* body = nullptr;
// Not read or written in Form.cpp.
bool cleaned = false;
};
std::vector<Entry> entries;
// The body of the else case.
Form* else_ir = nullptr;
CondWithElseElement(std::vector<Entry> _entries, Form* _else_ir)
: entries(std::move(_entries)), else_ir(_else_ir) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * An element with no contents, printed as (empty).
 */
class EmptyElement : public FormElement {
public:
EmptyElement() = default;
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A while loop, printed as (while <condition> <body...>).
 * apply and apply_form visit the body before the condition.
 */
class WhileElement : public FormElement {
public:
WhileElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
Form* condition = nullptr;
Form* body = nullptr;
// Not read or written in Form.cpp.
bool cleaned = false;
};
/*!
 * An until loop, printed as (until <condition> <body...>).
 * apply and apply_form visit the body before the condition.
 */
class UntilElement : public FormElement {
public:
UntilElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
Form* condition = nullptr;
Form* body = nullptr;
};
/*!
 * A short circuiting and/or. Printed as (and ...), (or ...), or (unknown-sc ...) with the
 * condition of each entry.
 */
class ShortCircuitElement : public FormElement {
public:
struct Entry {
Form* condition = nullptr;
// in the case where there's no else, each delay slot will write #f to the "output" register.
// this can be done with an or <output>, s7, r0
// Note: output is not printed by to_form and not visited by apply/apply_form.
Form* output = nullptr;
bool is_output_trick = false;
bool cleaned = false;
};
// Which operator this is. Starts as UNKNOWN.
enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN;
Variable final_result;
std::vector<Entry> entries;
// Empty until it is set elsewhere. Form.cpp never reads or writes it.
std::optional<bool> used_as_value = std::nullopt;
explicit ShortCircuitElement(std::vector<Entry> _entries) : entries(std::move(_entries)) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A cond with no else case. Printed as an if, a when, or a cond depending on the number of
 * entries and whether the body of a single entry is a single element.
 */
class CondNoElseElement : public FormElement {
public:
// One (condition body...) case of the cond.
struct Entry {
Form* condition = nullptr;
Form* body = nullptr;
// The fields below are not read or written in Form.cpp.
std::optional<Variable> false_destination;
FormElement* original_condition_branch = nullptr;
bool cleaned = false;
};
// final_destination and used_as_value are not read or written in Form.cpp.
Register final_destination;
bool used_as_value = false;
std::vector<Entry> entries;
explicit CondNoElseElement(std::vector<Entry> _entries) : entries(std::move(_entries)) {}
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * An absolute value, printed as (abs <source>).
 */
class AbsElement : public FormElement {
public:
// _source must be non-null. Its parent element is set to the new element.
explicit AbsElement(Form* _source);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
Form* source = nullptr;
};
/*!
 * An arithmetic shift, printed as (ash.si <value> <shift_amount>) when signed and
 * (ash.ui <value> <shift_amount>) when unsigned.
 */
class AshElement : public FormElement {
public:
Form* shift_amount = nullptr;
Form* value = nullptr;
// Stored by the constructor, but not used for printing.
std::optional<Variable> clobber;
bool is_signed = true;
// _shift_amount and _value must be non-null. Their parent element is set to the new element.
AshElement(Form* _shift_amount, Form* _value, std::optional<Variable> _clobber, bool _is_signed);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * Getting the type of a value, printed as (type-of <value>).
 */
class TypeOfElement : public FormElement {
public:
// Has no default value. It is always set by the constructor.
Form* value;
// Stored by the constructor, but not used for printing.
std::optional<Variable> clobber;
// _value must be non-null. Its parent element is set to the new element.
TypeOfElement(Form* _value, std::optional<Variable> _clobber);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A conditional move of #f, printed as (cmove-#f-zero <dest> <source>) when on_zero is set
 * and (cmove-#f-nonzero <dest> <source>) otherwise.
 */
class ConditionalMoveFalseElement : public FormElement {
public:
Variable dest;
Form* source = nullptr;
bool on_zero = false;
// _source must be non-null. Its parent element is set to the new element.
ConditionalMoveFalseElement(Variable _dest, Form* _source, bool _on_zero);
goos::Object to_form(const Env& env) const override;
void apply(const std::function<void(FormElement*)>& f) override;
void apply_form(const std::function<void(Form*)>& f) override;
};
/*!
 * A Form is a wrapper around one or more FormElements.
 * This is done for two reasons:
 *  - Easier to "inline" begins, prevents stupid nesting of begins.
 *  - Easier to manage ownership.
 *
 * A Form stores raw pointers to its elements and does not delete them.
 */
class Form {
 public:
  Form() = default;

  /*!
   * Create a form holding a single element. The element's parent_form is set to this form.
   */
  Form(FormElement* parent, FormElement* single_child)
      : parent_element(parent), m_elements({single_child}) {
    single_child->parent_form = this;
  }

  /*!
   * Create a form from a sequence of elements. Each element's parent_form is set to this form.
   * The sequence is taken by const reference so that it is only copied once, into m_elements.
   */
  Form(FormElement* parent, const std::vector<FormElement*>& sequence)
      : parent_element(parent), m_elements(sequence) {
    for (auto& x : sequence) {
      x->parent_form = this;
    }
  }

  /*!
   * If this form has exactly one element, return it. Otherwise return nullptr.
   */
  FormElement* try_as_single_element() const {
    if (is_single_element()) {
      return m_elements.front();
    }
    return nullptr;
  }
  bool is_single_element() const { return m_elements.size() == 1; }

  // Element access. These all use vector::at, so a bad index throws std::out_of_range.
  FormElement* operator[](int idx) { return m_elements.at(idx); }
  FormElement* at(int idx) { return m_elements.at(idx); }
  const FormElement* operator[](int idx) const { return m_elements.at(idx); }
  int size() const { return int(m_elements.size()); }

  // The following three require a non-empty form.
  FormElement* back() const {
    assert(!m_elements.empty());
    return m_elements.back();
  }

  // Pointer to the last slot, which allows the last element to be replaced in place.
  FormElement** back_ref() {
    assert(!m_elements.empty());
    return &m_elements.back();
  }

  void pop_back() {
    assert(!m_elements.empty());
    m_elements.pop_back();
  }

  const std::vector<FormElement*>& elts() const { return m_elements; }
  std::vector<FormElement*>& elts() { return m_elements; }

  // NOTE(review): unlike the constructors, this does not set elt->parent_form. Confirm that
  // callers don't rely on parent_form for elements added this way.
  void push_back(FormElement* elt) { m_elements.push_back(elt); }

  // Print as a single form. Multiple elements are wrapped in a begin.
  goos::Object to_form(const Env& env) const;
  // Append the printed elements to forms, with no begin.
  void inline_forms(std::vector<goos::Object>& forms, const Env& env) const;
  // Run apply(f) on each element.
  void apply(const std::function<void(FormElement*)>& f);
  // Call f on this form, then run apply_form(f) on each element.
  void apply_form(const std::function<void(Form*)>& f);

  // The element that contains this form, or nullptr.
  FormElement* parent_element = nullptr;

 private:
  std::vector<FormElement*> m_elements;
};
/*!
 * A FormPool is used to allocate forms and form elements.
 * It will clean up everything when it is destroyed.
 * As a result, you don't need to worry about deleting / reference counting when manipulating
 * a Form graph.
 *
 * NOTE(review): the pool holds owning raw pointers and has a user-defined destructor, but its
 * copy operations are the implicit ones. Copying a non-empty pool would free everything twice.
 * Confirm that pools are never copied once they contain anything.
 */
class FormPool {
 public:
  /*!
   * Allocate an element of type T, constructed from args. The pool owns the result.
   */
  template <typename T, class... Args>
  T* alloc_element(Args&&... args) {
    auto elt = new T(std::forward<Args>(args)...);
    m_elements.emplace_back(elt);
    return elt;
  }

  /*!
   * Allocate an element of type T, constructed from args, and a form that contains only this
   * element. The pool owns both.
   */
  template <typename T, class... Args>
  Form* alloc_single_element_form(FormElement* parent, Args&&... args) {
    auto elt = alloc_element<T>(std::forward<Args>(args)...);
    return alloc_single_form(parent, elt);
  }

  /*!
   * Allocate a form containing a single existing element. The pool owns the form.
   */
  Form* alloc_single_form(FormElement* parent, FormElement* elt) {
    auto form = new Form(parent, elt);
    m_forms.push_back(form);
    return form;
  }

  /*!
   * Allocate a form containing a sequence of existing elements. The pool owns the form.
   * The sequence is taken by const reference to avoid an extra copy of the vector.
   */
  Form* alloc_sequence_form(FormElement* parent, const std::vector<FormElement*>& sequence) {
    auto form = new Form(parent, sequence);
    m_forms.push_back(form);
    return form;
  }

  /*!
   * Take ownership of an existing form.
   */
  Form* acquire(std::unique_ptr<Form> form_ptr) {
    Form* form = form_ptr.release();
    m_forms.push_back(form);
    return form;
  }

  /*!
   * Allocate a form with no elements. The pool owns the form.
   */
  Form* alloc_empty_form() {
    Form* form = new Form;
    m_forms.push_back(form);
    return form;
  }

  ~FormPool();

 private:
  std::vector<Form*> m_forms;
  std::vector<FormElement*> m_elements;
};
} // namespace decompiler

View file

@ -1,4 +1,4 @@
#include "AtomicOpBuilder.h"
#include "atomic_op_builder.h"
#include <memory>
#include "common/log/log.h"

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,7 @@
#pragma once
#include "decompiler/Function/Function.h"
namespace decompiler {
/*!
 * Build the initial Form representation of a function.
 * NOTE(review): the caller, ObjectFileDB::ir2_cfg_build_pass, checks function.ir2.top_form
 * afterward to count successes, so this presumably sets top_form when it succeeds - confirm
 * in cfg_builder.cpp.
 */
void build_initial_forms(Function& function);
}

View file

@ -73,6 +73,7 @@ class ObjectFileDB {
void ir2_type_analysis_pass();
void ir2_register_usage_pass();
void ir2_variable_pass();
void ir2_cfg_build_pass();
void ir2_write_results(const std::string& output_dir);
std::string ir2_to_file(ObjectFileData& data);
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);

View file

@ -10,6 +10,8 @@
#include "decompiler/Function/TypeInspector.h"
#include "decompiler/IR2/reg_usage.h"
#include "decompiler/IR2/variable_naming.h"
#include "decompiler/IR2/cfg_builder.h"
#include "common/goos/PrettyPrinter.h"
namespace decompiler {
@ -32,6 +34,8 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) {
ir2_register_usage_pass();
lg::info("Variable analysis...");
ir2_variable_pass();
lg::info("Initial conversion to Form...");
ir2_cfg_build_pass();
lg::info("Writing results...");
ir2_write_results(output_dir);
}
@ -307,7 +311,7 @@ void ObjectFileDB::ir2_variable_pass() {
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
(void)segment_id;
(void)data;
if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) {
if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.ir2.env.has_type_analysis()) {
try {
attempted++;
auto result = run_variable_renaming(func, func.ir2.reg_use, *func.ir2.atomic_ops, dts);
@ -324,6 +328,28 @@ void ObjectFileDB::ir2_variable_pass() {
attempted, timer.getMs());
}
/*!
 * Try to build the initial Forms for every function.
 * A function is attempted if it isn't suspected to be assembly, its atomic ops were built
 * successfully, and its CFG is fully resolved. It is counted as successful if it has a
 * top_form afterward. Logs successful/attempted/total and the time taken.
 */
void ObjectFileDB::ir2_cfg_build_pass() {
  Timer timer;
  int total = 0;
  int attempted = 0;
  int successful = 0;

  for_each_function_def_order([&](Function& func, int, ObjectFileData&) {
    ++total;

    const bool can_build =
        !func.suspected_asm && func.ir2.atomic_ops_succeeded && func.cfg->is_fully_resolved();
    if (can_build) {
      ++attempted;
      build_initial_forms(func);
    }

    if (func.ir2.top_form) {
      ++successful;
    }
  });

  lg::info("{}/{}/{} cfg build in {:.2f} ms\n", successful, attempted, total, timer.getMs());
}
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
Timer timer;
lg::info("Writing IR2 results to file...");
@ -358,6 +384,11 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) {
// functions
for (auto& func : data.linked_data.functions_by_seg.at(seg)) {
result += ir2_function_to_string(data, func, seg);
if (func.ir2.top_form) {
result += '\n';
result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env));
result += '\n';
}
}
// print data

View file

@ -1,6 +1,6 @@
#include "gtest/gtest.h"
#include "decompiler/IR2/AtomicOp.h"
#include "decompiler/IR2/AtomicOpBuilder.h"
#include "decompiler/IR2/atomic_op_builder.h"
#include "decompiler/Disasm/InstructionParser.h"
#include "third-party/fmt/core.h"
#include "third-party/fmt/format.h"
@ -67,7 +67,7 @@ void test_case(std::string assembly_lines,
// the ordering of the registers doesn't matter. It could happen to be in the same order
// as the opcode here, but it may not always be the case.
bool found = false;
for (const std::string reg : write_regs.at(i)) {
for (const std::string& reg : write_regs.at(i)) {
// TODO - is there a potential bug here in the event that either list has duplicate
// registers?
if (reg == expected_reg) {
@ -86,7 +86,7 @@ void test_case(std::string assembly_lines,
// the ordering of the registers doesn't matter. It could happen to be in the same order
// as the opcode here, but it may not always be the case.
bool found = false;
for (const std::string reg : read_regs.at(i)) {
for (const std::string& reg : read_regs.at(i)) {
// TODO - is there a potential bug here in the event that either list has duplicate
// registers?
if (reg == expected_reg) {
@ -104,7 +104,7 @@ void test_case(std::string assembly_lines,
// the ordering of the registers doesn't matter. It could happen to be in the same order
// as the opcode here, but it may not always be the case.
bool found = false;
for (const std::string reg : clobbered_regs.at(i)) {
for (const std::string& reg : clobbered_regs.at(i)) {
// TODO - is there a potential bug here in the event that either list has duplicate
// registers?
if (reg == expected_reg) {
@ -417,7 +417,7 @@ TEST(DecompilerAtomicOpBuilder, DSUBU_DADDIU_MOVZ) {
}
TEST(DecompilerAtomicOpBuilder, JALR_SLL) {
test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{}}, {{"t9"}},
test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{"v0"}}, {{"t9"}},
{{"a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
"at", "v1"}});
}