From 2f722e6379f6816a8166fae776c9d411c2915dbd Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Sun, 24 Jan 2021 16:39:15 -0500 Subject: [PATCH] [Decompiler] Expression Building (#211) * up to ash * add more expressions * fix some return variable usage nonsense * bfloat print working * basic-type working * type working, fix decompiler on all files * clang format --- decompiler/Function/BasicBlocks.h | 4 - decompiler/Function/Function.cpp | 7 +- decompiler/Function/Function.h | 4 +- decompiler/IR2/AtomicOp.cpp | 19 + decompiler/IR2/AtomicOp.h | 33 +- decompiler/IR2/AtomicOpForm.cpp | 117 ++++- decompiler/IR2/AtomicOpTypeAnalysis.cpp | 38 +- decompiler/IR2/Env.h | 26 +- decompiler/IR2/Form.cpp | 253 ++++++++--- decompiler/IR2/Form.h | 117 ++++- decompiler/IR2/FormExpressionAnalysis.cpp | 349 ++++++++++++++- decompiler/IR2/FormStack.cpp | 7 +- decompiler/IR2/FormStack.h | 2 +- decompiler/IR2/IR2_common.h | 46 +- decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 14 +- decompiler/analysis/atomic_op_builder.cpp | 5 +- decompiler/analysis/cfg_builder.cpp | 73 ++-- decompiler/analysis/cfg_builder.h | 5 +- decompiler/analysis/expression_build.cpp | 20 +- decompiler/analysis/expression_build.h | 6 +- decompiler/analysis/reg_usage.cpp | 47 +- decompiler/analysis/variable_naming.cpp | 2 + decompiler/analysis/variable_naming.h | 2 +- decompiler/util/TP_Type.cpp | 2 +- decompiler/util/TP_Type.h | 3 +- test/decompiler/FormRegressionTest.cpp | 37 +- .../decompiler/test_FormBeforeExpressions.cpp | 48 +-- test/decompiler/test_FormExpressionBuild.cpp | 401 +++++++++++++++++- 28 files changed, 1440 insertions(+), 247 deletions(-) diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index c8ae14c62..5f74c65e8 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -3,9 +3,6 @@ #include #include -#include "CfgVtx.h" -#include "decompiler/util/DecompilerTypeSystem.h" -#include "decompiler/util/TP_Type.h" // for RegSet: #include "decompiler/analysis/reg_usage.h" @@ -16,7 +13,6 @@ class Function; struct BasicBlock { int start_word; int end_word; - TypeState init_types; // [start, end) int start_basic_op = -1; diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index 7048cf711..bbc6a300a 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -7,6 +7,7 @@ #include "decompiler/util/DecompilerTypeSystem.h" #include "TypeInspector.h" #include "decompiler/IR/IR.h" +#include "decompiler/IR2/Form.h" namespace decompiler { namespace { @@ -43,7 +44,11 @@ uint32_t align4(uint32_t in) { } // namespace -Function::Function(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {} +Function::Function(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) { + ir2.form_pool.reset(new FormPool()); +} + +Function::~Function() {} /*! * Remove the function prologue from the first basic block and populate this->prologue with info. diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index a8e303e99..39d3154c7 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -15,7 +15,6 @@ #include "CfgVtx.h" #include "common/type_system/TypeSpec.h" #include "decompiler/config.h" -#include "decompiler/IR2/Form.h" namespace decompiler { class DecompilerTypeSystem; @@ -77,6 +76,7 @@ struct FunctionName { class Function { public: Function(int _start_word, int _end_word); + ~Function(); void analyze_prologue(const LinkedObjectFile& file); void find_global_function_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts); void find_method_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts); @@ -161,7 +161,7 @@ class Function { bool atomic_ops_succeeded = false; std::shared_ptr atomic_ops = nullptr; Env env; - FormPool form_pool; + std::shared_ptr form_pool = nullptr; Form* top_form = nullptr; std::string debug_form_string; bool print_debug_forms = false; diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index e7da60148..4258c3043 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "common/goal_constants.h" #include "third-party/fmt/core.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" @@ -1288,4 +1289,22 @@ void ConditionalMoveFalseOp::collect_vars(VariableSet& vars) const { vars.insert(m_dst); vars.insert(m_src); } + +bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out) { + if (expr.kind() == SimpleExpression::Kind::ADD && expr.get_arg(0).is_var() && + expr.get_arg(1).is_int()) { + out->var = expr.get_arg(0).var(); + out->reg = expr.get_arg(0).var().reg(); + out->offset = expr.get_arg(1).get_int(); + return true; + } + + if (expr.is_identity() && expr.get_arg(0).is_var()) { + out->var = expr.get_arg(0).var(); + out->reg = expr.get_arg(0).var().reg(); + out->offset = 0; + return true; + } + return false; +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 02b7b0664..3600db48d 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -61,7 +61,7 @@ class AtomicOp { // convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be // undesirable when expression stacking. - virtual FormElement* get_as_form(FormPool& pool) const = 0; + virtual FormElement* get_as_form(FormPool& pool, const Env& env) const = 0; // figure out what registers are read and written in this AtomicOp and update read_regs, // write_regs, and clobber_regs. It's expected that these have duplicates if a register appears @@ -256,7 +256,7 @@ class SetVarOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -282,7 +282,7 @@ class AsmOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -352,7 +352,7 @@ class IR2_Condition { void get_regs(std::vector* out) const; Kind kind() const { return m_kind; } const SimpleAtom& src(int i) const { return m_src[i]; } - ConditionElement* get_as_form(FormPool& pool) const; + ConditionElement* get_as_form(FormPool& pool, const Env& env, int my_idx) const; void collect_vars(VariableSet& vars) const; private: @@ -374,7 +374,7 @@ class SetVarConditionOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; void invert() { m_condition.invert(); } TypeState propagate_types_internal(const TypeState& input, @@ -399,7 +399,7 @@ class StoreOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -425,7 +425,7 @@ class LoadVarOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -500,7 +500,7 @@ class BranchOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -508,7 +508,7 @@ class BranchOp : public AtomicOp { void collect_vars(VariableSet& vars) const override; const IR2_BranchDelay& branch_delay() const { return m_branch_delay; } const IR2_Condition& condition() const { return m_condition; } - ConditionElement* get_condition_as_form(FormPool& pool) const; + ConditionElement* get_condition_as_form(FormPool& pool, const Env& env) const; bool likely() const { return m_likely; } private: @@ -536,7 +536,7 @@ class SpecialOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -558,12 +558,14 @@ class CallOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) override; void collect_vars(VariableSet& vars) const override; + const std::vector& arg_vars() const { return m_arg_vars; } + Variable function_var() const { return m_function_var; } protected: TypeSpec m_call_type; @@ -593,7 +595,7 @@ class ConditionalMoveFalseOp : public AtomicOp { bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - FormElement* get_as_form(FormPool& pool) const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -604,4 +606,11 @@ class ConditionalMoveFalseOp : public AtomicOp { Variable m_dst, m_src; bool m_on_zero; }; + +struct IR2_RegOffset { + Register reg; + Variable var; + int offset; +}; +bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out); } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index bb82fee00..cdcc3b3aa 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -1,59 +1,140 @@ #include "AtomicOp.h" #include "Form.h" +#include "common/type_system/TypeSystem.h" +#include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" namespace decompiler { -ConditionElement* BranchOp::get_condition_as_form(FormPool& pool) const { - return m_condition.get_as_form(pool); +namespace { +RegClass get_reg_kind(const Register& r) { + switch (r.get_kind()) { + case Reg::GPR: + return RegClass::GPR_64; + case Reg::FPR: + return RegClass::FLOAT; + default: + assert(false); + } } -ConditionElement* IR2_Condition::get_as_form(FormPool& pool) const { - Form* sources[2] = {nullptr, nullptr}; - int n_sources = get_condition_num_args(m_kind); - for (int i = 0; i < n_sources; i++) { - sources[i] = pool.alloc_single_element_form(nullptr, m_src[i]); +DerefToken to_token(FieldReverseLookupOutput::Token in) { + switch (in.kind) { + case FieldReverseLookupOutput::Token::Kind::FIELD: + return DerefToken::make_field_name(in.name); + case FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX: + return DerefToken::make_int_constant(in.idx); + default: + assert(false); + } +} +} // namespace + +ConditionElement* BranchOp::get_condition_as_form(FormPool& pool, const Env& env) const { + return m_condition.get_as_form(pool, env, m_my_idx); +} + +ConditionElement* IR2_Condition::get_as_form(FormPool& pool, const Env& env, int my_idx) const { + RegSet consumed; + if (env.has_reg_use()) { + consumed = env.reg_use().op.at(my_idx).consumes; } - return pool.alloc_element(m_kind, sources[0], sources[1]); + std::optional vars[2]; + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + vars[i] = m_src[i]; + } + return pool.alloc_element(m_kind, vars[0], vars[1], consumed); } -FormElement* SetVarOp::get_as_form(FormPool& pool) const { +FormElement* SetVarOp::get_as_form(FormPool& pool, const Env&) const { auto source = pool.alloc_single_element_form(nullptr, m_src, m_my_idx); return pool.alloc_element(m_dst, source, is_sequence_point()); } -FormElement* AsmOp::get_as_form(FormPool& pool) const { +FormElement* AsmOp::get_as_form(FormPool& pool, const Env&) const { return pool.alloc_element(this); } -FormElement* SetVarConditionOp::get_as_form(FormPool& pool) const { +FormElement* SetVarConditionOp::get_as_form(FormPool& pool, const Env& env) const { return pool.alloc_element( - m_dst, pool.alloc_single_form(nullptr, m_condition.get_as_form(pool)), is_sequence_point()); + m_dst, pool.alloc_single_form(nullptr, m_condition.get_as_form(pool, env, m_my_idx)), + is_sequence_point()); } -FormElement* StoreOp::get_as_form(FormPool& pool) const { +FormElement* StoreOp::get_as_form(FormPool& pool, const Env&) const { return pool.alloc_element(this); } -FormElement* LoadVarOp::get_as_form(FormPool& pool) const { +FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { + if (env.has_type_analysis()) { + IR2_RegOffset ro; + if (get_as_reg_offset(m_src, &ro)) { + auto& input_type = env.get_types_before_op(m_my_idx).get(ro.reg); + + // todo basic method + // todo structure method + // todo pointer + // todo product trick + // todo type of basic fallback + // todo dynamic method id access + + // Assume we're accessing a field of an object. + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = m_kind == Kind::SIGNED; + dk.size = m_size; + rd_in.deref = dk; + rd_in.base_type = input_type.typespec(); + rd_in.stride = 0; + rd_in.offset = ro.offset; + auto rd = env.dts->ts.reverse_field_lookup(rd_in); + + // todo, error here? + + if (rd.success) { + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + std::vector tokens; + for (auto& x : rd.tokens) { + tokens.push_back(to_token(x)); + } + auto load = + pool.alloc_single_element_form(nullptr, source, rd.addr_of, tokens); + return pool.alloc_element(m_dst, load, true); + } + + // todo, try as pair + } + } + auto source = pool.alloc_single_element_form(nullptr, m_src, m_my_idx); auto load = pool.alloc_single_element_form(nullptr, source, m_size, m_kind); return pool.alloc_element(m_dst, load, true); } -FormElement* BranchOp::get_as_form(FormPool& pool) const { +FormElement* BranchOp::get_as_form(FormPool& pool, const Env&) const { return pool.alloc_element(this); } -FormElement* SpecialOp::get_as_form(FormPool& pool) const { +FormElement* SpecialOp::get_as_form(FormPool& pool, const Env&) const { return pool.alloc_element(this); } -FormElement* CallOp::get_as_form(FormPool& pool) const { +FormElement* CallOp::get_as_form(FormPool& pool, const Env& env) const { auto call = pool.alloc_element(this); if (m_write_regs.empty() && m_call_type_set == true) { return call; } else if (m_write_regs.size() == 1 || !m_call_type_set) { + if (env.has_reg_use() && m_write_regs.size() == 1) { + auto& written_and_unused = env.reg_use().op.at(m_my_idx).written_and_unused; + if (written_and_unused.find(m_write_regs.front()) != written_and_unused.end()) { + return call; + } + } + // this is a little scary in the case that type analysis doesn't run and relies on the fact // that CallOp falls back to writing v0 in the case where the function type isn't known. Variable out_var(VariableMode::WRITE, Register(Reg::GPR, Reg::V0), m_my_idx); @@ -63,7 +144,7 @@ FormElement* CallOp::get_as_form(FormPool& pool) const { } } -FormElement* ConditionalMoveFalseOp::get_as_form(FormPool& pool) const { +FormElement* ConditionalMoveFalseOp::get_as_form(FormPool& pool, const Env&) const { auto source = pool.alloc_single_element_form(nullptr, SimpleAtom::make_var(m_src)); return pool.alloc_element(m_dst, source, m_on_zero); diff --git a/decompiler/IR2/AtomicOpTypeAnalysis.cpp b/decompiler/IR2/AtomicOpTypeAnalysis.cpp index bd1dee89e..849cef7a2 100644 --- a/decompiler/IR2/AtomicOpTypeAnalysis.cpp +++ b/decompiler/IR2/AtomicOpTypeAnalysis.cpp @@ -2,6 +2,7 @@ #include "decompiler/ObjectFile/LinkedObjectFile.h" #include "common/log/log.h" #include "AtomicOp.h" +#include "decompiler/util/DecompilerTypeSystem.h" namespace decompiler { @@ -14,25 +15,8 @@ bool is_int_or_uint(const DecompilerTypeSystem& dts, const TP_Type& type) { return tc(dts, TypeSpec("int"), type) || tc(dts, TypeSpec("uint"), type); } -struct IR2_RegOffset { - Register reg; - int offset; -}; - -bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out) { - if (expr.kind() == SimpleExpression::Kind::ADD && expr.get_arg(0).is_var() && - expr.get_arg(1).is_int()) { - out->reg = expr.get_arg(0).var().reg(); - out->offset = expr.get_arg(1).get_int(); - return true; - } - - if (expr.is_identity() && expr.get_arg(0).is_var()) { - out->reg = expr.get_arg(0).var().reg(); - out->offset = 0; - return true; - } - return false; +bool is_signed(const DecompilerTypeSystem& dts, const TP_Type& type) { + return tc(dts, TypeSpec("int"), type) && !tc(dts, TypeSpec("uint"), type); } RegClass get_reg_kind(const Register& r) { @@ -222,13 +206,25 @@ TP_Type SimpleExpression::get_type_int2(const TypeState& input, if (m_args[1].is_int() && is_int_or_uint(dts, arg0_type)) { assert(m_args[1].get_int() >= 0); assert(m_args[1].get_int() < 64); - return TP_Type::make_from_product(1ull << m_args[1].get_int()); + return TP_Type::make_from_product(1ull << m_args[1].get_int(), is_signed(dts, arg0_type)); } break; case Kind::MUL_SIGNED: { if (arg0_type.is_integer_constant() && is_int_or_uint(dts, arg1_type)) { - return TP_Type::make_from_product(arg0_type.get_integer_constant()); + return TP_Type::make_from_product(arg0_type.get_integer_constant(), + is_signed(dts, arg0_type)); + } else if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { + // signed multiply will always return a signed number. + return TP_Type::make_from_ts("int"); + } + } break; + + case Kind::DIV_SIGNED: + case Kind::MOD_SIGNED: { + if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { + // signed division will always return a signed number. + return TP_Type::make_from_ts("int"); } } break; diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index d2095ec0a..2a7821bde 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -11,32 +11,9 @@ namespace decompiler { class LinkedObjectFile; class Form; +class DecompilerTypeSystem; struct FunctionAtomicOps; -struct VariableNames { - struct VarInfo { - VarInfo() = default; - std::string name() const { return fmt::format("{}-{}", reg_id.reg.to_charp(), reg_id.id); } - TP_Type type; - RegId reg_id; - bool initialized = false; - }; - - // todo - this is kind of gross. - std::unordered_map, Register::hash> read_vars, - write_vars; - std::unordered_map, Register::hash> read_opid_to_varid, - write_opid_to_varid; - - const VarInfo& lookup(Register reg, int op_id, VariableMode mode) const { - if (mode == VariableMode::READ) { - return read_vars.at(reg).at(read_opid_to_varid.at(reg).at(op_id)); - } else { - return write_vars.at(reg).at(write_opid_to_varid.at(reg).at(op_id)); - } - } -}; - /*! * An "environment" for a single function. * This contains data for an entire function, like which registers are live when, the types of @@ -98,6 +75,7 @@ class Env { RegId get_ssa_var(const Variable& var) const; LinkedObjectFile* file = nullptr; + DecompilerTypeSystem* dts = nullptr; private: bool m_has_reg_use = false; diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 1fea898b4..130d548f8 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -257,22 +257,20 @@ void AtomicOpElement::collect_vars(VariableSet& vars) const { // ConditionElement ///////////////////////////// -ConditionElement::ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1) - : m_kind(kind) { +ConditionElement::ConditionElement(IR2_Condition::Kind kind, + std::optional src0, + std::optional src1, + RegSet consumed) + : m_kind(kind), m_consumed(std::move(consumed)) { m_src[0] = src0; m_src[1] = src1; - for (int i = 0; i < 2; i++) { - if (m_src[i]) { - m_src[i]->parent_element = this; - } - } } goos::Object ConditionElement::to_form(const Env& env) const { std::vector forms; forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); for (int i = 0; i < get_condition_num_args(m_kind); i++) { - forms.push_back(m_src[i]->to_form(env)); + forms.push_back(m_src[i]->to_form(env.file->labels, &env)); } if (forms.size() > 1) { return pretty_print::build_list(forms); @@ -283,20 +281,9 @@ goos::Object ConditionElement::to_form(const Env& env) const { void ConditionElement::apply(const std::function& f) { f(this); - for (int i = 0; i < 2; i++) { - if (m_src[i]) { - m_src[i]->apply(f); - } - } } -void ConditionElement::apply_form(const std::function& f) { - for (int i = 0; i < 2; i++) { - if (m_src[i]) { - m_src[i]->apply_form(f); - } - } -} +void ConditionElement::apply_form(const std::function&) {} void ConditionElement::invert() { m_kind = get_condition_opposite(m_kind); @@ -304,8 +291,8 @@ void ConditionElement::invert() { void ConditionElement::collect_vars(VariableSet& vars) const { for (auto src : m_src) { - if (src) { - src->collect_vars(vars); + if (src.has_value() && src->is_var()) { + vars.insert(src->var()); } } } @@ -652,59 +639,52 @@ void CondNoElseElement::collect_vars(VariableSet& vars) const { // AbsElement ///////////////////////////// -AbsElement::AbsElement(Form* _source) : source(_source) { - source->parent_element = this; -} +AbsElement::AbsElement(Variable _source, RegSet _consumed) + : source(_source), consumed(std::move(_consumed)) {} goos::Object AbsElement::to_form(const Env& env) const { - return pretty_print::build_list("abs", source->to_form(env)); + return pretty_print::build_list("abs", source.to_string(&env)); } void AbsElement::apply(const std::function& f) { f(this); - source->apply(f); } -void AbsElement::apply_form(const std::function& f) { - source->apply_form(f); -} +void AbsElement::apply_form(const std::function&) {} void AbsElement::collect_vars(VariableSet& vars) const { - source->collect_vars(vars); + vars.insert(source); } ///////////////////////////// // AshElement ///////////////////////////// -AshElement::AshElement(Form* _shift_amount, - Form* _value, +AshElement::AshElement(Variable _shift_amount, + Variable _value, std::optional _clobber, - bool _is_signed) - : shift_amount(_shift_amount), value(_value), clobber(_clobber), is_signed(_is_signed) { - _shift_amount->parent_element = this; - _value->parent_element = this; -} + bool _is_signed, + RegSet _consumed) + : shift_amount(_shift_amount), + value(_value), + clobber(_clobber), + is_signed(_is_signed), + consumed(_consumed) {} goos::Object AshElement::to_form(const Env& env) const { return pretty_print::build_list(pretty_print::to_symbol(is_signed ? "ash.si" : "ash.ui"), - value->to_form(env), shift_amount->to_form(env)); + value.to_string(&env), shift_amount.to_string(&env)); } void AshElement::apply(const std::function& f) { f(this); - shift_amount->apply(f); - value->apply(f); } -void AshElement::apply_form(const std::function& f) { - shift_amount->apply_form(f); - value->apply_form(f); -} +void AshElement::apply_form(const std::function&) {} void AshElement::collect_vars(VariableSet& vars) const { - shift_amount->collect_vars(vars); - value->collect_vars(vars); + vars.insert(value); + vars.insert(shift_amount); } ///////////////////////////// @@ -774,36 +754,66 @@ GenericOperator GenericOperator::make_fixed(FixedOperatorKind kind) { return op; } -void GenericOperator::collect_vars(VariableSet&) const { +GenericOperator GenericOperator::make_function(Form* value) { + GenericOperator op; + op.m_kind = Kind::FUNCTION_EXPR; + op.m_function = value; + return op; +} + +GenericOperator GenericOperator::make_compare(IR2_Condition::Kind kind) { + GenericOperator op; + op.m_kind = Kind::CONDITION_OPERATOR; + op.m_condition_kind = kind; + return op; +} + +void GenericOperator::collect_vars(VariableSet& vars) const { switch (m_kind) { case Kind::FIXED_OPERATOR: + case Kind::CONDITION_OPERATOR: + return; + case Kind::FUNCTION_EXPR: + m_function->collect_vars(vars); return; default: assert(false); } } -goos::Object GenericOperator::to_form(const Env&) const { +goos::Object GenericOperator::to_form(const Env& env) const { switch (m_kind) { case Kind::FIXED_OPERATOR: return pretty_print::to_symbol(fixed_operator_to_string(m_fixed_kind)); + case Kind::CONDITION_OPERATOR: + return pretty_print::to_symbol(get_condition_kind_name(m_condition_kind)); + case Kind::FUNCTION_EXPR: + return m_function->to_form(env); default: assert(false); } } -void GenericOperator::apply(const std::function&) { +void GenericOperator::apply(const std::function& f) { switch (m_kind) { case Kind::FIXED_OPERATOR: + case Kind::CONDITION_OPERATOR: + break; + case Kind::FUNCTION_EXPR: + m_function->apply(f); break; default: assert(false); } } -void GenericOperator::apply_form(const std::function&) { +void GenericOperator::apply_form(const std::function& f) { switch (m_kind) { case Kind::FIXED_OPERATOR: + case Kind::CONDITION_OPERATOR: + break; + case Kind::FUNCTION_EXPR: + m_function->apply_form(f); break; default: assert(false); @@ -818,6 +828,32 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "/"; case FixedOperatorKind::ADDITION: return "+"; + case FixedOperatorKind::SUBTRACTION: + return "-"; + case FixedOperatorKind::MULTIPLICATION: + return "*"; + case FixedOperatorKind::ARITH_SHIFT: + return "ash"; + case FixedOperatorKind::MOD: + return "mod"; + case FixedOperatorKind::ABS: + return "abs"; + case FixedOperatorKind::MIN: + return "min"; + case FixedOperatorKind::MAX: + return "max"; + case FixedOperatorKind::LOGAND: + return "logand"; + case FixedOperatorKind::LOGIOR: + return "logior"; + case FixedOperatorKind::LOGXOR: + return "logxor"; + case FixedOperatorKind::LOGNOR: + return "lognor"; + case FixedOperatorKind::LOGNOT: + return "lognot"; + case FixedOperatorKind::SLL: + return "sll"; default: assert(false); } @@ -883,4 +919,119 @@ void CastElement::apply_form(const std::function& f) { void CastElement::collect_vars(VariableSet& vars) const { m_source->collect_vars(vars); } + +///////////////////////////// +// DerefElement +///////////////////////////// + +DerefToken DerefToken::make_int_constant(s64 int_constant) { + DerefToken x; + x.m_kind = Kind::INTEGER_CONSTANT; + x.m_int_constant = int_constant; + return x; +} + +DerefToken DerefToken::make_int_expr(Form* expr) { + DerefToken x; + x.m_kind = Kind::INTEGER_EXPRESSION; + x.m_expr = expr; + return x; +} + +DerefToken DerefToken::make_field_name(const std::string& name) { + DerefToken x; + x.m_kind = Kind::FIELD_NAME; + x.m_name = name; + return x; +} + +void DerefToken::collect_vars(VariableSet& vars) const { + switch (m_kind) { + case Kind::INTEGER_CONSTANT: + case Kind::FIELD_NAME: + break; + case Kind::INTEGER_EXPRESSION: + m_expr->collect_vars(vars); + break; + default: + assert(false); + } +} + +goos::Object DerefToken::to_form(const Env& env) const { + switch (m_kind) { + case Kind::INTEGER_CONSTANT: + return pretty_print::to_symbol(fmt::format("{}", m_int_constant)); + case Kind::INTEGER_EXPRESSION: + return m_expr->to_form(env); + case Kind::FIELD_NAME: + return pretty_print::to_symbol(m_name); + default: + assert(false); + } +} + +void DerefToken::apply(const std::function& f) { + switch (m_kind) { + case Kind::INTEGER_CONSTANT: + case Kind::FIELD_NAME: + break; + case Kind::INTEGER_EXPRESSION: + m_expr->apply(f); + break; + default: + assert(false); + } +} + +void DerefToken::apply_form(const std::function& f) { + switch (m_kind) { + case Kind::INTEGER_CONSTANT: + case Kind::FIELD_NAME: + break; + case Kind::INTEGER_EXPRESSION: + m_expr->apply_form(f); + break; + default: + assert(false); + } +} + +DerefElement::DerefElement(Form* base, bool is_addr_of, DerefToken token) + : m_base(base), m_is_addr_of(is_addr_of), m_tokens({std::move(token)}) {} + +DerefElement::DerefElement(Form* base, bool is_addr_of, std::vector tokens) + : m_base(base), m_is_addr_of(is_addr_of), m_tokens(std::move(tokens)) {} + +goos::Object DerefElement::to_form(const Env& env) const { + std::vector forms = {pretty_print::to_symbol(m_is_addr_of ? "&->" : "->"), + m_base->to_form(env)}; + for (auto& tok : m_tokens) { + forms.push_back(tok.to_form(env)); + } + return pretty_print::build_list(forms); +} + +void DerefElement::apply(const std::function& f) { + f(this); + m_base->apply(f); + for (auto& tok : m_tokens) { + tok.apply(f); + } +} + +void DerefElement::apply_form(const std::function& f) { + m_base->apply_form(f); + for (auto& tok : m_tokens) { + tok.apply_form(f); + } +} + +void DerefElement::collect_vars(VariableSet& vars) const { + m_base->collect_vars(vars); + for (auto& tok : m_tokens) { + tok.collect_vars(vars); + } +} + } // namespace decompiler diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 3f6b58a29..d01c2e06a 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -79,6 +79,30 @@ class SimpleExpressionElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result); + void update_from_stack_mult_si(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result); + void update_from_stack_lognot(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result); + + void update_from_stack_force_si_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result); + void update_from_stack_force_ui_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result); + void update_from_stack_copy_first_int_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result); const SimpleExpression& expr() const { return m_expr; } @@ -121,10 +145,10 @@ class LoadSourceElement : public FormElement { int size() const { return m_size; } LoadVarOp::Kind kind() const { return m_kind; } const Form* location() const { return m_addr; } - virtual void update_from_stack(const Env& env, - FormPool& pool, - FormStack& stack, - std::vector* result); + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; private: Form* m_addr = nullptr; @@ -197,16 +221,22 @@ class AtomicOpElement : public FormElement { */ class ConditionElement : public FormElement { public: - ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1); + ConditionElement(IR2_Condition::Kind kind, + std::optional src0, + std::optional src1, + RegSet consumed); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; void invert(); + const RegSet& consume() const { return m_consumed; } private: IR2_Condition::Kind m_kind; - Form* m_src[2] = {nullptr, nullptr}; + std::optional m_src[2]; + RegSet m_consumed; }; /*! @@ -219,6 +249,11 @@ class FunctionCallElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; private: const CallOp* m_op; @@ -255,6 +290,7 @@ class ReturnElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! @@ -368,6 +404,7 @@ class UntilElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; Form* condition = nullptr; Form* body = nullptr; }; @@ -423,6 +460,7 @@ class CondNoElseElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! @@ -430,12 +468,17 @@ class CondNoElseElement : public FormElement { */ class AbsElement : public FormElement { public: - explicit AbsElement(Form* _source); + explicit AbsElement(Variable _source, RegSet _consumed); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; - Form* source = nullptr; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + Variable source; + RegSet consumed; }; /*! @@ -445,15 +488,23 @@ class AbsElement : public FormElement { */ class AshElement : public FormElement { public: - Form* shift_amount = nullptr; - Form* value = nullptr; + Variable shift_amount, value; std::optional clobber; bool is_signed = true; - AshElement(Form* _shift_amount, Form* _value, std::optional _clobber, bool _is_signed); + RegSet consumed; + AshElement(Variable _shift_amount, + Variable _value, + std::optional _clobber, + bool _is_signed, + RegSet _consumed); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; }; /*! @@ -509,9 +560,11 @@ std::string fixed_operator_to_string(FixedOperatorKind kind); */ class GenericOperator { public: - enum class Kind { FIXED_OPERATOR, INVALID }; + enum class Kind { FIXED_OPERATOR, CONDITION_OPERATOR, FUNCTION_EXPR, INVALID }; static GenericOperator make_fixed(FixedOperatorKind kind); + static GenericOperator make_function(Form* value); + static GenericOperator make_compare(IR2_Condition::Kind kind); void collect_vars(VariableSet& vars) const; goos::Object to_form(const Env& env) const; void apply(const std::function& f); @@ -519,7 +572,9 @@ class GenericOperator { private: Kind m_kind = Kind::INVALID; + IR2_Condition::Kind m_condition_kind = IR2_Condition::Kind::INVALID; FixedOperatorKind m_fixed_kind = FixedOperatorKind::INVALID; + Form* m_function = nullptr; }; class GenericElement : public FormElement { @@ -552,6 +607,44 @@ class CastElement : public FormElement { Form* m_source = nullptr; }; +class DerefToken { + public: + enum class Kind { INTEGER_CONSTANT, INTEGER_EXPRESSION, FIELD_NAME, INVALID }; + static DerefToken make_int_constant(s64 int_constant); + static DerefToken make_int_expr(Form* expr); + static DerefToken make_field_name(const std::string& name); + + void collect_vars(VariableSet& vars) const; + goos::Object to_form(const Env& env) const; + void apply(const std::function& f); + void apply_form(const std::function& f); + + private: + Kind m_kind = Kind::INVALID; + s64 m_int_constant = -1; + std::string m_name; + Form* m_expr = nullptr; +}; + +class DerefElement : public FormElement { + public: + DerefElement(Form* base, bool is_addr_of, DerefToken token); + DerefElement(Form* base, bool is_addr_of, std::vector tokens); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + void collect_vars(VariableSet& vars) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + + private: + Form* m_base = nullptr; + bool m_is_addr_of = false; + std::vector m_tokens; +}; + /*! * A Form is a wrapper around one or more FormElements. * This is done for two reasons: diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index ec72dc6c8..e9596e7c6 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -5,6 +5,27 @@ namespace decompiler { namespace { +void update_var_from_stack_helper(int my_idx, + Variable input, + FormPool& pool, + FormStack& stack, + const RegSet& consumes, + std::vector* result) { + if (consumes.find(input.reg()) != consumes.end()) { + // is consumed. + auto stack_val = stack.pop_reg(input); + if (stack_val) { + for (auto x : stack_val->elts()) { + result->push_back(x); + } + return; + } + } + auto elt = + pool.alloc_element(SimpleAtom::make_var(input).as_expr(), my_idx); + result->push_back(elt); +} + void update_var_from_stack_helper(int my_idx, Variable input, const Env& env, @@ -37,6 +58,16 @@ Form* update_var_from_stack_to_form(int my_idx, return pool.alloc_sequence_form(nullptr, elts); } +Form* update_var_from_stack_to_form(int my_idx, + Variable input, + const RegSet& consumes, + FormPool& pool, + FormStack& stack) { + std::vector elts; + update_var_from_stack_helper(my_idx, input, pool, stack, consumes, &elts); + return pool.alloc_sequence_form(nullptr, elts); +} + bool is_float_type(const Env& env, int my_idx, Variable var) { auto type = env.get_types_before_op(my_idx).get(var.reg()).typespec(); return type == TypeSpec("float"); @@ -86,9 +117,11 @@ void SimpleExpressionElement::update_from_stack_identity(const Env& env, } else if (arg.is_static_addr()) { // for now, do nothing. result->push_back(this); + } else if (arg.is_sym_ptr() || arg.is_sym_val()) { + result->push_back(this); } else { - throw std::runtime_error( - fmt::format("SimpleExpressionElement::update_from_stack NYI for {}", to_string(env))); + throw std::runtime_error(fmt::format( + "SimpleExpressionElement::update_from_stack_identity NYI for {}", to_string(env))); } } @@ -147,11 +180,23 @@ void SimpleExpressionElement::update_from_stack_add_i(const Env& env, std::vector* result) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); - auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); + + bool arg1_reg = m_expr.get_arg(1).is_var(); + bool arg1_i = true; + bool arg1_u = true; + if (arg1_reg) { + arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); + } auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + Form* arg1; + + if (arg1_reg) { + arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + } else { + arg1 = pool.alloc_single_element_form(nullptr, m_expr.get_arg(1)); + } if ((arg0_i && arg1_i) || (arg0_u && arg1_u)) { auto new_form = pool.alloc_element( @@ -166,6 +211,123 @@ void SimpleExpressionElement::update_from_stack_add_i(const Env& env, } } +void SimpleExpressionElement::update_from_stack_mult_si(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); + auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + + auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + + if (!arg0_i) { + arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg0); + } + + if (!arg1_i) { + arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg1); + } + + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::MULTIPLICATION), arg0, arg1); + result->push_back(new_form); +} + +void SimpleExpressionElement::update_from_stack_force_si_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); + auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + + auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + + if (!arg0_i) { + arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg0); + } + + if (!arg1_i) { + arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg1); + } + + auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + result->push_back(new_form); +} + +void SimpleExpressionElement::update_from_stack_force_ui_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); + bool arg1_u = true; + bool arg1_reg = m_expr.get_arg(1).is_var(); + if (arg1_reg) { + arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); + } else { + assert(m_expr.get_arg(1).is_int()); + } + + Form* arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + Form* arg1; + if (arg1_reg) { + arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + } else { + arg1 = pool.alloc_single_element_form(nullptr, m_expr.get_arg(1)); + } + + if (!arg0_u) { + arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), arg0); + } + + if (!arg1_u) { + arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), arg1); + } + + auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + result->push_back(new_form); +} + +void SimpleExpressionElement::update_from_stack_copy_first_int_2( + const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); + auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); + auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + auto arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); + + auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + + if ((arg0_i && arg1_i) || (arg0_u && arg1_u)) { + auto new_form = + pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + result->push_back(new_form); + } else { + auto cast = pool.alloc_single_element_form( + nullptr, TypeSpec(arg0_i ? "int" : "uint"), arg1); + auto new_form = + pool.alloc_element(GenericOperator::make_fixed(kind), arg0, cast); + result->push_back(new_form); + } +} + +void SimpleExpressionElement::update_from_stack_lognot(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::LOGNOT), arg0); + result->push_back(new_form); +} + void SimpleExpressionElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, @@ -186,6 +348,42 @@ void SimpleExpressionElement::update_from_stack(const Env& env, case SimpleExpression::Kind::ADD: update_from_stack_add_i(env, pool, stack, result); break; + case SimpleExpression::Kind::SUB: + update_from_stack_copy_first_int_2(env, FixedOperatorKind::SUBTRACTION, pool, stack, result); + break; + case SimpleExpression::Kind::MUL_SIGNED: + update_from_stack_mult_si(env, pool, stack, result); + break; + case SimpleExpression::Kind::DIV_SIGNED: + update_from_stack_force_si_2(env, FixedOperatorKind::DIVISION, pool, stack, result); + break; + case SimpleExpression::Kind::MOD_SIGNED: + update_from_stack_force_si_2(env, FixedOperatorKind::MOD, pool, stack, result); + break; + case SimpleExpression::Kind::MIN_SIGNED: + update_from_stack_force_si_2(env, FixedOperatorKind::MIN, pool, stack, result); + break; + case SimpleExpression::Kind::MAX_SIGNED: + update_from_stack_force_si_2(env, FixedOperatorKind::MAX, pool, stack, result); + break; + case SimpleExpression::Kind::AND: + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGAND, pool, stack, result); + break; + case SimpleExpression::Kind::OR: + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGIOR, pool, stack, result); + break; + case SimpleExpression::Kind::NOR: + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGNOR, pool, stack, result); + break; + case SimpleExpression::Kind::XOR: + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGXOR, pool, stack, result); + break; + case SimpleExpression::Kind::LOGNOT: + update_from_stack_lognot(env, pool, stack, result); + break; + case SimpleExpression::Kind::LEFT_SHIFT: + update_from_stack_force_ui_2(env, FixedOperatorKind::SLL, pool, stack, result); + break; default: throw std::runtime_error( fmt::format("SimpleExpressionElement::update_from_stack NYI for {}", to_string(env))); @@ -211,4 +409,145 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta stack.push_value_to_reg(m_dst, m_src, true); } + +/////////////////// +// AshElement +/////////////////// + +void AshElement::update_from_stack(const Env&, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto val_form = update_var_from_stack_to_form(value.idx(), value, consumed, pool, stack); + auto sa_form = + update_var_from_stack_to_form(shift_amount.idx(), shift_amount, consumed, pool, stack); + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::ARITH_SHIFT), val_form, sa_form); + result->push_back(new_form); +} + +/////////////////// +// AbsElement +/////////////////// + +void AbsElement::update_from_stack(const Env&, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto source_form = update_var_from_stack_to_form(source.idx(), source, consumed, pool, stack); + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::ABS), source_form); + result->push_back(new_form); +} + +/////////////////// +// FunctionCallElement +/////////////////// + +void FunctionCallElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + std::vector args; + auto nargs = m_op->arg_vars().size(); + args.resize(nargs, nullptr); + + for (size_t i = nargs; i-- > 0;) { + auto var = m_op->arg_vars().at(i); + args.at(i) = update_var_from_stack_to_form(m_op->op_id(), var, env, pool, stack); + } + Form* func = update_var_from_stack_to_form(m_op->op_id(), m_op->function_var(), env, pool, stack); + auto new_form = pool.alloc_element(GenericOperator::make_function(func), args); + result->push_back(new_form); +} + +void FunctionCallElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + std::vector rewritten; + update_from_stack(env, pool, stack, &rewritten); + for (auto x : rewritten) { + stack.push_form_element(x, true); + } +} + +/////////////////// +// DerefElement +/////////////////// +void DerefElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + // todo - update var tokens from stack? + m_base->update_children_from_stack(env, pool, stack); + result->push_back(this); +} + +/////////////////// +// UntilElement +/////////////////// + +void UntilElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + for (auto form : {condition, body}) { + FormStack temp_stack; + for (auto& entry : form->elts()) { + entry->push_to_stack(env, pool, temp_stack); + } + auto new_entries = temp_stack.rewrite(pool); + form->clear(); + for (auto e : new_entries) { + form->push_back(e); + } + } + + stack.push_form_element(this, true); +} + +/////////////////// +// CondNoElseElement +/////////////////// +void CondNoElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + for (auto& entry : entries) { + for (auto form : {entry.condition, entry.body}) { + FormStack temp_stack; + for (auto& elt : form->elts()) { + elt->push_to_stack(env, pool, temp_stack); + } + + std::vector new_entries; + if (form == entry.body) { + new_entries = temp_stack.rewrite(pool); + } else { + new_entries = temp_stack.rewrite(pool); + } + + form->clear(); + for (auto e : new_entries) { + form->push_back(e); + } + } + } + + stack.push_form_element(this, true); +} + +/////////////////// +// ConditionElement +/////////////////// + +void ConditionElement::push_to_stack(const Env&, FormPool& pool, FormStack& stack) { + std::vector source_forms; + + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), m_src[i]->var(), + m_consumed, pool, stack)); + } + + stack.push_form_element( + pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms), + true); +} + +void ReturnElement::push_to_stack(const Env&, FormPool&, FormStack& stack) { + stack.push_form_element(this, true); +} + } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/FormStack.cpp b/decompiler/IR2/FormStack.cpp index 09cc5185f..ab351ef11 100644 --- a/decompiler/IR2/FormStack.cpp +++ b/decompiler/IR2/FormStack.cpp @@ -89,7 +89,9 @@ std::vector FormStack::rewrite(FormPool& pool) { return result; } -std::vector FormStack::rewrite_to_get_reg(FormPool& pool, Register reg) { +std::vector FormStack::rewrite_to_get_reg(FormPool& pool, + Register reg, + const Env& env) { // first, rewrite as normal. auto default_result = rewrite(pool); @@ -105,7 +107,8 @@ std::vector FormStack::rewrite_to_get_reg(FormPool& pool, Register } return default_result; } else { - throw std::runtime_error(fmt::format("Couldn't rewrite form to get result")); + throw std::runtime_error( + fmt::format("Couldn't rewrite form to get result {}:\n{}\n\n", reg.to_charp(), print(env))); } } } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/FormStack.h b/decompiler/IR2/FormStack.h index 8ae699818..2f2afee62 100644 --- a/decompiler/IR2/FormStack.h +++ b/decompiler/IR2/FormStack.h @@ -18,7 +18,7 @@ class FormStack { Form* pop_reg(const Variable& var); bool is_single_expression(); std::vector rewrite(FormPool& pool); - std::vector rewrite_to_get_reg(FormPool& pool, Register reg); + std::vector rewrite_to_get_reg(FormPool& pool, Register reg, const Env& env); std::string print(const Env& env); private: diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index 06848459f..a7d975ee0 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -2,6 +2,8 @@ #include #include "common/common_types.h" #include "decompiler/Disasm/Register.h" +#include "decompiler/util/TP_Type.h" +#include "third-party/fmt/core.h" namespace decompiler { enum class VariableMode : u8 { @@ -86,5 +88,47 @@ class Variable { using VariableSet = std::unordered_set; -enum class FixedOperatorKind { GPR_TO_FPR, DIVISION, ADDITION, INVALID }; +enum class FixedOperatorKind { + GPR_TO_FPR, + DIVISION, + ADDITION, + SUBTRACTION, + MULTIPLICATION, + ARITH_SHIFT, + MOD, + ABS, + MIN, + MAX, + LOGAND, + LOGIOR, + LOGXOR, + LOGNOR, + LOGNOT, + SLL, + INVALID +}; + +struct VariableNames { + struct VarInfo { + VarInfo() = default; + std::string name() const { return fmt::format("{}-{}", reg_id.reg.to_charp(), reg_id.id); } + TP_Type type; + RegId reg_id; + bool initialized = false; + }; + + // todo - this is kind of gross. + std::unordered_map, Register::hash> read_vars, + write_vars; + std::unordered_map, Register::hash> read_opid_to_varid, + write_opid_to_varid; + + const VarInfo& lookup(Register reg, int op_id, VariableMode mode) const { + if (mode == VariableMode::READ) { + return read_vars.at(reg).at(read_opid_to_varid.at(reg).at(op_id)); + } else { + return write_vars.at(reg).at(write_opid_to_varid.at(reg).at(op_id)); + } + } +}; } // namespace decompiler diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 9dc8c11fa..8158b3d6c 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -13,6 +13,7 @@ #include "decompiler/analysis/cfg_builder.h" #include "decompiler/analysis/expression_build.h" #include "common/goos/PrettyPrinter.h" +#include "decompiler/IR2/Form.h" namespace decompiler { @@ -37,10 +38,12 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) { ir2_variable_pass(); lg::info("Initial structuring.."); ir2_cfg_build_pass(); - lg::info("Storing temporary form result..."); - ir2_store_current_forms(); - lg::info("Expression building..."); - ir2_build_expressions(); + if (get_config().analyze_expressions) { + lg::info("Storing temporary form result..."); + ir2_store_current_forms(); + lg::info("Expression building..."); + ir2_build_expressions(); + } lg::info("Writing results..."); ir2_write_results(output_dir); } @@ -159,6 +162,7 @@ void ObjectFileDB::ir2_basic_block_pass() { for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { total_functions++; func.ir2.env.file = &data.linked_data; + func.ir2.env.dts = &dts; // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); @@ -384,7 +388,7 @@ void ObjectFileDB::ir2_build_expressions() { total++; if (func.ir2.top_form) { attempted++; - if (convert_to_expressions(func.ir2.top_form, func.ir2.form_pool, func)) { + if (convert_to_expressions(func.ir2.top_form, *func.ir2.form_pool, func, dts)) { successful++; func.ir2.print_debug_forms = true; } diff --git a/decompiler/analysis/atomic_op_builder.cpp b/decompiler/analysis/atomic_op_builder.cpp index 97674d26a..daa2da40e 100644 --- a/decompiler/analysis/atomic_op_builder.cpp +++ b/decompiler/analysis/atomic_op_builder.cpp @@ -1,11 +1,10 @@ -#include "atomic_op_builder.h" - #include #include "common/log/log.h" #include "common/symbols.h" -#include "decompiler/Function/BasicBlocks.h" +#include "atomic_op_builder.h" #include "decompiler/Function/Function.h" #include "decompiler/Disasm/InstructionMatching.h" +#include "decompiler/util/TP_Type.h" namespace decompiler { diff --git a/decompiler/analysis/cfg_builder.cpp b/decompiler/analysis/cfg_builder.cpp index 5aa25db37..3fac615ce 100644 --- a/decompiler/analysis/cfg_builder.cpp +++ b/decompiler/analysis/cfg_builder.cpp @@ -5,6 +5,8 @@ #include "cfg_builder.h" #include "decompiler/util/MatchParam.h" +#include "decompiler/Function/Function.h" +#include "decompiler/IR2/Form.h" namespace decompiler { namespace { @@ -60,7 +62,7 @@ std::pair get_condition_branch(Form* in) { * compare IR instead of a branch. * Doesn't "rebalance" the leading condition because this runs way before expression compaction. */ -void clean_up_cond_with_else(FormPool& pool, FormElement* ir) { +void clean_up_cond_with_else(FormPool& pool, FormElement* ir, const Env& env) { auto cwe = dynamic_cast(ir); assert(cwe); for (auto& e : cwe->entries) { @@ -72,7 +74,7 @@ void clean_up_cond_with_else(FormPool& pool, FormElement* ir) { assert(jump_to_next.first); assert(jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); // patch the branch to next with a condition. - auto replacement = jump_to_next.first->op()->get_condition_as_form(pool); + auto replacement = jump_to_next.first->op()->get_condition_as_form(pool, env); replacement->invert(); *(jump_to_next.second) = replacement; @@ -102,11 +104,11 @@ void clean_up_cond_with_else(FormPool& pool, FormElement* ir) { /*! * Replace the branch at the end of an until loop's condition with a condition. */ -void clean_up_until_loop(FormPool& pool, UntilElement* ir) { +void clean_up_until_loop(FormPool& pool, UntilElement* ir, const Env& env) { auto condition_branch = get_condition_branch(ir->condition); assert(condition_branch.first); assert(condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); - auto replacement = condition_branch.first->op()->get_condition_as_form(pool); + auto replacement = condition_branch.first->op()->get_condition_as_form(pool, env); replacement->invert(); *(condition_branch.second) = replacement; } @@ -261,7 +263,7 @@ bool try_clean_up_sc_as_and(FormPool& pool, const Function& func, ShortCircuitEl } } - auto replacement = branch.first->op()->get_condition_as_form(pool); + auto replacement = branch.first->op()->get_condition_as_form(pool, func.ir2.env); replacement->invert(); *(branch.second) = replacement; } @@ -317,7 +319,7 @@ bool try_clean_up_sc_as_or(FormPool& pool, const Function& func, ShortCircuitEle } } - auto replacement = branch.first->op()->get_condition_as_form(pool); + auto replacement = branch.first->op()->get_condition_as_form(pool, func.ir2.env); *(branch.second) = replacement; } @@ -442,7 +444,7 @@ void convert_cond_no_else_to_compare(FormPool& pool, auto condition_as_single = dynamic_cast(cne->entries.front().condition->try_as_single_element()); - auto condition_replacement = condition.first->op()->get_condition_as_form(pool); + auto condition_replacement = condition.first->op()->get_condition_as_form(pool, f.ir2.env); auto crf = pool.alloc_single_form(nullptr, condition_replacement); auto replacement = pool.alloc_element(dst, crf, true); replacement->parent_form = cne->parent_form; @@ -552,7 +554,7 @@ void clean_up_cond_no_else(FormPool& pool, e.original_condition_branch = *jump_to_next.second; - auto replacement = jump_to_next.first->op()->get_condition_as_form(pool); + auto replacement = jump_to_next.first->op()->get_condition_as_form(pool, f.ir2.env); replacement->invert(); *(jump_to_next.second) = replacement; e.cleaned = true; @@ -709,8 +711,15 @@ Form* try_sc_as_abs(FormPool& pool, const Function& f, const ShortCircuit* vtx) // remove the branch b0_ptr->pop_back(); // add the ash - auto src_var = pool.alloc_single_element_form(nullptr, input); - auto src_abs = pool.alloc_single_element_form(nullptr, src_var); + auto& info = f.ir2.env.reg_use(); + auto final_op_idx = input.var().idx(); + RegSet consumed = info.op.at(final_op_idx).consumes; + + if (output.reg() == input.var().reg()) { + consumed.insert(output.reg()); + } + + auto src_abs = pool.alloc_single_element_form(nullptr, input.var(), consumed); auto replacement = pool.alloc_element(output, src_abs, true); b0_ptr->push_back(replacement); @@ -799,11 +808,21 @@ Form* try_sc_as_ash(FormPool& pool, const Function& f, const ShortCircuit* vtx) // remove the branch b0_ptr->pop_back(); + auto& info = f.ir2.env.reg_use(); + auto final_op_idx = value_ir.var().idx(); + RegSet consumed = info.op.at(final_op_idx).consumes; + for (auto var : {shift_ir.var(), value_ir.var()}) { + if (var.reg() == clobber) { + consumed.insert(var.reg()); + } + if (var.reg() == dest_ir.reg()) { + consumed.insert(var.reg()); + } + } + // setup - auto value_form = pool.alloc_single_element_form(nullptr, value_ir); - auto shift_form = pool.alloc_single_element_form(nullptr, shift_ir); - auto ash_form = pool.alloc_single_element_form(nullptr, shift_form, value_form, - clobber_ir, is_arith); + auto ash_form = pool.alloc_single_element_form( + nullptr, shift_ir.var(), value_ir.var(), clobber_ir, is_arith, consumed); auto set_form = pool.alloc_element(dest_ir, ash_form, true); b0_ptr->push_back(set_form); @@ -993,7 +1012,7 @@ void insert_cfg_into_list(FormPool& pool, auto start_op = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(as_block->block_id); auto end_op = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(as_block->block_id); for (auto i = start_op; i < end_op; i++) { - output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool)); + output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool, f.ir2.env)); } } else { auto ir = cfg_to_ir(pool, f, vtx); @@ -1011,7 +1030,7 @@ Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { auto start_op = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(bv->block_id); auto end_op = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(bv->block_id); for (auto i = start_op; i < end_op; i++) { - output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool)); + output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool, f.ir2.env)); } return output; @@ -1031,19 +1050,21 @@ Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { auto wvtx = dynamic_cast(vtx); auto result = pool.alloc_single_element_form( nullptr, cfg_to_ir(pool, f, wvtx->condition), cfg_to_ir(pool, f, wvtx->body)); - clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element())); + clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element()), + f.ir2.env); return result; } else if (dynamic_cast(vtx)) { auto wvtx = dynamic_cast(vtx); auto empty = pool.alloc_single_element_form(nullptr); auto result = pool.alloc_single_element_form( nullptr, cfg_to_ir(pool, f, wvtx->block), empty); - clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element())); + clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element()), + f.ir2.env); return result; } else if (dynamic_cast(vtx)) { auto wvtx = dynamic_cast(vtx); auto condition = pool.alloc_single_element_form( - nullptr, IR2_Condition::Kind::ALWAYS, nullptr, nullptr); + nullptr, IR2_Condition::Kind::ALWAYS, std::nullopt, std::nullopt, RegSet()); auto result = pool.alloc_single_element_form(nullptr, condition, cfg_to_ir(pool, f, wvtx->block)); clean_up_infinite_while_loop(pool, @@ -1089,8 +1110,8 @@ Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { entries.push_back(std::move(e)); } auto result = pool.alloc_single_element_form(nullptr, entries, else_ir); - clean_up_cond_with_else(pool, - dynamic_cast(result->try_as_single_element())); + clean_up_cond_with_else( + pool, dynamic_cast(result->try_as_single_element()), f.ir2.env); return result; } } else if (dynamic_cast(vtx)) { @@ -1159,7 +1180,7 @@ Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { * has a jump to the condition branch that we need to remove. This currently happens after all * conversion but this may need to be revisited depending on the final order of simplifications. */ -void clean_up_while_loops(FormPool& pool, Form* sequence) { +void clean_up_while_loops(FormPool& pool, Form* sequence, const Env& env) { std::vector to_remove; // the list of branches to remove by index in this sequence for (int i = 0; i < sequence->size(); i++) { auto* form_as_while = dynamic_cast(sequence->at(i)); @@ -1181,7 +1202,7 @@ void clean_up_while_loops(FormPool& pool, Form* sequence) { assert(condition_branch.first); assert(condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); // printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); - auto replacement = condition_branch.first->op()->get_condition_as_form(pool); + auto replacement = condition_branch.first->op()->get_condition_as_form(pool, env); *(condition_branch.second) = replacement; } @@ -1206,10 +1227,10 @@ void build_initial_forms(Function& function) { auto& pool = function.ir2.form_pool; auto top_level = function.cfg->get_single_top_level(); std::vector top_level_elts; - insert_cfg_into_list(pool, function, top_level, &top_level_elts); - auto result = pool.alloc_sequence_form(nullptr, top_level_elts); + insert_cfg_into_list(*pool, function, top_level, &top_level_elts); + auto result = pool->alloc_sequence_form(nullptr, top_level_elts); - result->apply_form([&](Form* form) { clean_up_while_loops(pool, form); }); + result->apply_form([&](Form* form) { clean_up_while_loops(*pool, form, function.ir2.env); }); result->apply([&](FormElement* form) { auto as_cne = dynamic_cast(form); diff --git a/decompiler/analysis/cfg_builder.h b/decompiler/analysis/cfg_builder.h index e8745e3d5..a197bc59a 100644 --- a/decompiler/analysis/cfg_builder.h +++ b/decompiler/analysis/cfg_builder.h @@ -1,7 +1,6 @@ #pragma once -#include "decompiler/Function/Function.h" - namespace decompiler { +class Function; void build_initial_forms(Function& function); -} \ No newline at end of file +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/analysis/expression_build.cpp b/decompiler/analysis/expression_build.cpp index 44978ed8f..6a728fef9 100644 --- a/decompiler/analysis/expression_build.cpp +++ b/decompiler/analysis/expression_build.cpp @@ -2,9 +2,13 @@ #include "decompiler/Function/Function.h" #include "decompiler/IR2/Form.h" #include "decompiler/IR2/FormStack.h" +#include "decompiler/util/DecompilerTypeSystem.h" namespace decompiler { -bool convert_to_expressions(Form* top_level_form, FormPool& pool, const Function& f) { +bool convert_to_expressions(Form* top_level_form, + FormPool& pool, + const Function& f, + const DecompilerTypeSystem& dts) { assert(top_level_form); try { @@ -31,12 +35,22 @@ bool convert_to_expressions(Form* top_level_form, FormPool& pool, const Function FormStack stack; for (auto& entry : top_level_form->elts()) { - fmt::print("push {} to stack\n", entry->to_form(f.ir2.env).print()); + // fmt::print("push {} to stack\n", entry->to_form(f.ir2.env).print()); entry->push_to_stack(f.ir2.env, pool, stack); } std::vector new_entries; if (f.type.last_arg() != TypeSpec("none")) { - new_entries = stack.rewrite_to_get_reg(pool, Register(Reg::GPR, Reg::V0)); + auto v0 = Register(Reg::GPR, Reg::V0); + new_entries = stack.rewrite_to_get_reg(pool, v0, f.ir2.env); + auto reg_return_type = f.ir2.env.get_types_after_op(f.ir2.atomic_ops->ops.size() - 1).get(v0); + if (!dts.ts.typecheck(f.type.last_arg(), reg_return_type.typespec(), "", false, false)) { + // we need to cast the final value. + auto to_cast = new_entries.back(); + new_entries.pop_back(); + auto cast = pool.alloc_element(f.type.last_arg(), + pool.alloc_single_form(nullptr, to_cast)); + new_entries.push_back(cast); + } } else { new_entries = stack.rewrite(pool); } diff --git a/decompiler/analysis/expression_build.h b/decompiler/analysis/expression_build.h index 227489a9a..bb1e803fa 100644 --- a/decompiler/analysis/expression_build.h +++ b/decompiler/analysis/expression_build.h @@ -4,5 +4,9 @@ namespace decompiler { class Form; class Function; class FormPool; -bool convert_to_expressions(Form* top_level_form, FormPool& pool, const Function& f); +class DecompilerTypeSystem; +bool convert_to_expressions(Form* top_level_form, + FormPool& pool, + const Function& f, + const DecompilerTypeSystem& dts); } // namespace decompiler \ No newline at end of file diff --git a/decompiler/analysis/reg_usage.cpp b/decompiler/analysis/reg_usage.cpp index 078048583..154d561b7 100644 --- a/decompiler/analysis/reg_usage.cpp +++ b/decompiler/analysis/reg_usage.cpp @@ -12,18 +12,33 @@ bool in_set(RegSet& set, const Register& obj) { return set.find(obj) != set.end(); } -void phase1(const FunctionAtomicOps& ops, int block_id, RegUsageInfo* out) { +void phase1(const FunctionAtomicOps& ops, + int block_id, + RegUsageInfo* out, + bool insert_v0_read_instruction_at_end) { int end_op = ops.block_id_to_end_atomic_op.at(block_id); int start_op = ops.block_id_to_first_atomic_op.at(block_id); - for (int i = end_op; i-- > start_op;) { - const auto& instr = ops.ops.at(i); + int loop_end = end_op; + if (insert_v0_read_instruction_at_end) { + loop_end++; + } + for (int i = loop_end; i-- > start_op;) { + std::vector read; + std::vector write; + if (i == end_op) { + read = {Register(Reg::GPR, Reg::V0)}; + } else { + const auto& instr = ops.ops.at(i); + read = instr->read_regs(); + write = instr->write_regs(); + } + auto& lv = out->op.at(i).live; auto& dd = out->op.at(i).dead; auto& block = out->block.at(block_id); // make all read live out - auto read = instr->read_regs(); lv.clear(); for (auto& x : read) { lv.insert(x); @@ -31,7 +46,6 @@ void phase1(const FunctionAtomicOps& ops, int block_id, RegUsageInfo* out) { // kill things which are overwritten dd.clear(); - auto write = instr->write_regs(); for (auto& x : write) { if (!in_set(lv, x)) { dd.insert(x); @@ -100,7 +114,8 @@ bool phase2(const std::vector& blocks, int block_id, RegUsageInfo* i void phase3(const FunctionAtomicOps& ops, const std::vector& blocks, int block_id, - RegUsageInfo* info) { + RegUsageInfo* info, + bool insert_v0_read_instruction_at_end) { RegSet live_local; const auto& block_obj = blocks.at(block_id); for (auto s : {block_obj.succ_branch, block_obj.succ_ft}) { @@ -115,7 +130,12 @@ void phase3(const FunctionAtomicOps& ops, int end_op = ops.block_id_to_end_atomic_op.at(block_id); int start_op = ops.block_id_to_first_atomic_op.at(block_id); - for (int i = end_op; i-- > start_op;) { + int loop_end = end_op; + if (insert_v0_read_instruction_at_end) { + loop_end++; + } + + for (int i = loop_end; i-- > start_op;) { auto& lv = info->op.at(i).live; auto& dd = info->op.at(i).dead; @@ -130,15 +150,20 @@ void phase3(const FunctionAtomicOps& ops, } } +bool should_insert_v0_read(const std::vector& blocks, const Function& function, int i) { + return i == int(blocks.size()) - 1 && function.type.arg_count() > 0 && + function.type.last_arg() != TypeSpec("none"); +} + } // namespace RegUsageInfo analyze_ir2_register_usage(const Function& function) { const auto& blocks = function.basic_blocks; const auto& ops = function.ir2.atomic_ops; - RegUsageInfo result(blocks.size(), ops->ops.size()); + RegUsageInfo result(blocks.size(), ops->ops.size() + 1); for (int i = 0; i < int(blocks.size()); i++) { - phase1(*ops, i, &result); + phase1(*ops, i, &result, should_insert_v0_read(blocks, function, i)); } bool changed = false; @@ -152,7 +177,7 @@ RegUsageInfo analyze_ir2_register_usage(const Function& function) { } while (changed); for (int i = 0; i < int(blocks.size()); i++) { - phase3(*ops, blocks, i, &result); + phase3(*ops, blocks, i, &result, should_insert_v0_read(blocks, function, i)); } // we want to know if an op "consumes" a register. @@ -187,6 +212,8 @@ RegUsageInfo analyze_ir2_register_usage(const Function& function) { } } + result.op.pop_back(); + assert(result.op.size() == ops->ops.size()); return result; } } // namespace decompiler \ No newline at end of file diff --git a/decompiler/analysis/variable_naming.cpp b/decompiler/analysis/variable_naming.cpp index 73803197f..09479522d 100644 --- a/decompiler/analysis/variable_naming.cpp +++ b/decompiler/analysis/variable_naming.cpp @@ -2,6 +2,8 @@ #include "variable_naming.h" #include "reg_usage.h" #include "decompiler/Function/Function.h" +#include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/IR2/Env.h" #include "third-party/fmt/core.h" namespace decompiler { diff --git a/decompiler/analysis/variable_naming.h b/decompiler/analysis/variable_naming.h index 1c4abf384..dc918036b 100644 --- a/decompiler/analysis/variable_naming.h +++ b/decompiler/analysis/variable_naming.h @@ -22,7 +22,7 @@ #include #include "decompiler/Disasm/Register.h" #include "decompiler/util/TP_Type.h" -#include "decompiler/IR2/Env.h" +#include "decompiler/IR2/IR2_common.h" namespace decompiler { diff --git a/decompiler/util/TP_Type.cpp b/decompiler/util/TP_Type.cpp index 243eea371..31dab283c 100644 --- a/decompiler/util/TP_Type.cpp +++ b/decompiler/util/TP_Type.cpp @@ -104,7 +104,7 @@ TypeSpec TP_Type::typespec() const { case Kind::UNINITIALIZED: return TypeSpec("none"); case Kind::PRODUCT_WITH_CONSTANT: - return TypeSpec("int"); + return m_ts; case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: // this can be part of an array access, so we don't really know the type. // probably not a good idea to try to do anything with this as a typespec diff --git a/decompiler/util/TP_Type.h b/decompiler/util/TP_Type.h index 202b05c93..019705a9b 100644 --- a/decompiler/util/TP_Type.h +++ b/decompiler/util/TP_Type.h @@ -104,10 +104,11 @@ class TP_Type { return result; } - static TP_Type make_from_product(int64_t multiplier) { + static TP_Type make_from_product(int64_t multiplier, bool is_signed) { TP_Type result; result.kind = Kind::PRODUCT_WITH_CONSTANT; result.m_int = multiplier; + result.m_ts = is_signed ? TypeSpec("int") : TypeSpec("uint"); return result; } diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index 97df6a2b8..5b11b81d6 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -5,6 +5,7 @@ #include "decompiler/analysis/cfg_builder.h" #include "decompiler/analysis/expression_build.h" #include "common/goos/PrettyPrinter.h" +#include "decompiler/IR2/Form.h" using namespace decompiler; @@ -80,6 +81,7 @@ std::unique_ptr FormRegressionTest::make_function( test->file.words_by_seg.resize(3); test->file.labels = program.labels; test->func.ir2.env.file = &test->file; + test->func.ir2.env.dts = dts.get(); test->func.instructions = program.instructions; test->func.guessed_name.set_as_global("test-function"); test->func.type = function_type; @@ -113,19 +115,33 @@ std::unique_ptr FormRegressionTest::make_function( EXPECT_TRUE(test->func.ir2.top_form); // for now, just test that this can at least be called. - VariableSet vars; - test->func.ir2.top_form->collect_vars(vars); + if (test->func.ir2.top_form) { + VariableSet vars; + test->func.ir2.top_form->collect_vars(vars); - if (do_expressions) { - bool success = - convert_to_expressions(test->func.ir2.top_form, test->func.ir2.form_pool, test->func); + if (do_expressions) { + bool success = convert_to_expressions(test->func.ir2.top_form, *test->func.ir2.form_pool, + test->func, *dts); - EXPECT_TRUE(success); - if (!success) { - return nullptr; + EXPECT_TRUE(success); + if (!success) { + return nullptr; + } } } + // for (int i = 0; i < int(test->func.ir2.atomic_ops->ops.size()); i++) { + // auto& op = test->func.ir2.atomic_ops->ops.at(i); + // auto& info = test->func.ir2.env.reg_use().op.at(i); + // fmt::print("{} - {}: ", op->to_string(test->func.ir2.env), + // test->func.ir2.env.get_types_after_op(i).print_gpr_masked( + // regs_to_gpr_mask({Register(Reg::GPR, Reg::V0)}))); + // for (auto live : info.live) { + // fmt::print("{} ", live.to_charp()); + // } + // fmt::print("\n"); + // } + return test; } @@ -141,14 +157,15 @@ void FormRegressionTest::test(const std::string& code, ASSERT_TRUE(test); auto expected_form = pretty_print::get_pretty_printer_reader().read_from_string(expected, false).as_pair()->car; + ASSERT_TRUE(test->func.ir2.top_form); auto actual_form = pretty_print::get_pretty_printer_reader() .read_from_string(test->func.ir2.top_form->to_form(test->func.ir2.env).print(), false) .as_pair() ->car; if (expected_form != actual_form) { - printf("Got:\n%s\n\nExpected\n%s\n", actual_form.print().c_str(), - expected_form.print().c_str()); + printf("Got:\n%s\n\nExpected\n%s\n", pretty_print::to_string(actual_form).c_str(), + pretty_print::to_string(expected_form).c_str()); } EXPECT_TRUE(expected_form == actual_form); diff --git a/test/decompiler/test_FormBeforeExpressions.cpp b/test/decompiler/test_FormBeforeExpressions.cpp index 848d305a2..f5f7b641c 100644 --- a/test/decompiler/test_FormBeforeExpressions.cpp +++ b/test/decompiler/test_FormBeforeExpressions.cpp @@ -178,9 +178,9 @@ TEST_F(FormRegressionTest, FormatString) { " (set! t9-0 format)\n" " (set! a0-1 '#t)\n" " (set! a1-0 L343)\n" - " (set! f0-0 (l.f gp-0))\n" + " (set! f0-0 (-> gp-0 data))\n" " (set! a2-0 (fpr->gpr f0-0))\n" - " (set! v0-0 (call! a0-1 a1-0 a2-0))\n" // #t, "~f", the float + " (call! a0-1 a1-0 a2-0)\n" // #t, "~f", the float " (set! v0-1 gp-0)\n" " )"; test_no_expr(func, type, expected, false, "", {{"L343", "~f"}}); @@ -214,16 +214,16 @@ TEST_F(FormRegressionTest, WhileLoop) { std::string type = "(function basic type symbol)"; std::string expected = "(begin\n" - " (set! v1-0 (l.wu (+ a0-0 -4)))\n" + " (set! v1-0 (-> a0-0 type))\n" " (set! a0-1 object)\n" " (until\n" - " (begin (set! v1-0 (l.wu (+ v1-0 4))) (= v1-0 a0-1))\n" + " (begin (set! v1-0 (-> v1-0 parent)) (= v1-0 a0-1))\n" " (if\n" " (= v1-0 a1-0)\n" " (return ((begin (set! v1-1 '#t) (set! v0-0 v1-1))) ((set! v1-0 0)))\n" " )\n" " )\n" - " (set! v0-1 '#f)\n" + " (set! v0-0 '#f)\n" " )"; test_no_expr(func, type, expected); } @@ -274,7 +274,7 @@ TEST_F(FormRegressionTest, Or) { " (begin\n" " (or\n" " (begin\n" - " (set! a0-0 (l.wu (+ a0-0 4)))\n" + " (set! a0-0 (-> a0-0 parent))\n" " (set! a3-0 (= a0-0 v1-0))\n" " (truthy a3-0)\n" // this sets a2-0, the unused result of the OR. it gets a separate // variable because it's not used. @@ -288,7 +288,7 @@ TEST_F(FormRegressionTest, Or) { " (return ((begin (set! v1-1 '#t) (set! v0-0 v1-1))) ((set! v1-0 0)))\n" " )\n" " )\n" - " (set! v0-1 '#f)\n" + " (set! v0-0 '#f)\n" " )"; test_no_expr(func, type, expected); } @@ -345,7 +345,7 @@ TEST_F(FormRegressionTest, DynamicMethodAccess) { " (set! v1-1 (+ v1-0 a0-0))\n" " (set! v1-2 (l.wu (+ v1-1 16)))\n" // get the method of the given type. " (until\n" - " (!= v0-1 v1-2)\n" // actually goes after the body, so it's fine to refer to v0-1/v1-2 + " (!= v0-0 v1-2)\n" // actually goes after the body, so it's fine to refer to v1-2 " (if\n" " (begin\n" " (if\n" @@ -353,14 +353,14 @@ TEST_F(FormRegressionTest, DynamicMethodAccess) { " (return ((begin (set! v1-3 nothing) (set! v0-0 v1-3))) ((set! v1-2 0)))\n" // return // nothing. " )\n" - " (set! a0-0 (l.wu (+ a0-0 4)))\n" // get next parent type - " (set! a2-2 (sll a1-0 2))\n" // fancy access + " (set! a0-0 (-> a0-0 parent))\n" // get next parent type + " (set! a2-2 (sll a1-0 2))\n" // fancy access " (set! a2-3 (+ a2-2 a0-0))\n" - " (set! v0-1 (l.wu (+ a2-3 16)))\n" // get method (in v0-1, the same var as loop + " (set! v0-0 (l.wu (+ a2-3 16)))\n" // get method (in v0-1, the same var as loop // condition) - " (zero? v0-1)\n" // is it defined? + " (zero? v0-0)\n" // is it defined? " )\n" - " (return ((begin (set! v1-4 nothing) (set! v0-2 v1-4))) ((set! v1-2 0)))\n" // also + " (return ((begin (set! v1-4 nothing) (set! v0-0 v1-4))) ((set! v1-2 0)))\n" // also // return // nothing. " )\n" @@ -463,8 +463,8 @@ TEST_F(FormRegressionTest, And) { "(cond\n" " ((begin (set! v1-0 '()) (= a0-0 v1-0)) (set! v0-0 0))\n" // should be a case, not a return " (else\n" - " (set! v1-1 (l.w (+ a0-0 2)))\n" // v1-1 iteration. - " (set! v0-1 1)\n" // v0-1 count + " (set! v1-1 (-> a0-0 cdr))\n" // v1-1 iteration. + " (set! v0-0 1)\n" // v0-1 count " (while\n" " (begin\n" " (and\n" @@ -473,7 +473,7 @@ TEST_F(FormRegressionTest, And) { " )\n" " (truthy a0-2)\n" // this variable doesn't appear, but is set by the and. " )\n" - " (set! v0-1 (+ v0-1 1))\n" // merged (and the result) + " (set! v0-0 (+ v0-0 1))\n" // merged (and the result) " (set! v1-1 (l.w (+ v1-1 2)))\n" // also merged. " )\n" " (set! v1-2 '#f)\n" // while's false, I think. @@ -560,7 +560,7 @@ TEST_F(FormRegressionTest, FunctionCall) { " (set! v1-3 '())\n" // " (!= gp-0 v1-3)\n" // IF CONDITION " )\n" - " (set! v0-2 gp-0)\n" // not empty, so return the result + " (set! v0-1 gp-0)\n" // not empty, so return the result " )"; // the (set! v0 #f) from the if is added later. test_no_expr(func, type, expected, true); } @@ -703,7 +703,7 @@ TEST_F(FormRegressionTest, NestedAndOr) { " (begin\n" " (set! s2-0 (l.w (+ s3-0 -2)))\n" // s2 = car " (set! v1-0 (l.w (+ s3-0 2)))\n" - " (set! s1-0 (l.w (+ v1-0 -2)))\n" // s1 = cadr + " (set! s1-0 (-> v1-0 car))\n" // s1 = cadr " (set! t9-0 s5-0)\n" // func " (set! a0-1 s2-0)\n" // car " (set! a1-1 s1-0)\n" // cadr @@ -775,10 +775,10 @@ TEST_F(FormRegressionTest, NewMethod) { " (begin\n" " (set! gp-0 a2-0)\n" // gp-0 is size " (set! v1-0 object)\n" - " (set! t9-0 (l.wu (+ v1-0 16)))\n" // object new - " (set! v1-1 a1-0)\n" // ? - " (set! a2-1 (l.hu (+ a1-0 8)))\n" // math - " (set! a1-1 (l.hu (+ a1-0 12)))\n" + " (set! t9-0 (-> v1-0 method-table 0))\n" // object new + " (set! v1-1 a1-0)\n" // ? + " (set! a2-1 (-> a1-0 size))\n" // math + " (set! a1-1 (-> a1-0 heap-base))\n" " (set! a1-2 (*.ui gp-0 a1-1))\n" " (set! a2-2 (+ a2-1 a1-2))\n" " (set! a1-3 v1-1)\n" // size! @@ -829,7 +829,7 @@ TEST_F(FormRegressionTest, Recursive) { " (set! t9-0 fact)\n" // recurse! " (set! a0-1 (+ gp-0 -1))\n" " (set! v0-1 (call! a0-1))\n" - " (set! v0-2 (*.si gp-0 v0-1))\n" // not quite a tail call... + " (set! v0-0 (*.si gp-0 v0-1))\n" // not quite a tail call... " )\n" " )"; test_no_expr(func, type, expected, false); @@ -864,7 +864,7 @@ TEST_F(FormRegressionTest, TypeOf) { std::string expected = "(begin\n" " (set! v1-1 (type-of a0-0))\n" - " (set! t9-0 (l.wu (+ v1-1 24)))\n" // print method. + " (set! t9-0 (-> v1-1 method-table 2))\n" // print method. " (set! v0-0 (call! a0-0))\n" " )"; test_no_expr(func, type, expected, false); diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index 617bc196d..9850fbeaf 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -33,7 +33,7 @@ TEST_F(FormRegressionTest, ExprFloatingPoint) { test_with_expr(func, type, expected); } -TEST_F(FormRegressionTest, AdditionSigned) { +TEST_F(FormRegressionTest, ExprAdditionSigned) { std::string func = " sll r0, r0, 0\n" " daddu v0, a0, a1\n" @@ -44,7 +44,7 @@ TEST_F(FormRegressionTest, AdditionSigned) { test_with_expr(func, type, expected); } -TEST_F(FormRegressionTest, AdditionUnSigned) { +TEST_F(FormRegressionTest, ExprAdditionUnSigned) { std::string func = " sll r0, r0, 0\n" " daddu v0, a0, a1\n" @@ -55,7 +55,7 @@ TEST_F(FormRegressionTest, AdditionUnSigned) { test_with_expr(func, type, expected); } -TEST_F(FormRegressionTest, AdditionMixed1) { +TEST_F(FormRegressionTest, ExprAdditionMixed1) { std::string func = " sll r0, r0, 0\n" " daddu v0, a0, a1\n" @@ -66,7 +66,7 @@ TEST_F(FormRegressionTest, AdditionMixed1) { test_with_expr(func, type, expected); } -TEST_F(FormRegressionTest, AdditionMixed2) { +TEST_F(FormRegressionTest, ExprAdditionMixed2) { std::string func = " sll r0, r0, 0\n" " daddu v0, a0, a1\n" @@ -77,4 +77,395 @@ TEST_F(FormRegressionTest, AdditionMixed2) { test_with_expr(func, type, expected); } -// TODO - test the additions, but with the wrong return types. \ No newline at end of file +TEST_F(FormRegressionTest, ExprAdditionSignedWrongReturn) { + std::string func = + " sll r0, r0, 0\n" + " daddu v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int uint)"; + std::string expected = "(the-as uint (+ a0-0 a1-0))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprAdditionUnSignedWrongReturn) { + std::string func = + " sll r0, r0, 0\n" + " daddu v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint uint int)"; + std::string expected = "(the-as int (+ a0-0 a1-0))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprAdditionMixed1WrongReturn) { + std::string func = + " sll r0, r0, 0\n" + " daddu v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int uint uint)"; + std::string expected = "(the-as uint (+ a0-0 (the-as int a1-0)))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprAdditionMixed2WrongReturn) { + std::string func = + " sll r0, r0, 0\n" + " daddu v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint int int)"; + std::string expected = "(the-as int (+ a0-0 (the-as uint a1-0)))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprSubtraction) { + std::string func = + " sll r0, r0, 0\n" + " dsubu v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(- a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMultiplication) { + std::string func = + " sll r0, r0, 0\n" + " mult3 v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(* a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMultiplicationWrong1) { + std::string func = + " sll r0, r0, 0\n" + " mult3 v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int uint int)"; + std::string expected = "(* a0-0 (the-as int a1-0))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMultiplicationWrong2) { + std::string func = + " sll r0, r0, 0\n" + " mult3 v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint int int)"; + std::string expected = "(* (the-as int a0-0) a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMultiplicationWrong3) { + std::string func = + " sll r0, r0, 0\n" + " mult3 v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint uint uint)"; + std::string expected = "(the-as uint (* (the-as int a0-0) (the-as int a1-0)))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprDivision1) { + std::string func = + " sll r0, r0, 0\n" + " div a0, a1\n" + " mflo v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(/ a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprDivision2) { + std::string func = + " sll r0, r0, 0\n" + " div a0, a1\n" + " mflo v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint int int)"; + std::string expected = "(/ (the-as int a0-0) a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprDivision3) { + std::string func = + " sll r0, r0, 0\n" + " div a0, a1\n" + " mflo v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int uint int)"; + std::string expected = "(/ a0-0 (the-as int a1-0))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprDivision4) { + std::string func = + " sll r0, r0, 0\n" + " div a0, a1\n" + " mflo v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function uint uint uint)"; + std::string expected = "(the-as uint (/ (the-as int a0-0) (the-as int a1-0)))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprAsh) { + std::string func = + " sll r0, r0, 0\n" + "L305:\n" + " or v1, a0, r0\n" + " bgezl a1, L306\n" + " dsllv v0, v1, a1\n" + + " dsubu a0, r0, a1\n" + " dsrav v0, v1, a0\n" + "L306:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(ash a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMod) { + std::string func = + " sll r0, r0, 0\n" + " div a0, a1\n" + " mfhi v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(mod a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprAbs) { + std::string func = + " sll r0, r0, 0\n" + "L301:\n" + " or v0, a0, r0\n" + " bltzl v0, L302\n" + " dsubu v0, r0, v0\n" + + "L302:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int)"; + std::string expected = "(abs a0-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMin) { + std::string func = + " sll r0, r0, 0\n" + " or v0, a0, r0\n" + " or v1, a1, r0\n" + " slt a0, v0, v1\n" + " movz v0, v1, a0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(min a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMax) { + std::string func = + " sll r0, r0, 0\n" + " or v0, a0, r0\n" + " or v1, a1, r0\n" + " slt a0, v0, v1\n" + " movn v0, v1, a0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(max a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprLogior) { + std::string func = + " sll r0, r0, 0\n" + " or v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(logior a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprLogxor) { + std::string func = + " sll r0, r0, 0\n" + " xor v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(logxor a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprLognor) { + std::string func = + " sll r0, r0, 0\n" + " nor v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(lognor a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprLogand) { + std::string func = + " sll r0, r0, 0\n" + " and v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(logand a0-0 a1-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprLognot) { + std::string func = + " sll r0, r0, 0\n" + " nor v0, a0, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int)"; + std::string expected = "(lognot a0-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprFalse) { + std::string func = + " sll r0, r0, 0\n" + " or v0, s7, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function symbol)"; + std::string expected = "'#f"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprTrue) { + std::string func = + " sll r0, r0, 0\n" + " daddiu v0, s7, #t\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function symbol)"; + std::string expected = "'#t"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprPrintBfloat) { + std::string func = + " sll r0, r0, 0\n" + "L343:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq gp, 16(sp)\n" + + " or gp, a0, r0\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L343\n" + " lwc1 f0, 0(gp)\n" + " mfc1 a2, f0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v0, gp, r0 \n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function bfloat bfloat)"; + + std::string expected = "(begin (set! gp-0 a0-0) (format '#t L343 (-> gp-0 data)) gp-0)"; + test_with_expr(func, type, expected, false, "", {{"L343", "~f"}}); +} + +TEST_F(FormRegressionTest, ExprSizeOfType) { + std::string func = + "L346:\n" // fake label. + " sll r0, r0, 0\n" + " daddiu sp, sp, -16\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " ld v1, L346(fp)\n" + " lhu a0, 14(a0)\n" + " dsll a0, a0, 2\n" + " daddiu a0, a0, 43\n" + " and v0, v1, a0 \n" + " ld fp, 8(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function type uint)"; + + std::string expected = "(logand (l.d L346) (+ (sll (-> a0-1 allocated-length) 2) 43))"; + test_with_expr(func, type, expected, false, ""); +} + +TEST_F(FormRegressionTest, ExprBasicTypeP) { + std::string func = + " sll r0, r0, 0\n" + "L285:\n" + " lwu v1, -4(a0)\n" + " lw a0, object(s7)\n" + + "L286:\n" + " bne v1, a1, L287\n" + " or a2, s7, r0\n" + + " daddiu v1, s7, #t\n" + " or v0, v1, r0\n" + " beq r0, r0, L288\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + "L287:\n" + " lwu v1, 4(v1)\n" + " bne v1, a0, L286\n" + " sll r0, r0, 0\n" + " or v0, s7, r0\n" + "L288:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function basic type symbol)"; + std::string expected = + "(begin\n" + " (set! v1-0 (-> a0-0 type))\n" + " (set! a0-1 object)\n" + " (until\n" + " (begin (set! v1-0 (-> v1-0 parent)) (= v1-0 a0-1))\n" // likely using set! as value. we + // don't plan on supporting this. + " (if\n" + " (= v1-0 a1-0)\n" + " (return ((begin (set! v1-1 '#t) (set! v0-0 v1-1))) ((set! v1-0 0)))\n" + " )\n" + " )\n" + " '#f\n" + " )"; + test_with_expr(func, type, expected); +} \ No newline at end of file