#pragma once #include #include #include #include #include "decompiler/Disasm/Register.h" #include "decompiler/IR2/AtomicOp.h" #include "common/goos/Object.h" namespace decompiler { class Form; class Env; class FormStack; /*! * A "FormElement" represents a single LISP form that's not a begin. * This is a abstract base class that all types of forms should be based on. */ class FormElement { public: Form* parent_form = nullptr; virtual goos::Object to_form(const Env& env) const = 0; virtual goos::Object to_form_as_condition(const Env& env) const; virtual ~FormElement() = default; virtual void apply(const std::function& f) = 0; virtual void apply_form(const std::function& f) = 0; virtual bool is_sequence_point() const { return true; } virtual void collect_vars(VariableSet& vars) const = 0; std::string to_string(const Env& env) const; // push the result of this operation to the operation stack virtual void push_to_stack(const Env& env, FormPool& pool, FormStack& stack); virtual void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); protected: friend class Form; }; /*! * A SimpleExpressionElement is a form which has the value of a SimpleExpression. * Like a SimpleExpression, it has no side effects. 
*/ class SimpleExpressionElement : public FormElement { public: explicit SimpleExpressionElement(SimpleExpression expr, int my_idx); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; bool is_sequence_point() const override; void collect_vars(VariableSet& vars) const override; // void push_to_stack(const Env& env, FormStack& stack) override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; void update_from_stack_identity(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_gpr_to_fpr(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_fpr_to_gpr(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_div_s(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_add_i(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_mult_si(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_lognot(const Env& env, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_force_si_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_force_ui_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, std::vector* result); void update_from_stack_copy_first_int_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, std::vector* result); const SimpleExpression& expr() const { return m_expr; } private: SimpleExpression m_expr; int m_my_idx; }; /*! * Represents storing a value into memory. * Because a value can be propagated "into" the source value, this will have to be special cased * in expression propagation. 
*/ class StoreElement : public FormElement { public: explicit StoreElement(const StoreOp* op); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; private: // todo - we may eventually want to use a different representation for more // complicated store paths. const StoreOp* m_op; }; /*! * Representing a value loaded from memory. * Unclear if this should have some common base with store? */ class LoadSourceElement : public FormElement { public: LoadSourceElement(Form* addr, int size, LoadVarOp::Kind kind); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; int size() const { return m_size; } LoadVarOp::Kind kind() const { return m_kind; } const Form* location() const { return m_addr; } void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; private: Form* m_addr = nullptr; int m_size = -1; LoadVarOp::Kind m_kind; }; /*! * Representing an indivisible thing, like an integer constant variable, etc. * Just a wrapper around SimpleAtom. */ class SimpleAtomElement : public FormElement { public: explicit SimpleAtomElement(const SimpleAtom& var); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; const SimpleAtom& atom() const { return m_atom; } // void push_to_stack(const Env& env, FormStack& stack) override; private: SimpleAtom m_atom; }; /*! * Set a variable to a Form. This is the set! form to be used for expression building. 
*/ class SetVarElement : public FormElement { public: SetVarElement(const Variable& var, Form* value, bool is_sequence_point); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; bool is_sequence_point() const override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; const Variable& dst() const { return m_dst; } const Form* src() const { return m_src; } private: Variable m_dst; Form* m_src = nullptr; bool m_is_sequence_point = true; }; /*! * Like SetVar, but sets a form to another form. * This is intended to be used with stores. * NOTE: do not use this when SetVarElement could be used instead. */ class SetFormFormElement : public FormElement { public: SetFormFormElement(Form* dst, Form* src); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; bool is_sequence_point() const override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; private: Form* m_dst = nullptr; Form* m_src = nullptr; }; /*! * A wrapper around a single AtomicOp. * The "important" special AtomicOps have their own Form type, like FuncitonCallElement. */ class AtomicOpElement : public FormElement { public: explicit AtomicOpElement(const AtomicOp* op); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; const AtomicOp* op() const { return m_op; } private: const AtomicOp* m_op; }; /*! * A "condition" like (< a b). This can be used as a boolean value directly: (set! 
a (< b c)) * or it can be used as a branch condition: (if (< a b)). * * In the first case, it can be either a conditional move or actually branching. GOAL seems to use * the branching when sometimes it could have used the conditional move, and for now, we don't * care about the difference. */ class ConditionElement : public FormElement { public: ConditionElement(IR2_Condition::Kind kind, std::optional src0, std::optional src1, RegSet consumed); goos::Object to_form(const Env& env) const override; goos::Object to_form_as_condition(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; void invert(); const RegSet& consume() const { return m_consumed; } private: IR2_Condition::Kind m_kind; std::optional m_src[2]; RegSet m_consumed; }; /*! * Wrapper around an AtomicOp call. */ class FunctionCallElement : public FormElement { public: explicit FunctionCallElement(const CallOp* op); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; private: const CallOp* m_op; }; /*! * Wrapper around an AtomicOp branch. These are inserted when directly converting blocks to Form, * but should be eliminated after the cfg_builder pass completes. 
*/ class BranchElement : public FormElement { public: explicit BranchElement(const BranchOp* op); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; const BranchOp* op() const { return m_op; } private: const BranchOp* m_op; }; /*! * Represents a (return-from #f x) form, which immediately returns from the function. * This always has some "dead code" after it that can't be reached, which is the "dead_code". */ class ReturnElement : public FormElement { public: Form* return_code = nullptr; Form* dead_code = nullptr; ReturnElement(Form* _return_code, Form* _dead_code) : return_code(_return_code), dead_code(_dead_code) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! * Represents a (return-from Lxxx x) form, which returns from a block which ends before the end * of the function. These are used pretty rarely. As a result, I'm not planning to allow these to * next within other expressions. This means that the following code: * * (set! x (block my-block * (if (condition?) * (return-from my-block 12)) * 2)) * * Would become * * (block my-block * (when (condition?) * (set! x 12) * (return-from my-block none)) * (set! x 2) * ) * * which seems fine to me. */ class BreakElement : public FormElement { public: Form* return_code = nullptr; Form* dead_code = nullptr; BreakElement(Form* _return_code, Form* _dead_code) : return_code(_return_code), dead_code(_dead_code) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; }; /*! 
* Condition (cond, if, when, unless) which has an "else" case. * The condition of the first entry may contain too much and will need to be adjusted later. * Example: * * (set! x 10) * (if (something?) ... ) * * might become * (if (begin (set! x 10) (something?)) ... ) * * We want to wait until after expressions are built to move the extra stuff up to avoid splitting * up a complicated expression used as the condition. But this should happen before variable * scoping. */ class CondWithElseElement : public FormElement { public: struct Entry { Form* condition = nullptr; Form* body = nullptr; bool cleaned = false; }; std::vector entries; Form* else_ir = nullptr; CondWithElseElement(std::vector _entries, Form* _else_ir) : entries(std::move(_entries)), else_ir(_else_ir) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! * An empty element. This is used to fill the body of control forms with nothing in them. * For example, I believe that (cond ((x y) (else none))) will generate an else case with an * "empty" and looks different from (cond ((x y))). * * We _could_ simplify out the use of empty, but I think it's more "authentic" to leave them in, and * might give us more clues about how the code was originally written */ class EmptyElement : public FormElement { public: EmptyElement() = default; goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; }; /*! * Represents a GOAL while loop and more complicated loops which have the "while" format of checking * the condition before the first loop. This will not include infinite while loops. 
* Unlike CondWithElseElement, this will correctly identify the start and end of the condition. */ class WhileElement : public FormElement { public: WhileElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; Form* condition = nullptr; Form* body = nullptr; bool cleaned = false; }; /*! * Represents a GOAL until loop and more complicated loops which use the "until" format of checking * the condition after the first iteration. Has the same limitation as CondWithElseElement for the * condition. */ class UntilElement : public FormElement { public: UntilElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; Form* condition = nullptr; Form* body = nullptr; }; /*! * Represents a GOAL short-circuit expression, either AND or OR. * The first "element" in ShortCircuitElement may be too large, see the comment on * CondWithElseElement */ class ShortCircuitElement : public FormElement { public: struct Entry { Form* condition = nullptr; // in the case where there's no else, each delay slot will write #f to the "output" register. // this can be with an or , s7, r0 // Form* output = nullptr; // todo, what? add to collect vars if we need it? 
bool is_output_trick = false; bool cleaned = false; }; enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN; Variable final_result; std::vector entries; std::optional used_as_value = std::nullopt; explicit ShortCircuitElement(std::vector _entries) : entries(std::move(_entries)) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! * Represents a GOAL cond/if/when/unless statement which does not have an explicit else case. The * compiler will then move #f into the result register in the delay slot. The first condition may be * too large at first, see CondWithElseElement */ class CondNoElseElement : public FormElement { public: struct Entry { Form* condition = nullptr; Form* body = nullptr; std::optional false_destination; FormElement* original_condition_branch = nullptr; bool cleaned = false; }; Variable final_destination; bool used_as_value = false; std::vector entries; explicit CondNoElseElement(std::vector _entries) : entries(std::move(_entries)) {} goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! * Represents a (abs x) expression. */ class AbsElement : public FormElement { public: explicit AbsElement(Variable _source, RegSet _consumed); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; Variable source; RegSet consumed; }; /*! 
* Represents an (ash x y) expression. There is also an "unsigned" version of this using logical * shifts. This only recognizes the fancy version where the shift amount isn't known at compile time * and the compiler emits code that branches depending on the sign of the shift amount. */ class AshElement : public FormElement { public: Variable shift_amount, value; std::optional clobber; bool is_signed = true; RegSet consumed; AshElement(Variable _shift_amount, Variable _value, std::optional _clobber, bool _is_signed, RegSet _consumed); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; }; /*! * Represents a form which gets the runtime type of a boxed object. This is for the most general * "object" case where we check for pair, binteger, or basic and there's actually branching. */ class TypeOfElement : public FormElement { public: Form* value; std::optional clobber; TypeOfElement(Form* _value, std::optional _clobber); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; }; /*! * Represents an unpaired cmove #f. GOAL may emit code like * (set! x #t) * (... evaluate something) * (cmov x y #f) * where the stuff in between is potentially very large. * GOAL has no "condition move" keyword available to the programmer - this would only happen if when * doing something like (set! x (zero? y)), in the code for creating a GOAL boolean. * * Code like (if x (set! y z)) will branch, the compiler isn't smart enough to use movn/movz here. * * These cannot be compacted into a single form until expression building, so we leave these * placeholders in. 
* * Note - some conditionals put the (set! x #t) immediately before the cmove, but not all. Those * that do will be correctly recognized and will be a ConditionElement. zero! seems to be the most * common one that's split, and it happens reasonably often, so I will try to actually correct it. */ class ConditionalMoveFalseElement : public FormElement { public: Variable dest; Form* source = nullptr; bool on_zero = false; ConditionalMoveFalseElement(Variable _dest, Form* _source, bool _on_zero); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; }; std::string fixed_operator_to_string(FixedOperatorKind kind); /*! * A GenericOperator is the head of a GenericElement. * It is used for the final output. */ class GenericOperator { public: enum class Kind { FIXED_OPERATOR, CONDITION_OPERATOR, FUNCTION_EXPR, INVALID }; static GenericOperator make_fixed(FixedOperatorKind kind); static GenericOperator make_function(Form* value); static GenericOperator make_compare(IR2_Condition::Kind kind); void collect_vars(VariableSet& vars) const; goos::Object to_form(const Env& env) const; void apply(const std::function& f); void apply_form(const std::function& f); bool operator==(const GenericOperator& other) const; bool operator!=(const GenericOperator& other) const; Kind kind() const { return m_kind; } FixedOperatorKind fixed_kind() const { assert(m_kind == Kind::FIXED_OPERATOR); return m_fixed_kind; } IR2_Condition::Kind condition_kind() const { assert(m_kind == Kind::CONDITION_OPERATOR); return m_condition_kind; } const Form* func() const { assert(m_kind == Kind::FUNCTION_EXPR); return m_function; } private: friend class GenericElement; Kind m_kind = Kind::INVALID; IR2_Condition::Kind m_condition_kind = IR2_Condition::Kind::INVALID; FixedOperatorKind m_fixed_kind = FixedOperatorKind::INVALID; Form* m_function = nullptr; }; class 
GenericElement : public FormElement { public: explicit GenericElement(GenericOperator op); GenericElement(GenericOperator op, Form* arg); GenericElement(GenericOperator op, Form* arg0, Form* arg1); GenericElement(GenericOperator op, std::vector forms); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; const GenericOperator& op() const { return m_head; } const std::vector& elts() const { return m_elts; } private: GenericOperator m_head; std::vector m_elts; }; class CastElement : public FormElement { public: explicit CastElement(TypeSpec type, Form* source); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; const TypeSpec& type() const { return m_type; } const Form* source() const { return m_source; } private: TypeSpec m_type; Form* m_source = nullptr; }; class DerefToken { public: enum class Kind { INTEGER_CONSTANT, INTEGER_EXPRESSION, // some form which evaluates to an integer index. Not offset, index. 
FIELD_NAME, INVALID }; static DerefToken make_int_constant(s64 int_constant); static DerefToken make_int_expr(Form* expr); static DerefToken make_field_name(const std::string& name); void collect_vars(VariableSet& vars) const; goos::Object to_form(const Env& env) const; void apply(const std::function& f); void apply_form(const std::function& f); Kind kind() const { return m_kind; } const std::string& field_name() const { assert(m_kind == Kind::FIELD_NAME); return m_name; } private: Kind m_kind = Kind::INVALID; s64 m_int_constant = -1; std::string m_name; Form* m_expr = nullptr; }; class DerefElement : public FormElement { public: DerefElement(Form* base, bool is_addr_of, DerefToken token); DerefElement(Form* base, bool is_addr_of, std::vector tokens); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; bool is_addr_of() const { return m_is_addr_of; } const Form* base() const { return m_base; } const std::vector& tokens() const { return m_tokens; } private: Form* m_base = nullptr; bool m_is_addr_of = false; std::vector m_tokens; }; class DynamicMethodAccess : public FormElement { public: explicit DynamicMethodAccess(Variable source); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) override; private: Variable m_source; }; /*! * A Form is a wrapper around one or more FormElements. * This is done for two reasons: * - Easier to "inline" begins, prevents stupid nesting of begins. * - Easier to manage ownership. 
*/
class Form {
 public:
  // NOTE(review): template arguments below were reconstructed from usage - confirm.
  Form() = default;

  // Build a form holding one element and make this form the element's parent.
  Form(FormElement* parent, FormElement* single_child)
      : parent_element(parent), m_elements({single_child}) {
    single_child->parent_form = this;
  }

  // Build a form from a sequence of elements and make this form the parent of each of them.
  Form(FormElement* parent, const std::vector<FormElement*>& sequence)
      : parent_element(parent), m_elements(sequence) {
    for (auto& x : sequence) {
      x->parent_form = this;
    }
  }

  // The only element, or nullptr if the form does not hold exactly one element.
  FormElement* try_as_single_element() const {
    if (is_single_element()) {
      return m_elements.front();
    }
    return nullptr;
  }
  bool is_single_element() const { return m_elements.size() == 1; }

  // Indexed access goes through std::vector::at, so it is bounds checked.
  FormElement* operator[](int idx) { return m_elements.at(idx); }
  FormElement* at(int idx) { return m_elements.at(idx); }
  const FormElement* operator[](int idx) const { return m_elements.at(idx); }
  int size() const { return int(m_elements.size()); }

  // The three functions below assert that the form is not empty.
  FormElement* back() const {
    assert(!m_elements.empty());
    return m_elements.back();
  }

  // Pointer to the slot holding the last element, so the caller can overwrite it.
  FormElement** back_ref() {
    assert(!m_elements.empty());
    return &m_elements.back();
  }

  void pop_back() {
    assert(!m_elements.empty());
    m_elements.pop_back();
  }

  const std::vector<FormElement*>& elts() const { return m_elements; }
  std::vector<FormElement*>& elts() { return m_elements; }

  // Append an element and make this form its parent.
  void push_back(FormElement* elt) {
    elt->parent_form = this;
    m_elements.push_back(elt);
  }

  // Removes the elements from this form; their parent_form pointers are left untouched.
  void clear() { m_elements.clear(); }

  goos::Object to_form(const Env& env) const;
  goos::Object to_form_as_condition(const Env& env) const;
  std::string to_string(const Env& env) const;
  void inline_forms(std::vector<goos::Object>& forms, const Env& env) const;
  void apply(const std::function<void(FormElement*)>& f);
  void apply_form(const std::function<void(Form*)>& f);
  void collect_vars(VariableSet& vars) const;

  void update_children_from_stack(const Env& env, FormPool& pool, FormStack& stack);

  // The element passed as "parent" at construction (nullptr for a default-constructed form).
  FormElement* parent_element = nullptr;

 private:
  std::vector<FormElement*> m_elements;
};

/*!
 * A FormPool is used to allocate forms and form elements.
 * It will clean up everything when it is destroyed.
 * As a result, you don't need to worry about deleting / reference counting when manipulating
 * a Form graph.
*/
class FormPool {
 public:
  // NOTE(review): template heads/arguments below were reconstructed from usage - confirm.
  // NOTE(review): no copy/move control is declared although the pool stores raw pointers that,
  // per the class comment, its destructor cleans up - presumably a FormPool must never be copied;
  // confirm that no caller does so.

  /*!
   * Construct a T from args, register it with the pool, and return it.
   */
  template <typename T, class... Args>
  T* alloc_element(Args&&... args) {
    // Hold the new element in a unique_ptr until it is registered, so it is not leaked if the
    // vector fails to grow. (Plain new keeps the constructor access rules of the original.)
    std::unique_ptr<T> elt(new T(std::forward<Args>(args)...));
    m_elements.emplace_back(elt.get());
    return elt.release();
  }

  /*!
   * Construct a T from args and wrap it in a new single-element Form with the given parent.
   */
  template <typename T, class... Args>
  Form* alloc_single_element_form(FormElement* parent, Args&&... args) {
    return alloc_single_form(parent, alloc_element<T>(std::forward<Args>(args)...));
  }

  /*!
   * Make a new Form with the given parent containing only elt.
   */
  Form* alloc_single_form(FormElement* parent, FormElement* elt) {
    return acquire(std::make_unique<Form>(parent, elt));
  }

  /*!
   * Make a new Form with the given parent containing the elements of sequence, in order.
   * The sequence is taken by const reference to avoid copying it; Form copies what it needs.
   */
  Form* alloc_sequence_form(FormElement* parent, const std::vector<FormElement*>& sequence) {
    return acquire(std::make_unique<Form>(parent, sequence));
  }

  /*!
   * Take ownership of an existing Form and return a raw pointer to it.
   */
  Form* acquire(std::unique_ptr<Form> form_ptr) {
    // Register first, release second: if push_back throws, form_ptr still owns the form.
    m_forms.push_back(form_ptr.get());
    return form_ptr.release();
  }

  /*!
   * Make a new Form with no elements and no parent.
   */
  Form* alloc_empty_form() { return acquire(std::make_unique<Form>()); }

  ~FormPool();

 private:
  std::vector<Form*> m_forms;
  std::vector<FormElement*> m_elements;
};
}  // namespace decompiler