mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 21:27:52 -04:00
Merge pull request #206 from water111/w/ir2-stacking
[Decompiler] Add stacking framework
This commit is contained in:
commit
8135c18e91
|
@ -34,7 +34,9 @@ add_library(
|
||||||
IR2/AtomicOpTypeAnalysis.cpp
|
IR2/AtomicOpTypeAnalysis.cpp
|
||||||
IR2/cfg_builder.cpp
|
IR2/cfg_builder.cpp
|
||||||
IR2/Env.cpp
|
IR2/Env.cpp
|
||||||
|
IR2/expression_build.cpp
|
||||||
IR2/Form.cpp
|
IR2/Form.cpp
|
||||||
|
IR2/FormStack.cpp
|
||||||
IR2/reg_usage.cpp
|
IR2/reg_usage.cpp
|
||||||
IR2/variable_naming.cpp
|
IR2/variable_naming.cpp
|
||||||
|
|
||||||
|
|
|
@ -169,6 +169,8 @@ class Function {
|
||||||
Env env;
|
Env env;
|
||||||
FormPool form_pool;
|
FormPool form_pool;
|
||||||
Form* top_form = nullptr;
|
Form* top_form = nullptr;
|
||||||
|
std::string debug_form_string;
|
||||||
|
bool print_debug_forms = false;
|
||||||
} ir2;
|
} ir2;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -20,6 +20,18 @@ FormPool::~FormPool() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////
|
||||||
|
// FormElement
|
||||||
|
///////////////////
|
||||||
|
|
||||||
|
std::string FormElement::to_string(const Env& env) const {
|
||||||
|
return to_form(env).print();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FormElement::push_to_stack(const Env& env, FormStack&) {
|
||||||
|
throw std::runtime_error("push_to_stack not implemented for " + to_string(env));
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
// Form
|
// Form
|
||||||
//////////////////
|
//////////////////
|
||||||
|
@ -38,6 +50,10 @@ goos::Object Form::to_form(const Env& env) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string Form::to_string(const Env& env) const {
|
||||||
|
return to_form(env).print();
|
||||||
|
}
|
||||||
|
|
||||||
void Form::inline_forms(std::vector<goos::Object>& forms, const Env& env) const {
|
void Form::inline_forms(std::vector<goos::Object>& forms, const Env& env) const {
|
||||||
for (auto& x : m_elements) {
|
for (auto& x : m_elements) {
|
||||||
forms.push_back(x->to_form(env));
|
forms.push_back(x->to_form(env));
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
namespace decompiler {
|
namespace decompiler {
|
||||||
class Form;
|
class Form;
|
||||||
class Env;
|
class Env;
|
||||||
class IR2_Stack;
|
class FormStack;
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* A "FormElement" represents a single LISP form that's not a begin.
|
* A "FormElement" represents a single LISP form that's not a begin.
|
||||||
|
@ -27,14 +27,10 @@ class FormElement {
|
||||||
virtual void apply_form(const std::function<void(Form*)>& f) = 0;
|
virtual void apply_form(const std::function<void(Form*)>& f) = 0;
|
||||||
virtual bool is_sequence_point() const { return true; }
|
virtual bool is_sequence_point() const { return true; }
|
||||||
virtual void collect_vars(VariableSet& vars) const = 0;
|
virtual void collect_vars(VariableSet& vars) const = 0;
|
||||||
|
std::string to_string(const Env& env) const;
|
||||||
|
|
||||||
// // push the result of this operation to the operation stack
|
// push the result of this operation to the operation stack
|
||||||
// // this is used for the forms that aren't last in a multi-form.
|
virtual void push_to_stack(const Env& env, FormStack& stack);
|
||||||
// virtual void push_to_stack(const Env& env, IR2_Stack& stack) = 0;
|
|
||||||
//
|
|
||||||
// // this is used for the final of a multi-form only.
|
|
||||||
// // using the current expressions on the stack, simplify myself.
|
|
||||||
// virtual FormElement* simplify(const Env& env, FormPool& pool, IR2_Stack& stack) = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class Form;
|
friend class Form;
|
||||||
|
@ -138,6 +134,10 @@ class SetVarElement : public FormElement {
|
||||||
bool m_is_sequence_point = true;
|
bool m_is_sequence_point = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* A wrapper around a single AtomicOp.
|
||||||
|
* The "important" special AtomicOps have their own Form type, like FunctionCallElement.
|
||||||
|
*/
|
||||||
class AtomicOpElement : public FormElement {
|
class AtomicOpElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
explicit AtomicOpElement(const AtomicOp* op);
|
explicit AtomicOpElement(const AtomicOp* op);
|
||||||
|
@ -150,6 +150,14 @@ class AtomicOpElement : public FormElement {
|
||||||
const AtomicOp* m_op;
|
const AtomicOp* m_op;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* A "condition" like (< a b). This can be used as a boolean value directly: (set! a (< b c))
|
||||||
|
* or it can be used as a branch condition: (if (< a b)).
|
||||||
|
*
|
||||||
|
* In the first case, it can be either a conditional move or actually branching. GOAL seems to use
|
||||||
|
* the branching when sometimes it could have used the conditional move, and for now, we don't
|
||||||
|
* care about the difference.
|
||||||
|
*/
|
||||||
class ConditionElement : public FormElement {
|
class ConditionElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1);
|
ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1);
|
||||||
|
@ -164,6 +172,9 @@ class ConditionElement : public FormElement {
|
||||||
Form* m_src[2] = {nullptr, nullptr};
|
Form* m_src[2] = {nullptr, nullptr};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Wrapper around an AtomicOp call.
|
||||||
|
*/
|
||||||
class FunctionCallElement : public FormElement {
|
class FunctionCallElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
explicit FunctionCallElement(const CallOp* op);
|
explicit FunctionCallElement(const CallOp* op);
|
||||||
|
@ -176,6 +187,10 @@ class FunctionCallElement : public FormElement {
|
||||||
const CallOp* m_op;
|
const CallOp* m_op;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Wrapper around an AtomicOp branch. These are inserted when directly converting blocks to Form,
|
||||||
|
* but should be eliminated after the cfg_builder pass completes.
|
||||||
|
*/
|
||||||
class BranchElement : public FormElement {
|
class BranchElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
explicit BranchElement(const BranchOp* op);
|
explicit BranchElement(const BranchOp* op);
|
||||||
|
@ -189,6 +204,10 @@ class BranchElement : public FormElement {
|
||||||
const BranchOp* m_op;
|
const BranchOp* m_op;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a (return-from #f x) form, which immediately returns from the function.
|
||||||
|
* This always has some "dead code" after it that can't be reached, which is the "dead_code".
|
||||||
|
*/
|
||||||
class ReturnElement : public FormElement {
|
class ReturnElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
Form* return_code = nullptr;
|
Form* return_code = nullptr;
|
||||||
|
@ -201,6 +220,27 @@ class ReturnElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a (return-from Lxxx x) form, which returns from a block which ends before the end
|
||||||
|
* of the function. These are used pretty rarely. As a result, I'm not planning to allow these to
|
||||||
|
* nest within other expressions. This means that the following code:
|
||||||
|
*
|
||||||
|
* (set! x (block my-block
|
||||||
|
* (if (condition?)
|
||||||
|
* (return-from my-block 12))
|
||||||
|
* 2))
|
||||||
|
*
|
||||||
|
* Would become
|
||||||
|
*
|
||||||
|
* (block my-block
|
||||||
|
* (when (condition?)
|
||||||
|
* (set! x 12)
|
||||||
|
* (return-from my-block none))
|
||||||
|
* (set! x 2)
|
||||||
|
* )
|
||||||
|
*
|
||||||
|
* which seems fine to me.
|
||||||
|
*/
|
||||||
class BreakElement : public FormElement {
|
class BreakElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
Form* return_code = nullptr;
|
Form* return_code = nullptr;
|
||||||
|
@ -213,6 +253,21 @@ class BreakElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Condition (cond, if, when, unless) which has an "else" case.
|
||||||
|
* The condition of the first entry may contain too much and will need to be adjusted later.
|
||||||
|
* Example:
|
||||||
|
*
|
||||||
|
* (set! x 10)
|
||||||
|
* (if (something?) ... )
|
||||||
|
*
|
||||||
|
* might become
|
||||||
|
* (if (begin (set! x 10) (something?)) ... )
|
||||||
|
*
|
||||||
|
* We want to wait until after expressions are built to move the extra stuff up to avoid splitting
|
||||||
|
* up a complicated expression used as the condition. But this should happen before variable
|
||||||
|
* scoping.
|
||||||
|
*/
|
||||||
class CondWithElseElement : public FormElement {
|
class CondWithElseElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
struct Entry {
|
struct Entry {
|
||||||
|
@ -230,6 +285,14 @@ class CondWithElseElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* An empty element. This is used to fill the body of control forms with nothing in them.
|
||||||
|
* For example, I believe that (cond ((x y) (else none))) will generate an else case with an
|
||||||
|
* "empty" and looks different from (cond ((x y))).
|
||||||
|
*
|
||||||
|
* We _could_ simplify out the use of empty, but I think it's more "authentic" to leave them in, and
|
||||||
|
* might give us more clues about how the code was originally written
|
||||||
|
*/
|
||||||
class EmptyElement : public FormElement {
|
class EmptyElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
EmptyElement() = default;
|
EmptyElement() = default;
|
||||||
|
@ -239,6 +302,11 @@ class EmptyElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a GOAL while loop and more complicated loops which have the "while" format of checking
|
||||||
|
* the condition before the first loop. This will not include infinite while loops.
|
||||||
|
* Unlike CondWithElseElement, this will correctly identify the start and end of the condition.
|
||||||
|
*/
|
||||||
class WhileElement : public FormElement {
|
class WhileElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
WhileElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
|
WhileElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
|
||||||
|
@ -251,6 +319,11 @@ class WhileElement : public FormElement {
|
||||||
bool cleaned = false;
|
bool cleaned = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a GOAL until loop and more complicated loops which use the "until" format of checking
|
||||||
|
* the condition after the first iteration. Has the same limitation as CondWithElseElement for the
|
||||||
|
* condition.
|
||||||
|
*/
|
||||||
class UntilElement : public FormElement {
|
class UntilElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
UntilElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
|
UntilElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {}
|
||||||
|
@ -262,6 +335,11 @@ class UntilElement : public FormElement {
|
||||||
Form* body = nullptr;
|
Form* body = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a GOAL short-circuit expression, either AND or OR.
|
||||||
|
* The first "element" in ShortCircuitElement may be too large, see the comment on
|
||||||
|
* CondWithElseElement
|
||||||
|
*/
|
||||||
class ShortCircuitElement : public FormElement {
|
class ShortCircuitElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
struct Entry {
|
struct Entry {
|
||||||
|
@ -286,6 +364,11 @@ class ShortCircuitElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a GOAL cond/if/when/unless statement which does not have an explicit else case. The
|
||||||
|
* compiler will then move #f into the result register in the delay slot. The first condition may be
|
||||||
|
* too large at first, see CondWithElseElement
|
||||||
|
*/
|
||||||
class CondNoElseElement : public FormElement {
|
class CondNoElseElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
struct Entry {
|
struct Entry {
|
||||||
|
@ -305,6 +388,9 @@ class CondNoElseElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a (abs x) expression.
|
||||||
|
*/
|
||||||
class AbsElement : public FormElement {
|
class AbsElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
explicit AbsElement(Form* _source);
|
explicit AbsElement(Form* _source);
|
||||||
|
@ -315,6 +401,11 @@ class AbsElement : public FormElement {
|
||||||
Form* source = nullptr;
|
Form* source = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents an (ash x y) expression. There is also an "unsigned" version of this using logical
|
||||||
|
* shifts. This only recognizes the fancy version where the shift amount isn't known at compile time
|
||||||
|
* and the compiler emits code that branches depending on the sign of the shift amount.
|
||||||
|
*/
|
||||||
class AshElement : public FormElement {
|
class AshElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
Form* shift_amount = nullptr;
|
Form* shift_amount = nullptr;
|
||||||
|
@ -328,6 +419,10 @@ class AshElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents a form which gets the runtime type of a boxed object. This is for the most general
|
||||||
|
* "object" case where we check for pair, binteger, or basic and there's actually branching.
|
||||||
|
*/
|
||||||
class TypeOfElement : public FormElement {
|
class TypeOfElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
Form* value;
|
Form* value;
|
||||||
|
@ -339,6 +434,24 @@ class TypeOfElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Represents an unpaired cmove #f. GOAL may emit code like
|
||||||
|
* (set! x #t)
|
||||||
|
* (... evaluate something)
|
||||||
|
* (cmov x y #f)
|
||||||
|
* where the stuff in between is potentially very large.
|
||||||
|
* GOAL has no "condition move" keyword available to the programmer - this would only happen when
|
||||||
|
* doing something like (set! x (zero? y)), in the code for creating a GOAL boolean.
|
||||||
|
*
|
||||||
|
* Code like (if x (set! y z)) will branch, the compiler isn't smart enough to use movn/movz here.
|
||||||
|
*
|
||||||
|
* These cannot be compacted into a single form until expression building, so we leave these
|
||||||
|
* placeholders in.
|
||||||
|
*
|
||||||
|
* Note - some conditionals put the (set! x #t) immediately before the cmove, but not all. Those
|
||||||
|
* that do will be correctly recognized and will be a ConditionElement. zero! seems to be the most
|
||||||
|
* common one that's split, and it happens reasonably often, so I will try to actually correct it.
|
||||||
|
*/
|
||||||
class ConditionalMoveFalseElement : public FormElement {
|
class ConditionalMoveFalseElement : public FormElement {
|
||||||
public:
|
public:
|
||||||
Variable dest;
|
Variable dest;
|
||||||
|
@ -351,6 +464,37 @@ class ConditionalMoveFalseElement : public FormElement {
|
||||||
void collect_vars(VariableSet& vars) const override;
|
void collect_vars(VariableSet& vars) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///*!
|
||||||
|
// * A GenericOperator is the head of a GenericElement.
|
||||||
|
// * It is used for the final output.
|
||||||
|
// */
|
||||||
|
// class GenericOperator {
|
||||||
|
// public:
|
||||||
|
// enum class Kind {
|
||||||
|
// FIXED_FUNCTION_CALL,
|
||||||
|
// VAR_FUNCTION_CALL,
|
||||||
|
// FIXED_OPERATOR
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// private:
|
||||||
|
// // if we're a VAR_FUNCTION_CALL, this should contain the expression to get the function
|
||||||
|
// Form* m_function_val;
|
||||||
|
//
|
||||||
|
// //std::string
|
||||||
|
//
|
||||||
|
//};
|
||||||
|
//
|
||||||
|
// class GenericElement : public FormElement {
|
||||||
|
// public:
|
||||||
|
// goos::Object to_form(const Env& env) const override;
|
||||||
|
// void apply(const std::function<void(FormElement*)>& f) override;
|
||||||
|
// void apply_form(const std::function<void(Form*)>& f) override;
|
||||||
|
// void collect_vars(VariableSet& vars) const override;
|
||||||
|
// private:
|
||||||
|
// GenericOperator m_head;
|
||||||
|
// std::vector<Form*> m_elts;
|
||||||
|
//};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* A Form is a wrapper around one or more FormElements.
|
* A Form is a wrapper around one or more FormElements.
|
||||||
* This is done for two reasons:
|
* This is done for two reasons:
|
||||||
|
@ -401,9 +545,15 @@ class Form {
|
||||||
const std::vector<FormElement*>& elts() const { return m_elements; }
|
const std::vector<FormElement*>& elts() const { return m_elements; }
|
||||||
std::vector<FormElement*>& elts() { return m_elements; }
|
std::vector<FormElement*>& elts() { return m_elements; }
|
||||||
|
|
||||||
void push_back(FormElement* elt) { m_elements.push_back(elt); }
|
void push_back(FormElement* elt) {
|
||||||
|
elt->parent_form = this;
|
||||||
|
m_elements.push_back(elt);
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() { m_elements.clear(); }
|
||||||
|
|
||||||
goos::Object to_form(const Env& env) const;
|
goos::Object to_form(const Env& env) const;
|
||||||
|
std::string to_string(const Env& env) const;
|
||||||
void inline_forms(std::vector<goos::Object>& forms, const Env& env) const;
|
void inline_forms(std::vector<goos::Object>& forms, const Env& env) const;
|
||||||
void apply(const std::function<void(FormElement*)>& f);
|
void apply(const std::function<void(FormElement*)>& f);
|
||||||
void apply_form(const std::function<void(Form*)>& f);
|
void apply_form(const std::function<void(Form*)>& f);
|
||||||
|
|
91
decompiler/IR2/FormStack.cpp
Normal file
91
decompiler/IR2/FormStack.cpp
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
#include "FormStack.h"
|
||||||
|
#include "Form.h"
|
||||||
|
|
||||||
|
namespace decompiler {
|
||||||
|
std::string FormStack::StackEntry::print(const Env& env) const {
|
||||||
|
if (destination.has_value()) {
|
||||||
|
assert(source && !elt);
|
||||||
|
return fmt::format("d: {} s: {} | {} <- {}", active, sequence_point,
|
||||||
|
destination.value().reg().to_charp(), source->to_string(env));
|
||||||
|
} else {
|
||||||
|
assert(elt && !source);
|
||||||
|
return fmt::format("d: {} s: {} | {}", active, sequence_point, elt->to_string(env));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FormStack::print(const Env& env) {
|
||||||
|
std::string result;
|
||||||
|
for (auto& x : m_stack) {
|
||||||
|
result += x.print(env);
|
||||||
|
result += '\n';
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
 * Push an entry that represents "set var to value".
 * @param var            the variable being written.
 * @param value          the form producing the value. Must not be null: rewrite() dereferences it
 *                       and print()/pop_reg() assert on it, so we check here where the bad
 *                       caller is still on the call stack.
 * @param sequence_point whether pop_reg() is allowed to search past this entry.
 */
void FormStack::push_value_to_reg(Variable var, Form* value, bool sequence_point) {
  assert(value);
  StackEntry entry;
  entry.active = true;  // by default, we should display everything!
  entry.sequence_point = sequence_point;
  entry.destination = var;
  entry.source = value;
  m_stack.push_back(entry);
}
|
||||||
|
|
||||||
|
bool FormStack::is_single_expression() {
|
||||||
|
int count = 0;
|
||||||
|
for (auto& e : m_stack) {
|
||||||
|
if (e.active) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
 * Push an entry that holds a form element with no destination register.
 * @param elt            the element to push. Must not be null: rewrite() emits it directly into
 *                       the output and print() asserts on it, so we check at the push.
 * @param sequence_point whether pop_reg() is allowed to search past this entry.
 */
void FormStack::push_form_element(FormElement* elt, bool sequence_point) {
  assert(elt);
  StackEntry entry;
  entry.active = true;
  entry.elt = elt;
  entry.sequence_point = sequence_point;
  m_stack.push_back(entry);
}
|
||||||
|
|
||||||
|
/*!
 * Search from the most recently pushed entry backwards for an active entry that sets var.
 * On a match the entry is deactivated (so rewrite() will skip it) and its source form is returned.
 * Inactive entries are skipped. The search stops, returning nullptr, at the first active
 * non-matching entry that is a sequence point, or when the stack is exhausted.
 */
Form* FormStack::pop_reg(const Variable& var) {
  for (auto it = m_stack.rbegin(); it != m_stack.rend(); ++it) {
    if (!it->active) {
      continue;
    }

    // an entry without a destination never compares equal to var
    if (it->destination == var) {
      it->active = false;
      assert(it->source);
      return it->source;
    }

    if (it->sequence_point) {
      // no match and it's a sequence point: can't look any further back than this.
      break;
    }
  }

  // we didn't have it...
  return nullptr;
}
|
||||||
|
|
||||||
|
/*!
 * Convert the active stack entries back into a flat list of form elements, in push order.
 * Entries with a destination become a newly allocated SetVarElement (and the source form's
 * parent_element is pointed at it); entries without one contribute their element unchanged.
 * Inactive entries are dropped.
 */
std::vector<FormElement*> FormStack::rewrite(FormPool& pool) {
  std::vector<FormElement*> rewritten;

  for (auto& entry : m_stack) {
    if (entry.active) {
      FormElement* out = entry.elt;
      if (entry.destination.has_value()) {
        auto set_elt = pool.alloc_element<SetVarElement>(*entry.destination, entry.source,
                                                         entry.sequence_point);
        entry.source->parent_element = set_elt;
        out = set_elt;
      }
      rewritten.push_back(out);
    }
  }

  return rewritten;
}
|
||||||
|
} // namespace decompiler
|
36
decompiler/IR2/FormStack.h
Normal file
36
decompiler/IR2/FormStack.h
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
#pragma once

#include <optional>
#include <string>
#include <vector>

#include "decompiler/Disasm/Register.h"
#include "decompiler/IR2/AtomicOp.h"

namespace decompiler {
// Forward declarations for every project type this header only uses by pointer/reference,
// so the header does not depend on what AtomicOp.h happens to declare.
class Env;
class Form;
class FormElement;
class FormPool;

/*!
 * A FormStack is used to track partial expressions when rebuilding the tree structure of
 * GOAL code. Linear sequences of operations are added onto the expression stack.
 */
class FormStack {
 public:
  FormStack() = default;

  // push an entry meaning "set var to value".
  void push_value_to_reg(Variable var, Form* value, bool sequence_point);

  // push an entry holding an element that has no destination register.
  void push_form_element(FormElement* elt, bool sequence_point);

  // find the most recent active entry setting var, deactivate it and return its value,
  // or return nullptr if there is none before an active sequence point.
  Form* pop_reg(const Variable& var);

  // true if exactly one entry is still active.
  bool is_single_expression();

  // turn the remaining active entries back into a list of elements.
  std::vector<FormElement*> rewrite(FormPool& pool);

  // debug print of all entries, one per line.
  std::string print(const Env& env);

 private:
  struct StackEntry {
    bool active = true;                   // should this appear in the output?
    std::optional<Variable> destination;  // what register we are setting (or nullopt if no dest.)
    Form* source = nullptr;               // the value we are setting the register to.

    FormElement* elt = nullptr;   // the element, for entries without a destination.
    bool sequence_point = false;  // if set, pop_reg will not search past this entry.
    TP_Type type;
    std::string print(const Env& env) const;
  };
  std::vector<StackEntry> m_stack;
};
}  // namespace decompiler
|
27
decompiler/IR2/expression_build.cpp
Normal file
27
decompiler/IR2/expression_build.cpp
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#include "expression_build.h"
|
||||||
|
#include "decompiler/Function/Function.h"
|
||||||
|
#include "decompiler/IR2/Form.h"
|
||||||
|
#include "decompiler/IR2/FormStack.h"
|
||||||
|
|
||||||
|
namespace decompiler {
|
||||||
|
/*!
 * Run expression building over top_level_form via apply_form.
 * For each visited form, every element is pushed onto a fresh FormStack, and the form's
 * elements are then replaced with the result of FormStack::rewrite.
 *
 * @param top_level_form the form to process; must not be null.
 * @param pool           pool used to allocate any new elements.
 * @param f              the function being processed; its ir2.env is passed to push_to_stack.
 * @return true on success, false if anything derived from std::exception was thrown
 *         (for example, by an element type that does not implement push_to_stack).
 *
 * NOTE(review): on failure, forms visited before the failing one appear to stay rewritten;
 * confirm against Form::apply_form's visiting order if callers rely on the forms being untouched.
 */
bool convert_to_expressions(Form* top_level_form, FormPool& pool, const Function& f) {
  assert(top_level_form);

  try {
    top_level_form->apply_form([&](Form* form) {
      FormStack stack;
      for (auto& entry : form->elts()) {
        entry->push_to_stack(f.ir2.env, stack);
      }
      auto new_entries = stack.rewrite(pool);
      form->clear();
      for (auto x : new_entries) {
        form->push_back(x);
      }
    });
  } catch (const std::exception&) {
    // the exception object itself is not used: failure is reported through the return value.
    return false;
  }
  return true;
}
|
||||||
|
} // namespace decompiler
|
8
decompiler/IR2/expression_build.h
Normal file
8
decompiler/IR2/expression_build.h
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace decompiler {
class Form;
class FormPool;
class Function;

/*!
 * Expression-building pass over a function's top-level form.
 * Returns true on success and false if the conversion failed.
 */
bool convert_to_expressions(Form* top_level_form, FormPool& pool, const Function& f);
}  // namespace decompiler
|
|
@ -74,6 +74,8 @@ class ObjectFileDB {
|
||||||
void ir2_register_usage_pass();
|
void ir2_register_usage_pass();
|
||||||
void ir2_variable_pass();
|
void ir2_variable_pass();
|
||||||
void ir2_cfg_build_pass();
|
void ir2_cfg_build_pass();
|
||||||
|
void ir2_store_current_forms();
|
||||||
|
void ir2_build_expressions();
|
||||||
void ir2_write_results(const std::string& output_dir);
|
void ir2_write_results(const std::string& output_dir);
|
||||||
std::string ir2_to_file(ObjectFileData& data);
|
std::string ir2_to_file(ObjectFileData& data);
|
||||||
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
|
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "decompiler/IR2/reg_usage.h"
|
#include "decompiler/IR2/reg_usage.h"
|
||||||
#include "decompiler/IR2/variable_naming.h"
|
#include "decompiler/IR2/variable_naming.h"
|
||||||
#include "decompiler/IR2/cfg_builder.h"
|
#include "decompiler/IR2/cfg_builder.h"
|
||||||
|
#include "decompiler/IR2/expression_build.h"
|
||||||
#include "common/goos/PrettyPrinter.h"
|
#include "common/goos/PrettyPrinter.h"
|
||||||
|
|
||||||
namespace decompiler {
|
namespace decompiler {
|
||||||
|
@ -34,8 +35,12 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) {
|
||||||
ir2_register_usage_pass();
|
ir2_register_usage_pass();
|
||||||
lg::info("Variable analysis...");
|
lg::info("Variable analysis...");
|
||||||
ir2_variable_pass();
|
ir2_variable_pass();
|
||||||
lg::info("Initial conversion to Form...");
|
lg::info("Initial structuring..");
|
||||||
ir2_cfg_build_pass();
|
ir2_cfg_build_pass();
|
||||||
|
lg::info("Storing temporary form result...");
|
||||||
|
ir2_store_current_forms();
|
||||||
|
lg::info("Expression building...");
|
||||||
|
ir2_build_expressions();
|
||||||
lg::info("Writing results...");
|
lg::info("Writing results...");
|
||||||
ir2_write_results(output_dir);
|
ir2_write_results(output_dir);
|
||||||
}
|
}
|
||||||
|
@ -349,6 +354,45 @@ void ObjectFileDB::ir2_cfg_build_pass() {
|
||||||
lg::info("{}/{}/{} cfg build in {:.2f} ms\n", successful, attempted, total, timer.getMs());
|
lg::info("{}/{}/{} cfg build in {:.2f} ms\n", successful, attempted, total, timer.getMs());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ObjectFileDB::ir2_store_current_forms() {
|
||||||
|
Timer timer;
|
||||||
|
int total = 0;
|
||||||
|
|
||||||
|
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||||
|
(void)segment_id;
|
||||||
|
(void)data;
|
||||||
|
|
||||||
|
if (func.ir2.top_form) {
|
||||||
|
total++;
|
||||||
|
func.ir2.debug_form_string =
|
||||||
|
pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
lg::info("Stored debug forms for {} functions in {:.2f} ms\n", total, timer.getMs());
|
||||||
|
}
|
||||||
|
|
||||||
|
void ObjectFileDB::ir2_build_expressions() {
|
||||||
|
Timer timer;
|
||||||
|
int total = 0;
|
||||||
|
int attempted = 0;
|
||||||
|
int successful = 0;
|
||||||
|
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||||
|
(void)segment_id;
|
||||||
|
(void)data;
|
||||||
|
total++;
|
||||||
|
if (func.ir2.top_form) {
|
||||||
|
attempted++;
|
||||||
|
if (convert_to_expressions(func.ir2.top_form, func.ir2.form_pool, func)) {
|
||||||
|
successful++;
|
||||||
|
func.ir2.print_debug_forms = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
lg::info("{}/{}/{} expression build in {:.2f} ms\n", successful, attempted, total, timer.getMs());
|
||||||
|
}
|
||||||
|
|
||||||
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
|
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
|
||||||
Timer timer;
|
Timer timer;
|
||||||
lg::info("Writing IR2 results to file...");
|
lg::info("Writing IR2 results to file...");
|
||||||
|
@ -388,6 +432,12 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) {
|
||||||
result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env));
|
result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env));
|
||||||
result += '\n';
|
result += '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (func.ir2.print_debug_forms) {
|
||||||
|
result += '\n';
|
||||||
|
result += func.ir2.debug_form_string;
|
||||||
|
result += '\n';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// print data
|
// print data
|
||||||
|
|
Loading…
Reference in a new issue