mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
Recognize auto-generated inspect methods and create deftypes from them (#95)
- Recognize new type definitions/parents/type flags in the decompiler
- Analyze autogenerated inspect methods and dump guesses at fields to a file
- Utility functions for accessing static data by label
- Better ordering in the decompiler to go through functions in the order they appeared in the source
- Added a decent number of types to `all-types.gc` based on the new field analyzer
- Correct a few `int`/`integer` mistakes in `gcommon.gc` (this should really be a warning)
- Correct a few type issues in `gcommon` and `gkernel-h`
- Option in the decompiler to be strict about `define-extern` redefining a type of a symbol
- Add a test to check consistency in types between `all-types.gc` (used by decompiler) and `goal_src` (used by the compiler)
This commit is contained in:
parent
b561cdfade
commit
b56025412b
|
@ -39,6 +39,8 @@ std::string MethodInfo::print_one_line() const {
|
|||
}
|
||||
|
||||
/*!
 * Construct a field from a name and a type. Both arguments are taken by value and moved into
 * the corresponding members; no offset is supplied by this overload.
 */
Field::Field(std::string name, TypeSpec ts) : m_name(std::move(name)), m_type(std::move(ts)) {}
|
||||
/*!
 * Construct a field from a name, a type and an offset.
 * The name and type are moved into place; the offset is stored as given.
 */
Field::Field(std::string name, TypeSpec ts, int offset)
    : m_name(std::move(name)), m_type(std::move(ts)) {
  m_offset = offset;
}
|
||||
|
||||
/*!
|
||||
* Print a one line description of a field.
|
||||
|
|
|
@ -162,6 +162,7 @@ class Field {
|
|||
public:
|
||||
Field() = default;
|
||||
Field(std::string name, TypeSpec ts);
|
||||
Field(std::string name, TypeSpec ts, int offset);
|
||||
void set_dynamic();
|
||||
void set_array(int size);
|
||||
void set_inline();
|
||||
|
|
|
@ -18,7 +18,8 @@ add_executable(decompiler
|
|||
IR/BasicOpBuilder.cpp
|
||||
IR/CfgBuilder.cpp
|
||||
IR/IR.cpp
|
||||
IR/IR_TypeAnalysis.cpp)
|
||||
IR/IR_TypeAnalysis.cpp
|
||||
Function/TypeInspector.cpp)
|
||||
|
||||
target_link_libraries(decompiler
|
||||
goos
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "TypeInspector.h"
|
||||
|
||||
namespace {
|
||||
std::vector<Register> gpr_backups = {make_gpr(Reg::GP), make_gpr(Reg::S5), make_gpr(Reg::S4),
|
||||
|
@ -474,7 +475,8 @@ void Function::find_global_function_defs(LinkedObjectFile& file, DecompilerTypeS
|
|||
* Look through this function to find calls to method-set! which define methods.
|
||||
* Updates the guessed_name of the function and updates type_info.
|
||||
*/
|
||||
void Function::find_method_defs(LinkedObjectFile& file) {
|
||||
void Function::find_method_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts) {
|
||||
(void)dts;
|
||||
int state = 0;
|
||||
int label_id = -1;
|
||||
int method_id = -1;
|
||||
|
@ -546,6 +548,11 @@ void Function::find_method_defs(LinkedObjectFile& file) {
|
|||
auto& func = file.get_function_at_label(label_id);
|
||||
assert(func.guessed_name.empty());
|
||||
func.guessed_name.set_as_method(type_name, method_id);
|
||||
func.method_of_type = type_name;
|
||||
if (method_id == GOAL_INSPECT_METHOD) {
|
||||
func.is_inspect_method = true;
|
||||
}
|
||||
|
||||
state = 0;
|
||||
continue;
|
||||
}
|
||||
|
@ -553,6 +560,104 @@ void Function::find_method_defs(LinkedObjectFile& file) {
|
|||
}
|
||||
}
|
||||
|
||||
/*!
 * Scan this function's instructions for the sequence that defines a new type and record what it
 * finds in the DecompilerTypeSystem.
 *
 * The recognized sequence is, in order:
 *   lw xx, type(s7)
 *   lwu t9, 16, xx
 *   daddiu a0, s7, name-of-type
 *   lw a1, parent-type(s7)
 *   ld a2, LXX(fp)
 *   jalr ra, t9
 *   sll v0, ra, 0
 *
 * On a full match the (type, parent) pair is passed to dts.add_type_parent, and the 64-bit value
 * read from the two consecutive data words at label LXX is passed to dts.add_type_flags.
 *
 * Any instruction that does not continue the sequence resets the matcher. This includes a
 * mismatch after the jalr: previously that case left the matcher armed, so an unrelated
 * "sll v0, ra, 0" later in the function could complete a stale match.
 *
 * NOTE(review): the "lw xx, type(s7)" check runs before the per-state checks, so a definition
 * whose parent symbol is itself "type" restarts the match instead of completing it.
 */
void Function::find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts) {
  int state = 0;
  Register temp_reg;
  std::string type_name;
  std::string parent_type;
  int label_idx = -1;

  for (const auto& instr : instructions) {
    // look for lw xx, type(s7) - this (re)starts the match from any state.
    if (instr.kind == InstructionKind::LW && instr.get_src(0).kind == InstructionAtom::IMM_SYM &&
        instr.get_src(0).get_sym() == "type" && instr.get_src(1).get_reg() == make_gpr(Reg::S7)) {
      state = 1;
      temp_reg = instr.get_dst(0).get_reg();
      continue;
    }

    switch (state) {
      case 1:
        // look for lwu t9, 16, v1
        if (instr.kind == InstructionKind::LWU && instr.get_dst(0).get_reg() == make_gpr(Reg::T9) &&
            instr.get_src(0).get_imm() == 16 && instr.get_src(1).get_reg() == temp_reg) {
          state = 2;
        } else {
          state = 0;
        }
        break;

      case 2:
        // look for daddiu a0, s7, name-of-type
        if (instr.kind == InstructionKind::DADDIU &&
            instr.get_dst(0).get_reg() == make_gpr(Reg::A0) &&
            instr.get_src(0).get_reg() == make_gpr(Reg::S7) && instr.get_src(1).is_sym()) {
          state = 3;
          type_name = instr.get_src(1).get_sym();
        } else {
          state = 0;
        }
        break;

      case 3:
        // look for lw a1, parent-type(s7)
        if (instr.kind == InstructionKind::LW && instr.get_dst(0).get_reg() == make_gpr(Reg::A1) &&
            instr.get_src(0).kind == InstructionAtom::IMM_SYM &&
            instr.get_src(1).get_reg() == make_gpr(Reg::S7)) {
          state = 4;
          parent_type = instr.get_src(0).get_sym();
        } else {
          state = 0;
        }
        break;

      case 4:
        // look for ld a2, LXX(fp)
        if (instr.kind == InstructionKind::LD && instr.get_dst(0).get_reg() == make_gpr(Reg::A2) &&
            instr.get_src(0).is_label() && instr.get_src(1).get_reg() == make_gpr(Reg::FP)) {
          state = 5;
          label_idx = instr.get_src(0).get_label();
        } else {
          state = 0;
        }
        break;

      case 5:
        // look for jalr ra, t9
        if (instr.kind == InstructionKind::JALR && instr.get_dst(0).get_reg() == make_gpr(Reg::RA) &&
            instr.get_src(0).get_reg() == make_gpr(Reg::T9)) {
          state = 6;
        } else {
          state = 0;
        }
        break;

      case 6:
        // look for sll v0, ra, 0
        if (instr.kind == InstructionKind::SLL && instr.get_dst(0).get_reg() == make_gpr(Reg::V0) &&
            instr.get_src(0).get_reg() == make_gpr(Reg::RA) && instr.get_src(1).get_imm() == 0) {
          // done!
          dts.add_type_parent(type_name, parent_type);

          // The flags are a 64-bit value stored as two consecutive data words at the label.
          // Work on a copy of the label so the file's label table is left untouched.
          Label flag_label = file.labels.at(label_idx);
          u64 word = file.read_data_word(flag_label);
          flag_label.offset += 4;
          u64 word2 = file.read_data_word(flag_label);
          word |= (word2 << 32);
          dts.add_type_flags(type_name, word);
        }
        // Matched or not, the sequence is over: always start fresh.
        state = 0;
        break;

      default:
        state = 0;
        break;
    }
  }
}
|
||||
|
||||
void Function::add_basic_op(std::shared_ptr<IR> op, int start_instr, int end_instr) {
|
||||
op->is_basic_op = true;
|
||||
assert(end_instr > start_instr);
|
||||
|
|
|
@ -70,7 +70,8 @@ class Function {
|
|||
Function(int _start_word, int _end_word);
|
||||
void analyze_prologue(const LinkedObjectFile& file);
|
||||
void find_global_function_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts);
|
||||
void find_method_defs(LinkedObjectFile& file);
|
||||
void find_method_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts);
|
||||
void find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts);
|
||||
void add_basic_op(std::shared_ptr<IR> op, int start_instr, int end_instr);
|
||||
bool has_basic_ops() { return !basic_ops.empty(); }
|
||||
bool has_typemaps() { return !basic_op_typemaps.empty(); }
|
||||
|
@ -94,6 +95,8 @@ class Function {
|
|||
FunctionName guessed_name;
|
||||
|
||||
bool suspected_asm = false;
|
||||
bool is_inspect_method = false;
|
||||
std::string method_of_type;
|
||||
|
||||
std::vector<Instruction> instructions;
|
||||
std::vector<BasicBlock> basic_blocks;
|
||||
|
@ -137,10 +140,10 @@ class Function {
|
|||
} prologue;
|
||||
|
||||
bool uses_fp_register = false;
|
||||
std::vector<std::shared_ptr<IR>> basic_ops;
|
||||
|
||||
private:
|
||||
void check_epilogue(const LinkedObjectFile& file);
|
||||
std::vector<std::shared_ptr<IR>> basic_ops;
|
||||
std::vector<TypeMap> basic_op_typemaps;
|
||||
std::unordered_map<int, int> instruction_to_basic_op;
|
||||
std::unordered_map<int, int> basic_op_to_instruction;
|
||||
|
|
812
decompiler/Function/TypeInspector.cpp
Normal file
812
decompiler/Function/TypeInspector.cpp
Normal file
|
@ -0,0 +1,812 @@
|
|||
#include "decompiler/config.h"
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
#include "TypeInspector.h"
|
||||
#include "Function.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "third-party/fmt/format.h"
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "common/type_system/deftype.h"
|
||||
|
||||
namespace {
|
||||
/*!
 * What could be learned about a field from the format string used to print it.
 */
struct FieldPrint {
  char format = '\0';           // format code: D, X, A, f, P, m, r, e, ...
  std::string field_name;       // text between "~T" and the following ':' or '['
  std::string field_type_name;  // only set for "#<type @ #x~X>" and "~`type`P" formats
  bool has_array = false;       // true if the name was followed by "[N]"
  int array_size = -1;          // N from "[N]" (0 if no digits were present); -1 if not an array
};

/*!
 * Parse one field line of an inspect method's format string.
 *
 * Accepted shapes (each ends with "~%"):
 *   ~Tname: ~C~%                      plain format code C
 *   ~Tname[N] @ #x~X~%                array
 *   ~Tname: #<type @ #x~X>~%          structure, format 'X'
 *   ~Tname: #x~X~%                    hex, format 'X'
 *   ~Tname: ~`type`P~%                type with its own print, format 'P'
 *   ~Tname: (meters ~m)~%             format 'm'
 *   ~Tname: (deg ~r)~%                format 'r'
 *   ~Tname: (seconds ~e)~%            format 'e'
 *
 * Malformed fixed text is caught by assert. Running off the end of the string throws
 * std::out_of_range (from std::string::at), and an unrecognized format throws std::runtime_error.
 */
FieldPrint get_field_print(const std::string& str) {
  int idx = 0;
  auto next = [&]() { return str.at(idx++); };
  auto peek = [&](int off) { return str.at(idx + off); };

  // Consume the characters of `expected` one by one, asserting that each one matches.
  auto expect = [&](const char* expected) {
    for (const char* p = expected; *p != '\0'; ++p) {
      const char got = next();
      assert(got == *p);
      (void)got;  // silence unused warnings when asserts are compiled out
    }
  };

  // Assert that the whole string has been consumed.
  auto expect_end = [&]() { assert(idx == (int)str.size()); };

  FieldPrint field_print;

  // first is ~T
  expect("~T");

  // next the name:
  char name_char = next();
  while (name_char != ':' && name_char != '[') {
    field_print.field_name.push_back(name_char);
    name_char = next();
  }

  if (name_char == '[') {
    // possibly array thing: "[N] @ #x" and then the normal "~X~%"
    int size = 0;
    char num_char = next();
    while (num_char >= '0' && num_char <= '9') {
      size = size * 10 + (num_char - '0');
      num_char = next();
    }
    field_print.has_array = true;
    field_print.array_size = size;

    assert(num_char == ']');
    expect(" @ #x");
  } else {
    // next a space
    expect(" ");
  }

  // next the format
  const char fmt1 = next();
  if (fmt1 == '~' && peek(0) != '`') {  // normal ~_~%
    field_print.format = next();
    expect("~%");
    expect_end();
  } else if (fmt1 == '#' && peek(0) == '<') {  // struct #<my-struct @ #x~X>~%
    next();
    for (char c = next(); c != ' '; c = next()) {
      field_print.field_type_name += c;
    }
    expect("@ #x~X>~%");
    field_print.format = 'X';
    expect_end();
  } else if (fmt1 == '#' && peek(0) == 'x') {  // #x~X~%
    next();
    expect("~X~%");
    field_print.format = 'X';
    // Same trailing check as every other parsed branch; it was missing here.
    expect_end();
  } else if (fmt1 == '~' && peek(0) == '`') {  // ~`my-type-with-overriden-print`P~%
    next();
    for (char c = next(); c != '`'; c = next()) {
      field_print.field_type_name += c;
    }
    expect("P~%");
    field_print.format = 'P';
    expect_end();
  } else if (str.substr(idx - 1) == "(meters ~m)~%") {
    field_print.format = 'm';
  } else if (str.substr(idx - 1) == "(deg ~r)~%") {
    field_print.format = 'r';
  } else if (str.substr(idx - 1) == "(seconds ~e)~%") {
    field_print.format = 'e';
  } else {
    throw std::runtime_error("other format nyi in get_field_print " + str.substr(idx));
  }

  return field_print;
}
|
||||
|
||||
/*!
 * True if ir is an IR_IntegerConstant whose value equals the given value.
 */
bool is_int(IR* ir, s64 value) {
  auto* constant = dynamic_cast<IR_IntegerConstant*>(ir);
  if (!constant) {
    return false;
  }
  return constant->value == value;
}
|
||||
|
||||
/*!
 * True if ir is an IR_Register referring to the given register.
 */
bool is_reg(IR* ir, Register reg) {
  auto* reg_ir = dynamic_cast<IR_Register*>(ir);
  if (!reg_ir) {
    return false;
  }
  return reg_ir->reg == reg;
}
|
||||
|
||||
/*!
 * True if ir is an IR_IntMath2 of the given kind whose first argument is the register src0 and
 * whose second argument is the integer constant src1.
 */
bool is_math_reg_constant(IR* ir, IR_IntMath2::Kind kind, Register src0, s64 src1) {
  auto* math = dynamic_cast<IR_IntMath2*>(ir);
  if (!math || math->kind != kind) {
    return false;
  }
  if (!is_reg(math->arg0.get(), src0)) {
    return false;
  }
  return is_int(math->arg1.get(), src1);
}
|
||||
|
||||
/*!
 * True if ir is an IR_Load of the given kind and size whose location is (base + offset).
 */
bool is_load_with_offset(IR* ir, IR_Load::Kind kind, int load_size, Register base, s64 offset) {
  auto* load = dynamic_cast<IR_Load*>(ir);
  if (!load) {
    return false;
  }
  if (load->kind != kind || load->size != load_size) {
    return false;
  }
  return is_math_reg_constant(load->location.get(), IR_IntMath2::ADD, base, offset);
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set that stores into dst the result of a load of the given kind and size
 * from (base + offset).
 */
bool is_get_load_with_offset(IR* ir,
                             Register dst,
                             IR_Load::Kind kind,
                             int load_size,
                             Register base,
                             s64 offset) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_load_with_offset(set->src.get(), kind, load_size, base, offset);
}
|
||||
|
||||
struct LoadInfo {
|
||||
int offset = 0;
|
||||
int size = 0;
|
||||
IR_Load::Kind kind;
|
||||
};
|
||||
|
||||
/*!
 * Extract kind, size and constant offset from an IR_Set whose source is an IR_Load.
 * The load location must be either a bare register (offset 0) or an ADD whose second argument
 * is an integer constant; anything else trips an assert.
 */
LoadInfo get_load_info_from_set(IR* load) {
  auto* set_op = dynamic_cast<IR_Set*>(load);
  assert(set_op);
  auto* load_op = dynamic_cast<IR_Load*>(set_op->src.get());
  assert(load_op);

  LoadInfo info;
  info.kind = load_op->kind;
  info.size = load_op->size;

  IR* location = load_op->location.get();
  if (dynamic_cast<IR_Register*>(location)) {
    // load straight from the register, no offset.
    info.offset = 0;
    return info;
  }

  auto* add_op = dynamic_cast<IR_IntMath2*>(location);
  assert(add_op);
  assert(add_op->kind == IR_IntMath2::ADD);
  auto* constant = dynamic_cast<IR_IntegerConstant*>(add_op->arg1.get());
  assert(constant);
  info.offset = constant->value;
  return info;
}
|
||||
|
||||
/*!
 * Get the base register of a load whose location is either a bare register or
 * (register + integer constant).
 *
 * Any other location shape trips an assert; if asserts are compiled out, a default-constructed
 * Register is returned. The cast to IR_IntMath2 used to be dereferenced without a null check.
 */
Register get_base_of_load(IR_Load* load) {
  IR* location = load->location.get();

  if (auto* as_reg = dynamic_cast<IR_Register*>(location)) {
    return as_reg->reg;
  }

  auto* as_math = dynamic_cast<IR_IntMath2*>(location);
  if (!as_math) {
    // neither a register nor a math op: not a shape we understand.
    assert(false);
    return {};
  }

  assert(as_math->kind == IR_IntMath2::ADD);
  assert(dynamic_cast<IR_IntegerConstant*>(as_math->arg1.get()));

  if (auto* math_reg = dynamic_cast<IR_Register*>(as_math->arg0.get())) {
    return math_reg->reg;
  }

  assert(false);
  return {};
}
|
||||
|
||||
/*!
 * True if ir is an IR_Load whose base register (see get_base_of_load) is base.
 */
bool is_load_with_base(IR* ir, Register base) {
  auto* load = dynamic_cast<IR_Load*>(ir);
  if (!load) {
    return false;
  }
  return base == get_base_of_load(load);
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set that stores into dst the result of a load based on register base.
 */
bool is_get_load(IR* ir, Register dst, Register base) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_load_with_base(set->src.get(), base);
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set copying register src into register dst.
 */
bool is_reg_reg_move(IR* ir, Register dst, Register src) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_reg(set->src.get(), src);
}
|
||||
|
||||
/*!
 * True if ir is an IR_SymbolValue with the given name.
 */
bool is_sym_value(IR* ir, const std::string& sym_name) {
  auto* sym_value = dynamic_cast<IR_SymbolValue*>(ir);
  if (!sym_value) {
    return false;
  }
  return sym_value->name == sym_name;
}
|
||||
|
||||
/*!
 * True if ir is an IR_Symbol with the given name.
 */
bool is_sym(IR* ir, const std::string& sym_name) {
  auto* symbol = dynamic_cast<IR_Symbol*>(ir);
  if (!symbol) {
    return false;
  }
  return symbol->name == sym_name;
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set that stores the value of the named symbol into dst.
 */
bool is_get_sym_value(IR* ir, Register dst, const std::string& sym_name) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_sym_value(set->src.get(), sym_name);
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set that stores the named symbol itself (an IR_Symbol) into dst.
 */
bool is_get_sym(IR* ir, Register dst, const std::string& sym_name) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_sym(set->src.get(), sym_name);
}
|
||||
|
||||
/*!
 * True if ir is an IR_StaticAddress.
 */
bool is_label(IR* ir) {
  return dynamic_cast<IR_StaticAddress*>(ir) != nullptr;
}
|
||||
|
||||
/*!
 * True if ir is an IR_Set that stores a static address (label) into dst.
 */
bool is_get_label(IR* ir, Register dst) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }
  return is_label(set->src.get());
}
|
||||
|
||||
/*!
 * Get the label id from an IR_Set whose source is an IR_StaticAddress.
 * The op must already have this shape (for example, checked with is_get_label); the casts are
 * asserted rather than dereferenced blindly.
 */
int get_label_id_of_set(IR* ir) {
  auto* as_set = dynamic_cast<IR_Set*>(ir);
  assert(as_set);
  auto* as_label = dynamic_cast<IR_StaticAddress*>(as_set->src.get());
  assert(as_label);
  return as_label->label_id;
}
|
||||
|
||||
/*!
 * True if ir is a shift: either an IR_Set whose source is an IR_IntMath2 left/right shift
 * (logical or arithmetic), or an IR_AsmOp named "sllv".
 */
bool is_set_shift(IR* ir) {
  if (auto* set = dynamic_cast<IR_Set*>(ir)) {
    if (auto* math = dynamic_cast<IR_IntMath2*>(set->src.get())) {
      switch (math->kind) {
        case IR_IntMath2::LEFT_SHIFT:
        case IR_IntMath2::RIGHT_SHIFT_LOGIC:
        case IR_IntMath2::RIGHT_SHIFT_ARITH:
          return true;
        default:
          break;
      }
    }
  }

  auto* asm_op = dynamic_cast<IR_AsmOp*>(ir);
  if (!asm_op) {
    return false;
  }
  return asm_op->name == "sllv";
}
|
||||
|
||||
/*!
 * If math has the form (base, integer-constant), write the constant to *result and return true.
 * Otherwise return false and leave *result untouched.
 * The math kind and the constant's value are not inspected here.
 */
bool get_ptr_offset_constant_nonzero(IR_IntMath2* math, Register base, int* result) {
  if (is_reg(math->arg0.get(), base)) {
    if (auto* constant = dynamic_cast<IR_IntegerConstant*>(math->arg1.get())) {
      *result = constant->value;
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/*!
 * If math has the form (r0, base), the offset is zero: write 0 to *result and return true.
 * Otherwise return false and leave *result untouched.
 */
bool get_ptr_offset_zero(IR_IntMath2* math, Register base, int* result) {
  const bool matches = is_reg(math->arg0.get(), make_gpr(Reg::R0)) && is_reg(math->arg1.get(), base);
  if (matches) {
    *result = 0;
  }
  return matches;
}
|
||||
|
||||
/*!
 * Recognize an IR_Set that stores into dst a pointer computed from base, and get the offset.
 * The source must be an IR_IntMath2 of the form (base, constant) or (r0, base).
 * Returns true and writes *result on success; returns false otherwise.
 */
bool get_ptr_offset(IR* ir, Register dst, Register base, int* result) {
  auto* set = dynamic_cast<IR_Set*>(ir);
  if (!set || !is_reg(set->dst.get(), dst)) {
    return false;
  }

  auto* math = dynamic_cast<IR_IntMath2*>(set->src.get());
  if (!math) {
    return false;
  }

  if (get_ptr_offset_constant_nonzero(math, base, result)) {
    return true;
  }
  return get_ptr_offset_zero(math, base, result);
}
|
||||
|
||||
/*!
 * Check that the function begins with the header expected of an auto-generated inspect method.
 * Returns true (weird) and appends a reason to result->warnings if the function has more than
 * one basic block, fewer than 7 basic ops, or if any of the first 7 basic ops does not match.
 * Also sets result->is_basic from the sixth op (a load from gp - 4 means basic).
 */
bool is_weird(Function& function, LinkedObjectFile& file, TypeInspectorResult* result) {
  // record the reason and report "weird".
  auto weird = [result](const std::string& reason) {
    result->warnings += reason;
    return true;
  };

  if (function.basic_blocks.size() > 1) {
    return weird(" too many basic blocks");
  }

  /*
   ;; for a basic
    or gp, a0, r0               ;; (set! gp a0)
    lw t9, format(s7)           ;; (set! t9 format)
    daddiu a0, s7, #t           ;; (set! a0 '#t)
    daddiu a1, fp, L362         ;; (set! a1 L362) "[~8x] ~A~%"
    or a2, gp, r0               ;; (set! a2 gp)
    lwu a3, -4(gp)              ;; (set! a3 (l.wu (+.i gp -4)))
    jalr ra, t9                 ;; (call!)
    sll v0, ra, 0
   ;; for a struct
    or gp, a0, r0               ;; (set! gp a0)
    lw t9, format(s7)           ;; (set! t9 format)
    daddiu a0, s7, #t           ;; (set! a0 '#t)
    daddiu a1, fp, L79          ;; (set! a1 L79) "[~8x] ~A~%"
    or a2, gp, r0               ;; (set! a2 gp)
    daddiu a3, s7, dead-pool-heap-rec;; (set! a3 'dead-pool-heap-rec)
    jalr ra, t9                 ;; (call!)
   */

  auto& ops = function.basic_ops;

  // check size
  if (ops.size() < 7) {
    return weird(" not enough basic ops");
  }

  // op 0: gp = a0
  if (!is_reg_reg_move(ops.at(0).get(), make_gpr(Reg::GP), make_gpr(Reg::A0))) {
    return weird("bad first move");
  }

  // op 1: t9 = format
  if (!is_get_sym_value(ops.at(1).get(), make_gpr(Reg::T9), "format")) {
    return weird("bad get format");
  }

  // op 2: a0 = '#t
  if (!is_get_sym(ops.at(2).get(), make_gpr(Reg::A0), "#t")) {
    return weird("bad get true");
  }

  // op 3: a1 = label of the header format string
  IR* label_op = ops.at(3).get();
  if (!is_get_label(label_op, make_gpr(Reg::A1))) {
    return weird("bad get label");
  }

  auto str = file.get_goal_string_by_label(file.labels.at(get_label_id_of_set(label_op)));
  if (str != "[~8x] ~A~%") {
    return weird("bad type dec string: " + str);
  }

  // op 4: a2 = gp
  if (!is_reg_reg_move(ops.at(4).get(), make_gpr(Reg::A2), make_gpr(Reg::GP))) {
    return weird("bad second move");
  }

  // op 5: a3 = type, either loaded from gp - 4 (basic) or the type's symbol (struct)
  IR* type_op = ops.at(5).get();
  const bool basic_load = is_get_load_with_offset(type_op, make_gpr(Reg::A3), IR_Load::UNSIGNED, 4,
                                                  make_gpr(Reg::GP), -4);
  result->is_basic = basic_load;
  const bool struct_load = is_get_sym(type_op, make_gpr(Reg::A3), function.method_of_type);
  if (!(basic_load || struct_load)) {
    return weird("bad load");
  }

  // op 6: the call to format
  if (dynamic_cast<IR_Call*>(ops.at(6).get()) == nullptr) {
    return weird("bad call");
  }

  return false;
}
|
||||
|
||||
int identify_basic_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
(void)file;
|
||||
auto load_info = get_load_info_from_set(function.basic_ops.at(idx++).get());
|
||||
assert(load_info.size == 4);
|
||||
assert(load_info.kind == IR_Load::UNSIGNED || load_info.kind == IR_Load::SIGNED);
|
||||
|
||||
if (load_info.kind == IR_Load::SIGNED) {
|
||||
result->warnings += "field " + print_info.field_name + " is a basic loaded with a signed load ";
|
||||
}
|
||||
|
||||
int offset = load_info.offset;
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec("basic"), offset);
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_pointer_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
(void)file;
|
||||
auto load_info = get_load_info_from_set(function.basic_ops.at(idx++).get());
|
||||
assert(load_info.size == 4);
|
||||
assert(load_info.kind == IR_Load::UNSIGNED);
|
||||
|
||||
int offset = load_info.offset;
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec("pointer"), offset);
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_array_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
auto& get_op = function.basic_ops.at(idx++);
|
||||
int offset = 0;
|
||||
if (!get_ptr_offset(get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP), &offset)) {
|
||||
printf("bad get ptr offset %s\n", get_op->print(file).c_str());
|
||||
assert(false);
|
||||
}
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec("UNKNOWN"), offset);
|
||||
if (print_info.array_size) {
|
||||
field.set_array(print_info.array_size);
|
||||
} else {
|
||||
field.set_dynamic();
|
||||
}
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_float_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
auto load_info = get_load_info_from_set(function.basic_ops.at(idx++).get());
|
||||
assert(load_info.size == 4);
|
||||
assert(load_info.kind == IR_Load::FLOAT);
|
||||
|
||||
auto& float_move = function.basic_ops.at(idx++);
|
||||
if (!is_reg_reg_move(float_move.get(), make_gpr(Reg::A2), make_fpr(0))) {
|
||||
printf("bad float move: %s\n", float_move->print(file).c_str());
|
||||
assert(false);
|
||||
}
|
||||
|
||||
std::string type;
|
||||
switch (print_info.format) {
|
||||
case 'f':
|
||||
type = "float";
|
||||
break;
|
||||
case 'm':
|
||||
type = "meters";
|
||||
break;
|
||||
case 'r':
|
||||
type = "deg";
|
||||
break;
|
||||
case 'X':
|
||||
type = "float";
|
||||
result->warnings += "field " + print_info.field_name + " is a float printed as hex? ";
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
int offset = load_info.offset;
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec(type), offset);
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_struct_not_inline_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
(void)file;
|
||||
auto load_info = get_load_info_from_set(function.basic_ops.at(idx++).get());
|
||||
|
||||
if (!(load_info.size == 4 && load_info.kind == IR_Load::UNSIGNED)) {
|
||||
result->warnings += "field " + print_info.field_type_name + " is likely a value type";
|
||||
}
|
||||
int offset = load_info.offset;
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec(print_info.field_type_name), offset);
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_struct_inline_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
auto& get_op = function.basic_ops.at(idx++);
|
||||
int offset = 0;
|
||||
if (!get_ptr_offset(get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP), &offset)) {
|
||||
printf("bad get ptr offset %s\n", get_op->print(file).c_str());
|
||||
assert(false);
|
||||
}
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec(print_info.field_type_name), offset);
|
||||
field.set_inline();
|
||||
result->fields_of_type.push_back(field);
|
||||
return idx;
|
||||
}
|
||||
|
||||
int identify_int_field(int idx,
|
||||
Function& function,
|
||||
LinkedObjectFile& file,
|
||||
TypeInspectorResult* result,
|
||||
FieldPrint& print_info) {
|
||||
(void)file;
|
||||
auto load_info = get_load_info_from_set(function.basic_ops.at(idx++).get());
|
||||
|
||||
std::string field_type_name;
|
||||
if (load_info.kind == IR_Load::UNSIGNED) {
|
||||
field_type_name += "u";
|
||||
} else if (load_info.kind == IR_Load::FLOAT) {
|
||||
assert(false); // ...
|
||||
}
|
||||
field_type_name += "int";
|
||||
|
||||
switch (load_info.size) {
|
||||
case 1:
|
||||
field_type_name += "8";
|
||||
break;
|
||||
case 2:
|
||||
field_type_name += "16";
|
||||
break;
|
||||
case 4:
|
||||
field_type_name += "32";
|
||||
break;
|
||||
case 8:
|
||||
field_type_name += "64";
|
||||
break;
|
||||
case 16:
|
||||
field_type_name += "128";
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("unknown load op size in identify int field " +
|
||||
std::to_string((int)load_info.size));
|
||||
}
|
||||
|
||||
if (print_info.format == 'e') {
|
||||
switch (load_info.kind) {
|
||||
case IR_Load::SIGNED:
|
||||
field_type_name = "sseconds";
|
||||
break;
|
||||
case IR_Load::UNSIGNED:
|
||||
field_type_name = "useconds";
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
assert(load_info.size == 8);
|
||||
}
|
||||
|
||||
int offset = load_info.offset;
|
||||
if (result->is_basic) {
|
||||
offset += BASIC_OFFSET;
|
||||
}
|
||||
|
||||
Field field(print_info.field_name, TypeSpec(field_type_name), offset);
|
||||
result->fields_of_type.push_back(field);
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
int detect(int idx, Function& function, LinkedObjectFile& file, TypeInspectorResult* result) {
|
||||
auto& get_format_op = function.basic_ops.at(idx++);
|
||||
if (!is_get_sym_value(get_format_op.get(), make_gpr(Reg::T9), "format")) {
|
||||
printf("bad get format");
|
||||
assert(false);
|
||||
}
|
||||
|
||||
auto& get_true = function.basic_ops.at(idx++);
|
||||
if (!is_get_sym(get_true.get(), make_gpr(Reg::A0), "#t")) {
|
||||
printf("bad get true");
|
||||
assert(false);
|
||||
}
|
||||
|
||||
auto& get_str = function.basic_ops.at(idx++);
|
||||
if (!is_get_label(get_str.get(), make_gpr(Reg::A1))) {
|
||||
result->warnings += "bad get label";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto str = file.get_goal_string_by_label(file.labels.at(get_label_id_of_set(get_str.get())));
|
||||
auto info = get_field_print(str);
|
||||
|
||||
auto& first_get_op = function.basic_ops.at(idx);
|
||||
|
||||
if (is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP)) &&
|
||||
(info.format == 'D' || info.format == 'X' || info.format == 'e') && !info.has_array &&
|
||||
info.field_type_name.empty()) {
|
||||
idx = identify_int_field(idx, function, file, result, info);
|
||||
// it's a load!
|
||||
} else if (is_get_load(first_get_op.get(), make_fpr(0), make_gpr(Reg::GP)) &&
|
||||
(info.format == 'f' || info.format == 'm' || info.format == 'r' ||
|
||||
info.format == 'X') &&
|
||||
!info.has_array && info.field_type_name.empty()) {
|
||||
idx = identify_float_field(idx, function, file, result, info);
|
||||
} else if (is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP)) &&
|
||||
info.format == 'A' && !info.has_array && info.field_type_name.empty()) {
|
||||
idx = identify_basic_field(idx, function, file, result, info);
|
||||
} else if (is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP)) &&
|
||||
info.format == 'X' && !info.has_array && info.field_type_name.empty()) {
|
||||
idx = identify_pointer_field(idx, function, file, result, info);
|
||||
} else if (info.has_array && (info.format == 'X' || info.format == 'P') &&
|
||||
info.field_type_name.empty()) {
|
||||
idx = identify_array_field(idx, function, file, result, info);
|
||||
} else if (!info.has_array && (info.format == 'X' || info.format == 'P') &&
|
||||
!info.field_type_name.empty()) {
|
||||
// structure.
|
||||
if (is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP))) {
|
||||
// not inline
|
||||
idx = identify_struct_not_inline_field(idx, function, file, result, info);
|
||||
} else {
|
||||
idx = identify_struct_inline_field(idx, function, file, result, info);
|
||||
}
|
||||
}
|
||||
|
||||
else if (is_set_shift(first_get_op.get())) {
|
||||
result->warnings += "likely a bitfield type";
|
||||
return -1;
|
||||
} else {
|
||||
printf("couldn't do %s, %s\n", str.c_str(), first_get_op->print(file).c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto& call_op = function.basic_ops.at(idx++);
|
||||
if (!dynamic_cast<IR_Call*>(call_op.get())) {
|
||||
printf("bad call\n");
|
||||
assert(false);
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/*!
 * Analyze the inspect method of type_name and collect what can be learned about the type:
 * parent, flags (size, method count, heap base) as recorded in the DecompilerTypeSystem, and a
 * guess at the fields.
 *
 * If the method does not start with the expected header (see is_weird), or the type is listed in
 * the config's bad_inspect_types, the result is returned without fields and without success set.
 * Otherwise fields are detected one print at a time until detect gives up (-1) or the ops run
 * out, and success is set.
 */
TypeInspectorResult inspect_inspect_method(Function& inspect,
                                           const std::string& type_name,
                                           DecompilerTypeSystem& dts,
                                           LinkedObjectFile& file) {
  TypeInspectorResult result;

  // look up what the type definition told us.
  TypeFlags type_flags;
  type_flags.flag = 0;
  dts.lookup_flags(type_name, &type_flags.flag);

  result.type_name = type_name;
  result.parent_type_name = dts.lookup_parent_from_inspects(type_name);
  result.flags = type_flags.flag;
  result.type_size = type_flags.size;
  result.type_method_count = type_flags.methods;
  result.type_heap_base = type_flags.heap_base;
  assert(type_flags.pad == 0);

  auto& known_bad = get_config().bad_inspect_types;
  const bool skip = is_weird(inspect, file, &result) || known_bad.find(type_name) != known_bad.end();
  if (skip) {
    return result;
  }

  // The first 7 ops are the header checked by is_weird; fields start after it.
  const int last_op = int(inspect.basic_ops.size()) - 1;
  for (int idx = 7; idx != -1 && idx < last_op;) {
    idx = detect(idx, inspect, file, &result);
  }

  // todo, continue to identify fields, then identify the return.

  result.success = true;
  return result;
}
|
||||
|
||||
std::string TypeInspectorResult::print_as_deftype() {
|
||||
std::string result;
|
||||
|
||||
result += fmt::format("(deftype {} ({})\n (", type_name, parent_type_name);
|
||||
|
||||
int longest_field_name = 0;
|
||||
int longest_type_name = 0;
|
||||
int longest_mods = 0;
|
||||
|
||||
std::string inline_string = ":inline";
|
||||
std::string dynamic_string = ":dynamic";
|
||||
|
||||
for (auto& field : fields_of_type) {
|
||||
longest_field_name = std::max(longest_field_name, int(field.name().size()));
|
||||
longest_type_name = std::max(longest_type_name, int(field.type().print().size()));
|
||||
|
||||
int mods = 0;
|
||||
// mods are array size, :inline, :dynamic
|
||||
if (field.is_array() && !field.is_dynamic()) {
|
||||
mods += std::to_string(field.array_size()).size();
|
||||
}
|
||||
|
||||
if (field.is_inline()) {
|
||||
if (mods) {
|
||||
mods++; // space
|
||||
}
|
||||
mods += inline_string.size();
|
||||
}
|
||||
|
||||
if (field.is_dynamic()) {
|
||||
if (mods) {
|
||||
mods++; // space
|
||||
}
|
||||
mods += dynamic_string.size();
|
||||
}
|
||||
longest_mods = std::max(longest_mods, mods);
|
||||
}
|
||||
|
||||
for (auto& field : fields_of_type) {
|
||||
result += "(";
|
||||
result += field.name();
|
||||
result.append(1 + (longest_field_name - int(field.name().size())), ' ');
|
||||
result += field.type().print();
|
||||
result.append(1 + (longest_type_name - int(field.type().print().size())), ' ');
|
||||
|
||||
std::string mods;
|
||||
if (field.is_array() && !field.is_dynamic()) {
|
||||
mods += std::to_string(field.array_size());
|
||||
mods += " ";
|
||||
}
|
||||
|
||||
if (field.is_inline()) {
|
||||
mods += inline_string;
|
||||
mods += " ";
|
||||
}
|
||||
|
||||
if (field.is_dynamic()) {
|
||||
mods += dynamic_string;
|
||||
mods += " ";
|
||||
}
|
||||
result.append(mods);
|
||||
result.append(longest_mods - int(mods.size() - 1), ' ');
|
||||
|
||||
result.append(":offset-assert ");
|
||||
result.append(std::to_string(field.offset()));
|
||||
result.append(")\n ");
|
||||
}
|
||||
result.append(")\n");
|
||||
|
||||
result.append(fmt::format(" :method-count-assert {}\n", type_method_count));
|
||||
result.append(fmt::format(" :size-assert #x{:x}\n", type_size));
|
||||
result.append(fmt::format(" :flag-assert #x{:x}\n ", flags));
|
||||
if (!warnings.empty()) {
|
||||
result.append(";; ");
|
||||
result.append(warnings);
|
||||
result.append("\n ");
|
||||
}
|
||||
|
||||
if (type_method_count > 9) {
|
||||
result.append("(:methods\n ");
|
||||
for (int i = 9; i < type_method_count; i++) {
|
||||
result.append(fmt::format("(dummy-{} () none {})\n ", i, i));
|
||||
}
|
||||
result.append(")\n ");
|
||||
}
|
||||
result.append(")\n");
|
||||
|
||||
return result;
|
||||
}
|
35
decompiler/Function/TypeInspector.h
Normal file
35
decompiler/Function/TypeInspector.h
Normal file
|
@ -0,0 +1,35 @@
|
|||
#pragma once
|
||||
|
||||
/*!
|
||||
* @file TypeInspector.h
|
||||
* Analyze an auto-generated GOAL inspect method to determine the layout of a type in memory.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include "common/type_system/Type.h"
|
||||
|
||||
class Function;
|
||||
class DecompilerTypeSystem;
|
||||
class LinkedObjectFile;
|
||||
|
||||
struct TypeInspectorResult {
|
||||
bool success = false;
|
||||
int type_size = -1;
|
||||
int type_method_count = -1;
|
||||
int type_heap_base = -1;
|
||||
|
||||
std::string warnings;
|
||||
std::vector<Field> fields_of_type;
|
||||
bool is_basic = false;
|
||||
|
||||
std::string type_name;
|
||||
std::string parent_type_name;
|
||||
u64 flags = 0;
|
||||
|
||||
std::string print_as_deftype();
|
||||
};
|
||||
|
||||
TypeInspectorResult inspect_inspect_method(Function& inspect,
|
||||
const std::string& type_name,
|
||||
DecompilerTypeSystem& dts,
|
||||
LinkedObjectFile& file);
|
|
@ -794,8 +794,11 @@ std::string LinkedObjectFile::print_disassembly() {
|
|||
/*!
|
||||
* Hacky way to get a GOAL string object
|
||||
*/
|
||||
std::string LinkedObjectFile::get_goal_string(int seg, int word_idx) {
|
||||
std::string result = "\"";
|
||||
std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_quotes) {
|
||||
std::string result;
|
||||
if (with_quotes) {
|
||||
result += "\"";
|
||||
}
|
||||
// next should be the size
|
||||
if (word_idx + 1 >= int(words_by_seg[seg].size())) {
|
||||
return "invalid string!\n";
|
||||
|
@ -819,7 +822,10 @@ std::string LinkedObjectFile::get_goal_string(int seg, int word_idx) {
|
|||
memcpy(cword, &word.data, 4);
|
||||
result += cword[byte_offset];
|
||||
}
|
||||
return result + "\"";
|
||||
if (with_quotes) {
|
||||
result += "\"";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
|
@ -997,4 +1003,16 @@ goos::Object LinkedObjectFile::to_form_script_object(int seg,
|
|||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
 * Read the 32-bit data word that a label points to.
 * The label must be 4-byte aligned and must point at a PLAIN_DATA word (both are asserted).
 * An out-of-range segment or word index makes the .at() lookups throw.
 */
u32 LinkedObjectFile::read_data_word(const Label& label) {
  assert((label.offset % 4) == 0);
  const auto word_idx = label.offset / 4;  // byte offset -> word index
  auto& target = words_by_seg.at(label.target_segment).at(word_idx);
  assert(target.kind == LinkedWord::Kind::PLAIN_DATA);
  return target.data;
}
|
||||
|
||||
std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) {
|
||||
assert(0 == (label.offset % 4));
|
||||
return get_goal_string(label.target_segment, (label.offset / 4) - 1, false);
|
||||
}
|
|
@ -68,6 +68,9 @@ class LinkedObjectFile {
|
|||
const std::string& extra_name);
|
||||
std::string print_asm_function_disassembly(const std::string& my_name);
|
||||
|
||||
u32 read_data_word(const Label& label);
|
||||
std::string get_goal_string_by_label(const Label& label);
|
||||
|
||||
struct Stats {
|
||||
uint32_t total_code_bytes = 0;
|
||||
uint32_t total_v2_code_bytes = 0;
|
||||
|
@ -134,7 +137,7 @@ class LinkedObjectFile {
|
|||
goos::Object to_form_script_object(int seg, int byte_idx, std::vector<bool>& seen);
|
||||
bool is_empty_list(int seg, int byte_idx);
|
||||
bool is_string(int seg, int byte_idx);
|
||||
std::string get_goal_string(int seg, int word_idx);
|
||||
std::string get_goal_string(int seg, int word_idx, bool with_quotes = true);
|
||||
|
||||
std::vector<std::unordered_map<int, int>> label_per_seg_by_offset;
|
||||
};
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "decompiler/Function/BasicBlocks.h"
|
||||
#include "decompiler/IR/BasicOpBuilder.h"
|
||||
#include "decompiler/IR/CfgBuilder.h"
|
||||
#include "decompiler/Function/TypeInspector.h"
|
||||
#include "third-party/spdlog/include/spdlog/spdlog.h"
|
||||
#include "third-party/json.hpp"
|
||||
|
||||
|
@ -625,7 +626,8 @@ void ObjectFileDB::analyze_functions() {
|
|||
assert(func.guessed_name.empty());
|
||||
func.guessed_name.set_as_top_level();
|
||||
func.find_global_function_defs(data.linked_data, dts);
|
||||
func.find_method_defs(data.linked_data);
|
||||
func.find_type_defs(data.linked_data, dts);
|
||||
func.find_method_defs(data.linked_data, dts);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -687,10 +689,11 @@ void ObjectFileDB::analyze_functions() {
|
|||
int successful_type_analysis = 0;
|
||||
|
||||
std::map<int, std::vector<std::string>> unresolved_by_length;
|
||||
|
||||
if (get_config().find_basic_blocks) {
|
||||
timer.start();
|
||||
int total_basic_blocks = 0;
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
// printf("in %s from %s\n", func.guessed_name.to_string().c_str(),
|
||||
// data.to_unique_name().c_str());
|
||||
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
|
||||
|
@ -718,6 +721,12 @@ void ObjectFileDB::analyze_functions() {
|
|||
total_basic_ops += func.get_basic_op_count();
|
||||
total_failed_basic_ops += func.get_failed_basic_op_count();
|
||||
|
||||
if (func.is_inspect_method) {
|
||||
auto result = inspect_inspect_method(func, func.method_of_type, dts, data.linked_data);
|
||||
all_type_defs += ";; " + data.to_unique_name() + "\n";
|
||||
all_type_defs += result.print_as_deftype() + "\n";
|
||||
}
|
||||
|
||||
// Combine basic ops + CFG to build a nested IR
|
||||
func.ir = build_cfg_ir(func, *func.cfg, data.linked_data);
|
||||
non_asm_funcs++;
|
||||
|
@ -744,11 +753,13 @@ void ObjectFileDB::analyze_functions() {
|
|||
}
|
||||
// GOOD!
|
||||
func.type = kv->second;
|
||||
|
||||
/*
|
||||
spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
|
||||
kv->second.print());
|
||||
func.run_type_analysis(kv->second, dts, data.linked_data);
|
||||
*/
|
||||
|
||||
if (func.has_typemaps()) {
|
||||
successful_type_analysis++;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ class ObjectFileDB {
|
|||
void analyze_functions();
|
||||
ObjectFileData& lookup_record(const ObjectFileRecord& rec);
|
||||
DecompilerTypeSystem dts;
|
||||
std::string all_type_defs;
|
||||
|
||||
private:
|
||||
void load_map_file(const std::string& map_data);
|
||||
|
@ -101,6 +102,23 @@ class ObjectFileDB {
|
|||
});
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void for_each_function_def_order(Func f) {
|
||||
for_each_obj([&](ObjectFileData& data) {
|
||||
// printf("IN %s\n", data.record.to_unique_name().c_str());
|
||||
for (int i = 0; i < int(data.linked_data.segments); i++) {
|
||||
// printf("seg %d\n", i);
|
||||
int fn = 0;
|
||||
// for (auto& goal_func : data.linked_data.functions_by_seg.at(i)) {
|
||||
for (size_t j = data.linked_data.functions_by_seg.at(i).size(); j-- > 0;) {
|
||||
// printf("fn %d\n", fn);
|
||||
f(data.linked_data.functions_by_seg.at(i).at(j), i, data);
|
||||
fn++;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Danger: after adding all object files, we assume that the vector never reallocates.
|
||||
std::unordered_map<std::string, std::vector<ObjectFileData>> obj_files_by_name;
|
||||
std::unordered_map<std::string, std::vector<ObjectFileRecord>> obj_files_by_dgo;
|
||||
|
|
|
@ -33,4 +33,9 @@ void set_config(const std::string& path_to_config_file) {
|
|||
for (const auto& x : asm_functions_by_name) {
|
||||
gConfig.asm_functions_by_name.insert(x);
|
||||
}
|
||||
|
||||
auto bad_inspect = cfg.at("types_with_bad_inspect_methods").get<std::vector<std::string>>();
|
||||
for (const auto& x : bad_inspect) {
|
||||
gConfig.bad_inspect_types.insert(x);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
struct Config {
|
||||
int game_version = -1;
|
||||
std::vector<std::string> dgo_names;
|
||||
std::unordered_set<std::string> bad_inspect_types;
|
||||
std::string obj_file_name_map_file;
|
||||
bool write_disassembly = false;
|
||||
bool write_hexdump = false;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -3,14 +3,14 @@
|
|||
{
|
||||
"game_version":1,
|
||||
// the order here matters. KERNEL and GAME should go first
|
||||
"dgo_names":["CGO/KERNEL.CGO", "CGO/GAME.CGO"
|
||||
"dgo_names":["CGO/KERNEL.CGO","CGO/GAME.CGO"], /*
|
||||
, "CGO/ENGINE.CGO"
|
||||
, "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO",
|
||||
"DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO",
|
||||
"DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO",
|
||||
"DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO",
|
||||
"DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO"
|
||||
],
|
||||
],*/
|
||||
|
||||
"write_disassembly":true,
|
||||
"write_hex_near_instructions":false,
|
||||
|
@ -31,6 +31,12 @@
|
|||
// Experimental Stuff
|
||||
"find_basic_blocks":true,
|
||||
|
||||
"types_with_bad_inspect_methods":[
|
||||
"engine",
|
||||
"bsp-header",
|
||||
"joint-anim-matrix"
|
||||
],
|
||||
|
||||
"asm_functions_by_name":[
|
||||
// gcommon
|
||||
"quad-copy!",
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "DecompilerTypeSystem.h"
|
||||
#include "common/goos/Reader.h"
|
||||
#include "common/type_system/deftype.h"
|
||||
#include "third-party/spdlog/include/spdlog/spdlog.h"
|
||||
|
||||
DecompilerTypeSystem::DecompilerTypeSystem() {
|
||||
ts.add_builtin_types();
|
||||
|
@ -75,4 +76,46 @@ std::string DecompilerTypeSystem::dump_symbol_types() {
|
|||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DecompilerTypeSystem::add_type_flags(const std::string& name, u64 flags) {
|
||||
auto kv = type_flags.find(name);
|
||||
if (kv != type_flags.end()) {
|
||||
spdlog::warn("duplicated type flags for {}, was 0x{:x}, now 0x{:x}", name.c_str(), kv->second,
|
||||
flags);
|
||||
if (kv->second != flags) {
|
||||
spdlog::warn("duplicated type flags that are inconsistent!");
|
||||
}
|
||||
}
|
||||
type_flags[name] = flags;
|
||||
}
|
||||
|
||||
void DecompilerTypeSystem::add_type_parent(const std::string& child, const std::string& parent) {
|
||||
auto kv = type_parents.find(child);
|
||||
if (kv != type_parents.end()) {
|
||||
spdlog::warn("duplicated type parents for {} was {} now {}", child.c_str(), kv->second.c_str(),
|
||||
parent.c_str());
|
||||
if (kv->second != parent) {
|
||||
throw std::runtime_error("duplicated type parents that are inconsistent!");
|
||||
}
|
||||
}
|
||||
type_parents[child] = parent;
|
||||
}
|
||||
|
||||
std::string DecompilerTypeSystem::lookup_parent_from_inspects(const std::string& child) const {
|
||||
auto kv_tp = type_parents.find(child);
|
||||
if (kv_tp != type_parents.end()) {
|
||||
return kv_tp->second;
|
||||
}
|
||||
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
bool DecompilerTypeSystem::lookup_flags(const std::string& type, u64* dest) const {
|
||||
auto kv = type_flags.find(type);
|
||||
if (kv != type_flags.end()) {
|
||||
*dest = kv->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
|
@ -11,6 +11,8 @@ class DecompilerTypeSystem {
|
|||
std::unordered_map<std::string, TypeSpec> symbol_types;
|
||||
std::unordered_set<std::string> symbols;
|
||||
std::vector<std::string> symbol_add_order;
|
||||
std::unordered_map<std::string, u64> type_flags;
|
||||
std::unordered_map<std::string, std::string> type_parents;
|
||||
|
||||
void add_symbol(const std::string& name) {
|
||||
if (symbols.find(name) == symbols.end()) {
|
||||
|
@ -40,7 +42,12 @@ class DecompilerTypeSystem {
|
|||
|
||||
void parse_type_defs(const std::vector<std::string>& file_path);
|
||||
|
||||
void add_type_flags(const std::string& name, u64 flags);
|
||||
void add_type_parent(const std::string& child, const std::string& parent);
|
||||
|
||||
std::string dump_symbol_types();
|
||||
std::string lookup_parent_from_inspects(const std::string& child) const;
|
||||
bool lookup_flags(const std::string& type, u64* dest) const;
|
||||
};
|
||||
|
||||
#endif // JAK_DECOMPILERTYPESYSTEM_H
|
||||
|
|
|
@ -41,6 +41,10 @@
|
|||
)
|
||||
)
|
||||
|
||||
(defmacro tc ()
|
||||
`(m "decompiler/config/all-types.gc")
|
||||
)
|
||||
|
||||
(defmacro e ()
|
||||
`(:exit)
|
||||
)
|
||||
|
|
|
@ -69,7 +69,7 @@
|
|||
(/ x y)
|
||||
)
|
||||
|
||||
(defun ash ((value integer) (shift-amount integer))
|
||||
(defun ash ((value int) (shift-amount int))
|
||||
"Arithmetic shift value by shift-amount.
|
||||
A positive shift-amount will shift to the left and a negative will shift to the right.
|
||||
"
|
||||
|
@ -97,7 +97,7 @@
|
|||
)
|
||||
)
|
||||
|
||||
(defun mod ((a integer) (b integer))
|
||||
(defun mod ((a int) (b int))
|
||||
"Compute mod. It does what you expect for positive numbers. For negative numbers, nobody knows what to expect.
|
||||
This is a 32-bit operation. It uses an idiv on x86 and gets the remainder."
|
||||
|
||||
|
@ -107,7 +107,7 @@
|
|||
)
|
||||
|
||||
|
||||
(defun rem ((a integer) (b integer))
|
||||
(defun rem ((a int) (b int))
|
||||
"Compute remainder (32-bit). It is identical to mod. It uses a idiv and gets the remainder"
|
||||
|
||||
;; The original implementation is div, mfhi
|
||||
|
@ -134,7 +134,7 @@
|
|||
)
|
||||
)
|
||||
|
||||
(defun min ((a integer) (b integer))
|
||||
(defun min ((a int) (b int))
|
||||
"Compute minimum."
|
||||
|
||||
;; The original implementation was inline assembly, to take advantage of branch delay slots:
|
||||
|
@ -147,23 +147,23 @@
|
|||
(if (> a b) b a)
|
||||
)
|
||||
|
||||
(defun max ((a integer) (b integer))
|
||||
(defun max ((a int) (b int))
|
||||
"Compute maximum."
|
||||
(declare (inline))
|
||||
(if (> a b) a b)
|
||||
)
|
||||
|
||||
(defun logior ((a integer) (b integer))
|
||||
(defun logior ((a int) (b int))
|
||||
"Compute the bitwise inclusive-or"
|
||||
(logior a b)
|
||||
)
|
||||
|
||||
(defun logand ((a integer) (b integer))
|
||||
(defun logand ((a int) (b int))
|
||||
"Compute the bitwise and"
|
||||
(logand a b)
|
||||
)
|
||||
|
||||
(defun lognor ((a integer) (b integer))
|
||||
(defun lognor ((a int) (b int))
|
||||
"Compute not or."
|
||||
;; Note - MIPS has a 'nor' instruction, but x86 doesn't.
|
||||
;; the GOAL x86 compiler therefore doesn't have a nor operation,
|
||||
|
@ -172,12 +172,12 @@
|
|||
(lognot (logior a b))
|
||||
)
|
||||
|
||||
(defun logxor ((a integer) (b integer))
|
||||
(defun logxor ((a int) (b int))
|
||||
"Compute the logical exclusive-or"
|
||||
(logxor a b)
|
||||
)
|
||||
|
||||
(defun lognot ((a integer))
|
||||
(defun lognot ((a int))
|
||||
"Compute the bitwise not"
|
||||
(lognot a)
|
||||
)
|
||||
|
@ -573,6 +573,7 @@
|
|||
((length int32 :offset-assert 4)
|
||||
(allocated-length int32 :offset-assert 8)
|
||||
(data uint8 :dynamic)
|
||||
(_pad uint8 4)
|
||||
)
|
||||
(:methods (new (symbol type int) _type_ 0) ;; we will override print later on. This is optional to include
|
||||
)
|
||||
|
|
|
@ -85,17 +85,17 @@
|
|||
|
||||
;; this stores the current state of the kernel.
|
||||
(deftype kernel-context (basic)
|
||||
((prevent-from-run int32 :offset-assert 4) ;; actually a process-mask
|
||||
(require-for-run int32 :offset-assert 8) ;; actually a process-mask, unused?
|
||||
(allow-to-run int32 :offset-assert 12) ;; actually a process-mask, unused?
|
||||
(next-pid int32 :offset-assert 16) ;; next PID to give out
|
||||
(fast-stack-top pointer :offset-assert 20) ;; scratchpad stack (unused?)
|
||||
(current-process basic :offset-assert 24) ;; process?
|
||||
(relocating-process basic :offset-assert 28) ;; process?
|
||||
(relocating-min int32 :offset-assert 32) ;; print hex
|
||||
(relocating-max int32 :offset-assert 36) ;; print hex
|
||||
(relocating-offset int32 :offset-assert 40) ;; ?
|
||||
(low-memory-message basic :offset-assert 44) ;; boolean?
|
||||
((prevent-from-run uint32 :offset-assert 4)
|
||||
(require-for-run uint32 :offset-assert 8)
|
||||
(allow-to-run uint32 :offset-assert 12)
|
||||
(next-pid int32 :offset-assert 16)
|
||||
(fast-stack-top uint32 :offset-assert 20)
|
||||
(current-process basic :offset-assert 24)
|
||||
(relocating-process basic :offset-assert 28)
|
||||
(relocating-min int32 :offset-assert 32)
|
||||
(relocating-max int32 :offset-assert 36)
|
||||
(relocating-offset int32 :offset-assert 40)
|
||||
(low-memory-message basic :offset-assert 44)
|
||||
)
|
||||
|
||||
:size-assert #x30
|
||||
|
@ -146,6 +146,7 @@
|
|||
)
|
||||
|
||||
(:methods
|
||||
(new ((allocation symbol) (type-to-make type) (parent-process process) (name symbol) (stack-size int) (stack-top pointer)) _type_ 0)
|
||||
(thread-suspend ((this _type_)) none 10)
|
||||
(thread-resume ((to-resume _type_)) none 11)
|
||||
)
|
||||
|
@ -160,7 +161,7 @@
|
|||
;; (except GOAL is old and it looks like they called them left-child right-brother trees back then)
|
||||
(deftype process-tree (basic)
|
||||
((name basic :offset-assert 4)
|
||||
(mask int32 :offset-assert 8)
|
||||
(mask uint32 :offset-assert 8)
|
||||
(parent pointer :offset-assert 12)
|
||||
(brother pointer :offset-assert 16)
|
||||
(child pointer :offset-assert 20)
|
||||
|
@ -302,12 +303,12 @@
|
|||
;; A catch frame is a frame you can "throw" to, by name.
|
||||
;; You can "throw" out of a function and into another function.
|
||||
(deftype catch-frame (stack-frame)
|
||||
((sp pointer :offset 12) ;; where to reset the stack when throwing.
|
||||
(ra pointer :offset 16) ;; where to jump when throwing
|
||||
((sp int32 :offset 12) ;; where to reset the stack when throwing.
|
||||
(ra int32 :offset 16) ;; where to jump when throwing
|
||||
|
||||
;; todo - rework for x86-64.
|
||||
(freg float 6 :offset 20) ;; saved floating point registers from "catch" statement
|
||||
(rreg uint128 8 :offset 48) ;; saved GPRs from "catch" statement (ugh they are 128s)
|
||||
(freg float 6 :offset-assert 20) ;; saved floating point registers from "catch" statement
|
||||
(rreg uint128 8 :offset-assert 48) ;; saved GPRs from "catch" statement (ugh they are 128s)
|
||||
)
|
||||
:size-assert #xb0
|
||||
:method-count-assert 9
|
||||
|
|
|
@ -31,6 +31,7 @@ class Compiler {
|
|||
std::vector<std::string> run_test(const std::string& source_code);
|
||||
std::vector<std::string> run_test_no_load(const std::string& source_code);
|
||||
void shutdown_target();
|
||||
void enable_throw_on_redefines() { m_throw_on_define_extern_redefinition = true; }
|
||||
|
||||
private:
|
||||
void init_logger();
|
||||
|
@ -104,6 +105,7 @@ class Compiler {
|
|||
std::unordered_map<std::shared_ptr<goos::SymbolObject>, goos::Object> m_global_constants;
|
||||
std::unordered_map<std::shared_ptr<goos::SymbolObject>, LambdaVal*> m_inlineable_functions;
|
||||
CompilerSettings m_settings;
|
||||
bool m_throw_on_define_extern_redefinition = false;
|
||||
MathMode get_math_mode(const TypeSpec& ts);
|
||||
bool is_number(const TypeSpec& ts);
|
||||
bool is_float(const TypeSpec& ts);
|
||||
|
|
|
@ -74,6 +74,10 @@ Val* Compiler::compile_define_extern(const goos::Object& form, const goos::Objec
|
|||
"[Warning] define-extern has redefined the type of symbol %s\npreviously: %s\nnow: %s\n",
|
||||
symbol_string(sym).c_str(), existing_type->second.print().c_str(),
|
||||
new_type.print().c_str());
|
||||
|
||||
if (m_throw_on_define_extern_redefinition) {
|
||||
throw_compile_error(form, "define-extern redefinition");
|
||||
}
|
||||
}
|
||||
|
||||
if (new_type == m_ts.make_typespec("type")) {
|
||||
|
|
|
@ -104,7 +104,7 @@ TEST_F(WithGameTests, All) {
|
|||
runner.run_static_test(env, testCategory, "test-delete-car.gc", {"((a . b) (e . f))\n#f\n0\n"});
|
||||
runner.run_static_test(env, testCategory, "test-insert-cons.gc",
|
||||
{"((c . w) (a . b) (e . f))\n0\n"});
|
||||
runner.run_static_test(env, testCategory, "test-new-inline-array-class.gc", {"2820\n"});
|
||||
runner.run_static_test(env, testCategory, "test-new-inline-array-class.gc", {"2824\n"});
|
||||
runner.run_static_test(env, testCategory, "test-memcpy.gc", {"13\n"});
|
||||
runner.run_static_test(env, testCategory, "test-memset.gc", {"11\n"});
|
||||
runner.run_static_test(env, testCategory, "test-binteger-print.gc", {"-17\n0\n"});
|
||||
|
@ -132,3 +132,10 @@ TEST_F(WithGameTests, All) {
|
|||
runner.run_static_test(env, testCategory, "test-new-static-basic.gc",
|
||||
get_test_pass_string("new-static-basic", 9));
|
||||
}
|
||||
|
||||
// Check that the types in all-types.gc (used by the decompiler) agree with the ones in the
// game source (used by the compiler).
TEST(TypeConsistency, TypeConsistency) {
  Compiler compiler;
  // make a define-extern that redefines the type of a symbol a compile error, not a warning
  compiler.enable_throw_on_redefines();

  // the game source goes first, so all-types.gc is checked against what it defined
  const std::string sources[] = {"test/goalc/source_templates/with_game/test-build-game.gc",
                                 "decompiler/config/all-types.gc"};
  for (const auto& source : sources) {
    compiler.run_test_no_load(source);
  }
}
|
Loading…
Reference in a new issue