diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index 8b4f4e26d..d4248f189 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -4,6 +4,7 @@ #include #include "CfgVtx.h" +#include "decompiler/util/DecompilerTypeSystem.h" class LinkedObjectFile; class Function; @@ -11,6 +12,12 @@ class Function; struct BasicBlock { int start_word; int end_word; + TypeState init_types; + + int start_basic_op = -1; + int end_basic_op = -1; + + std::string label_name; std::vector pred; int succ_ft = -1; diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index 071d5f42f..f539edbc7 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -1,6 +1,7 @@ #include "IR.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" #include "common/goos/PrettyPrinter.h" +#include "third-party/fmt/core.h" std::vector> IR::get_all_ir(LinkedObjectFile& file) const { (void)file; @@ -34,6 +35,69 @@ bool IR::update_types(TypeMap& reg_types, DecompilerTypeSystem& dts, LinkedObjec return false; } +namespace { +void add_regs_to_str(const std::vector& regs, std::string& str) { + bool first = true; + for (auto& reg : regs) { + if (first) { + first = false; + } else { + str.push_back(' '); + } + str.append(reg.to_charp()); + } +} + +u32 regs_to_gpr_mask(const std::vector& regs) { + u32 result = 0; + for (const auto& reg : regs) { + if (reg.get_kind() == Reg::GPR) { + result |= (1 << reg.get_gpr()); + } + } + return result; +} +} // namespace + +std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const { + std::string result = print(file); + if (result.length() < 40) { + result.append(40 - result.length(), ' '); + } + result += " ;;"; + if (!write_regs.empty()) { + result += "write: ["; + add_regs_to_str(write_regs, result); + result += "] "; + } + if (!read_regs.empty()) { + result += "read: ["; + add_regs_to_str(read_regs, result); + result += "] "; + } + if (!clobber_regs.empty()) { + result += "clobber: ["; + add_regs_to_str(clobber_regs, result); + result += "] "; + } + return result; +} + +std::string IR_Atomic::print_with_types(const TypeState& init_types, + const LinkedObjectFile& file) const { + std::string result = print(file); + if (result.length() < 40) { + result.append(40 - result.length(), ' '); + } + result += " ;; "; + auto read_mask = regs_to_gpr_mask(read_regs); + auto write_mask = regs_to_gpr_mask(write_regs); + + result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask), + end_types.print_gpr_masked(write_mask)); + return result; +} + goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::build_list("INVALID-OPERATION"); diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index 4c9ddf03f..fcca92e13 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -7,6 +7,7 @@ #include #include "decompiler/Disasm/Register.h" #include "common/type_system/TypeSpec.h" +#include "decompiler/util/DecompilerTypeSystem.h" class LinkedObjectFile; class DecompilerTypeSystem; @@ -39,6 +40,11 @@ class IR_Atomic : public virtual IR { public: std::vector read_regs, write_regs, clobber_regs; bool reg_info_set = false; + + TypeState end_types; // types at the end of this instruction + + std::string print_with_types(const TypeState& init_types, const LinkedObjectFile& file) const; + std::string print_with_reguse(const LinkedObjectFile& file) const; }; class IR_Failed : public virtual IR { diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index fdf72a8e1..ac87d85f6 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -793,6 +793,44 @@ std::string LinkedObjectFile::print_disassembly() { return result; } +std::string LinkedObjectFile::print_type_analysis_debug() { + std::string result; + + assert(segments <= 3); + for (int seg = segments; seg-- > 0;) { + // segment header + result += ";------------------------------------------\n; "; + result += segment_names[seg]; + result += "\n;------------------------------------------\n\n"; + + // functions + for (auto& func : functions_by_seg.at(seg)) { + result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; + result += "; .function " + func.guessed_name.to_string() + "\n"; + result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; + + for (auto& block : func.basic_blocks) { + result += "\n"; + if (!block.label_name.empty()) { + result += block.label_name + ":\n"; + } + + TypeState* init_types = &block.init_types; + for (int i = block.start_basic_op; i < block.end_basic_op; i++) { + result += " "; + // result += func.basic_ops.at(i)->print_with_reguse(*this); + // result += func.basic_ops.at(i)->print(*this); + result += func.basic_ops.at(i)->print_with_types(*init_types, *this); + result += "\n"; + init_types = &func.basic_ops.at(i)->end_types; + } + } + } + } + + return result; +} + /*! * Hacky way to get a GOAL string object */ diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 6f4fbc01c..7b84bbdf6 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -59,6 +59,7 @@ class LinkedObjectFile { void process_fp_relative_links(); std::string print_scripts(); std::string print_disassembly(); + std::string print_type_analysis_debug(); bool has_any_functions(); void append_word_to_string(std::string& dest, const LinkedWord& word) const; std::string to_asm_json(const std::string& obj_file_name); diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 9d05d073b..529d6ec0c 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -560,11 +560,35 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d // printf("\n"); } +void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) { + spdlog::info("- Writing debug type analysis..."); + Timer timer; + uint32_t total_bytes = 0, total_files = 0; + + for_each_obj([&](ObjectFileData& obj) { + if (obj.linked_data.has_any_functions()) { + auto file_text = obj.linked_data.print_type_analysis_debug(); + auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_db.asm"); + + total_bytes += file_text.size(); + file_util::write_text_file(file_name, file_text); + total_files++; + } + }); + + spdlog::info("Wrote functions dumps:"); + spdlog::info(" Total {} files", total_files); + spdlog::info(" Total {} MB", total_bytes / ((float)(1u << 20u))); + spdlog::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(), + total_bytes / ((1u << 20u) * timer.getSeconds())); +} + /*! * Dump disassembly for object files containing code. Data zones will also be dumped. */ void ObjectFileDB::write_disassembly(const std::string& output_dir, - bool disassemble_objects_without_functions) { + bool disassemble_objects_without_functions, + bool write_json) { spdlog::info("- Writing functions..."); Timer timer; uint32_t total_bytes = 0, total_files = 0; @@ -577,7 +601,7 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir, asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name()); auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".asm"); - if (get_config().analyze_functions) { + if (get_config().analyze_functions && write_json) { auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name()); auto json_asm_file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_asm.json"); @@ -736,6 +760,9 @@ std::string ObjectFileDB::process_game_count() { return result; } +/*! + * This is the main decompiler routine which runs after we've identified functions. + */ void ObjectFileDB::analyze_functions() { spdlog::info("- Analyzing Functions..."); Timer timer; @@ -744,68 +771,70 @@ void ObjectFileDB::analyze_functions() { int resolved_cfg_functions = 0; const auto& config = get_config(); - { - timer.start(); - for_each_obj([&](ObjectFileData& data) { - if (data.linked_data.segments == 3) { - // the top level segment should have a single function - assert(data.linked_data.functions_by_seg.at(2).size() == 1); + // Step 1 - analyze the "top level" or "login" code for each object file. + // this will give us type definitions, method definitions, and function definitions... + spdlog::info(" - Processing top levels..."); - auto& func = data.linked_data.functions_by_seg.at(2).front(); - assert(func.guessed_name.empty()); - func.guessed_name.set_as_top_level(); - func.find_global_function_defs(data.linked_data, dts); - func.find_type_defs(data.linked_data, dts); - func.find_method_defs(data.linked_data, dts); - } - }); + timer.start(); + for_each_obj([&](ObjectFileData& data) { + if (data.linked_data.segments == 3) { + // the top level segment should have a single function + assert(data.linked_data.functions_by_seg.at(2).size() == 1); - // check for function uniqueness. - std::unordered_set unique_names; - std::unordered_map> duplicated_functions; + auto& func = data.linked_data.functions_by_seg.at(2).front(); + assert(func.guessed_name.empty()); + func.guessed_name.set_as_top_level(); + func.find_global_function_defs(data.linked_data, dts); + func.find_type_defs(data.linked_data, dts); + func.find_method_defs(data.linked_data, dts); + } + }); - int uid = 1; - for_each_obj([&](ObjectFileData& data) { - int func_in_obj = 0; - for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) { - for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) { - func.guessed_name.unique_id = uid++; - func.guessed_name.id_in_object = func_in_obj++; - func.guessed_name.object_name = data.to_unique_name(); - auto name = func.guessed_name.to_string(); + // check for function uniqueness. + std::unordered_set unique_names; + std::unordered_map> duplicated_functions; - if (unique_names.find(name) != unique_names.end()) { - duplicated_functions[name].insert(data.to_unique_name()); - } + int uid = 1; + for_each_obj([&](ObjectFileData& data) { + int func_in_obj = 0; + for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) { + for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) { + func.guessed_name.unique_id = uid++; + func.guessed_name.id_in_object = func_in_obj++; + func.guessed_name.object_name = data.to_unique_name(); + auto name = func.guessed_name.to_string(); - unique_names.insert(name); + if (unique_names.find(name) != unique_names.end()) { + duplicated_functions[name].insert(data.to_unique_name()); + } - if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) { - func.warnings += "flagged as asm by config\n"; - func.suspected_asm = true; - } + unique_names.insert(name); + + if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) { + func.warnings += "flagged as asm by config\n"; + func.suspected_asm = true; } } - }); + } + }); - for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { - (void)segment_id; - auto name = func.guessed_name.to_string(); + for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + auto name = func.guessed_name.to_string(); - if (duplicated_functions.find(name) != duplicated_functions.end()) { - duplicated_functions[name].insert(data.to_unique_name()); - func.warnings += "this function exists in multiple non-identical object files"; - } - }); - /* - for (const auto& kv : duplicated_functions) { - printf("Function %s is found in non-identical object files:\n", kv.first.c_str()); - for (const auto& obj : kv.second) { - printf(" %s\n", obj.c_str()); - } + if (duplicated_functions.find(name) != duplicated_functions.end()) { + duplicated_functions[name].insert(data.to_unique_name()); + func.warnings += "this function exists in multiple non-identical object files"; + } + }); + /* + for (const auto& kv : duplicated_functions) { + printf("Function %s is found in non-identical object files:\n", kv.first.c_str()); + for (const auto& obj : kv.second) { + printf(" %s\n", obj.c_str()); } - */ - } + } + */ int total_trivial_cfg_functions = 0; int total_named_functions = 0; @@ -822,14 +851,19 @@ void ObjectFileDB::analyze_functions() { timer.start(); int total_basic_blocks = 0; + + // Main Pass over each function... for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + total_functions++; // printf("in %s from %s\n", func.guessed_name.to_string().c_str(), // data.to_unique_name().c_str()); + + // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); total_basic_blocks += blocks.size(); func.basic_blocks = blocks; - total_functions++; + // analyze the proluge if (!func.suspected_asm) { // first, find the prologue/epilogue func.analyze_prologue(data.linked_data); @@ -838,19 +872,28 @@ void ObjectFileDB::analyze_functions() { if (!func.suspected_asm) { // run analysis - // build a control flow graph + // build a control flow graph, just looking at branch instructions. func.cfg = build_cfg(data.linked_data, segment_id, func); // convert individual basic blocks to sequences of IR Basic Ops for (auto& block : func.basic_blocks) { if (block.end_word > block.start_word) { + auto label_id = + data.linked_data.get_label_at(segment_id, (func.start_word + block.start_word) * 4); + if (label_id != -1) { + block.label_name = data.linked_data.get_label_name(label_id); + } + + block.start_basic_op = func.basic_ops.size(); add_basic_ops_to_block(&func, block, &data.linked_data); + block.end_basic_op = func.basic_ops.size(); } } total_basic_ops += func.get_basic_op_count(); total_failed_basic_ops += func.get_failed_basic_op_count(); total_reginfo_ops += func.get_reginfo_basic_op_count(); + // if we got an inspect method, inspect it. if (func.is_inspect_method) { auto result = inspect_inspect_method(func, func.method_of_type, dts, data.linked_data); all_type_defs += ";; " + data.to_unique_name() + "\n"; diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 2de5798aa..31d64e406 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -58,7 +58,11 @@ class ObjectFileDB { void dump_raw_objects(const std::string& output_dir); void write_object_file_words(const std::string& output_dir, bool dump_v3_only); - void write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions); + void write_disassembly(const std::string& output_dir, + bool disassemble_objects_without_functions, + bool write_json); + + void write_debug_type_analysis(const std::string& output_dir); void analyze_functions(); void process_tpages(); std::string process_game_count(); diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 014d17cb6..8d1a619a9 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -32,6 +32,7 @@ void set_config(const std::string& path_to_config_file) { gConfig.process_game_text = cfg.at("process_game_text").get(); gConfig.process_game_count = cfg.at("process_game_count").get(); gConfig.dump_objs = cfg.at("dump_objs").get(); + gConfig.write_func_json = cfg.at("write_func_json").get(); std::vector asm_functions_by_name = cfg.at("asm_functions_by_name").get>(); diff --git a/decompiler/config.h b/decompiler/config.h index c84519933..0f502743d 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -25,6 +25,7 @@ struct Config { bool process_game_text = false; bool process_game_count = false; bool dump_objs = false; + bool write_func_json = false; std::unordered_set asm_functions_by_name; // ... }; diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 1b4afe09c..fe788addd 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -62,6 +62,7 @@ "process_game_text":true, "process_game_count":true, "dump_objs":false, + "write_func_json":false, // to write out data of each object file "write_hexdump":false, diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 38f8a44ea..a1376357e 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -83,7 +83,9 @@ int main(int argc, char** argv) { } if (get_config().write_disassembly) { - db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions); + db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions, + get_config().write_func_json); + db.write_debug_type_analysis(out_folder); } // todo print type summary diff --git a/decompiler/util/DecompilerTypeSystem.cpp b/decompiler/util/DecompilerTypeSystem.cpp index 4743c6b85..867651dea 100644 --- a/decompiler/util/DecompilerTypeSystem.cpp +++ b/decompiler/util/DecompilerTypeSystem.cpp @@ -1,6 +1,7 @@ #include "DecompilerTypeSystem.h" #include "common/goos/Reader.h" #include "common/type_system/deftype.h" +#include "decompiler/Disasm/Register.h" #include "third-party/spdlog/include/spdlog/spdlog.h" DecompilerTypeSystem::DecompilerTypeSystem() { @@ -146,4 +147,151 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t throw std::runtime_error("Type redefinition"); } } +} + +std::string TP_Type::print() const { + switch (kind) { + case OBJECT_OF_TYPE: + return ts.print(); + case TYPE_OBJECT: + return fmt::format("[{}]", ts.print()); + case FALSE: + return fmt::format("[#f]"); + case NONE: + return fmt::format("[none]"); + default: + assert(false); + } +} + +std::string TypeState::print_gpr_masked(u32 mask) const { + std::string result; + for (int i = 0; i < 32; i++) { + if (mask & (1 << i)) { + result += Register(Reg::GPR, i).to_charp(); + result += ": "; + result += gpr_types[i].print(); + result += " "; + } + } + return result; +} + +TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) { + switch (existing.kind) { + case TP_Type::OBJECT_OF_TYPE: + switch (add.kind) { + case TP_Type::OBJECT_OF_TYPE: { + // two normal types, do LCA as normal. + TP_Type result; + result.kind = TP_Type::OBJECT_OF_TYPE; + result.ts = ts.lowest_common_ancestor(existing.ts, add.ts); + *changed = (result.ts != existing.ts); + return result; + } + case TP_Type::TYPE_OBJECT: { + // normal, [type object]. Change type object to less specific "type". + TP_Type result; + result.kind = TP_Type::OBJECT_OF_TYPE; + result.ts = ts.lowest_common_ancestor(existing.ts, ts.make_typespec("type")); + *changed = (result.ts != existing.ts); + return result; + } + case TP_Type::FALSE: + // allow #f anywhere + *changed = false; + return existing; + case TP_Type::NONE: + // allow possibly undefined. + *changed = false; + return existing; + default: + assert(false); + } + break; + case TP_Type::TYPE_OBJECT: + switch (add.kind) { + case TP_Type::OBJECT_OF_TYPE: { + TP_Type result; + result.kind = TP_Type::OBJECT_OF_TYPE; + result.ts = ts.lowest_common_ancestor(ts.make_typespec("type"), add.ts); + *changed = true; // changed type + return result; + } + case TP_Type::TYPE_OBJECT: { + // two type objects. + TP_Type result; + result.kind = TP_Type::TYPE_OBJECT; + result.ts = ts.lowest_common_ancestor(existing.ts, add.ts); + *changed = (result.ts != existing.ts); + return result; + } + case TP_Type::FALSE: + // allow #f anywhere + *changed = false; + return existing; + case TP_Type::NONE: + // allow possibly undefined. + *changed = false; + return existing; + default: + assert(false); + } + break; + case TP_Type::FALSE: + switch (add.kind) { + case TP_Type::OBJECT_OF_TYPE: + *changed = true; + return add; + case TP_Type::TYPE_OBJECT: + *changed = true; + return add; + case TP_Type::FALSE: + *changed = false; + return existing; + case TP_Type::NONE: + *changed = false; + return existing; + default: + assert(false); + } + break; + case TP_Type::NONE: + switch (add.kind) { + case TP_Type::OBJECT_OF_TYPE: + case TP_Type::TYPE_OBJECT: + case TP_Type::FALSE: + case TP_Type::NONE: + *changed = false; + return existing; + default: + assert(false); + } + break; + default: + assert(false); + } +} + +bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) { + bool result = false; + for (int i = 0; i < 32; i++) { + bool diff = false; + auto new_type = tp_lca(combined->gpr_types[i], add.gpr_types[i], &diff); + if (diff) { + result = true; + combined->gpr_types[i] = new_type; + } + } + + for (int i = 0; i < 32; i++) { + bool diff = false; + auto new_type = tp_lca(combined->fpr_types[i], add.fpr_types[i], &diff); + if (diff) { + result = true; + combined->fpr_types[i] = new_type; + } + } + + return result; } \ No newline at end of file diff --git a/decompiler/util/DecompilerTypeSystem.h b/decompiler/util/DecompilerTypeSystem.h index 625391464..01f9e5d1a 100644 --- a/decompiler/util/DecompilerTypeSystem.h +++ b/decompiler/util/DecompilerTypeSystem.h @@ -3,6 +3,20 @@ #include "common/type_system/TypeSystem.h" +struct TP_Type { + enum Kind { OBJECT_OF_TYPE, TYPE_OBJECT, FALSE, NONE } kind = NONE; + // in the case that we are type_object, just store the type name in a single arg ts. + TypeSpec ts; + std::string print() const; +}; + +struct TypeState { + TP_Type gpr_types[32]; + TP_Type fpr_types[32]; + + std::string print_gpr_masked(u32 mask) const; +}; + class DecompilerTypeSystem { public: DecompilerTypeSystem(); @@ -25,15 +39,14 @@ class DecompilerTypeSystem { } void add_symbol(const std::string& name, const TypeSpec& type_spec); - void parse_type_defs(const std::vector& file_path); - void add_type_flags(const std::string& name, u64 flags); void add_type_parent(const std::string& child, const std::string& parent); - std::string dump_symbol_types(); std::string lookup_parent_from_inspects(const std::string& child) const; bool lookup_flags(const std::string& type, u64* dest) const; + TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed); + bool tp_lca(TypeState* combined, const TypeState& add); }; #endif // JAK_DECOMPILERTYPESYSTEM_H