/*!
 * @file ObjectFileDB_IR2.cpp
 * This runs the IR2 analysis passes.
 */

#include "ObjectFileDB.h"

#include "common/formatter/formatter.h"
#include "common/goos/PrettyPrinter.h"
#include "common/link_types.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/Timer.h"
#include "common/util/string_util.h"

#include "decompiler/IR2/Form.h"
#include "decompiler/analysis/analyze_inspect_method.h"
#include "decompiler/analysis/cfg_builder.h"
#include "decompiler/analysis/expression_build.h"
#include "decompiler/analysis/final_output.h"
#include "decompiler/analysis/find_defpartgroup.h"
#include "decompiler/analysis/find_defstates.h"
#include "decompiler/analysis/find_skelgroups.h"
#include "decompiler/analysis/inline_asm_rewrite.h"
#include "decompiler/analysis/insert_lets.h"
#include "decompiler/analysis/label_types.h"
#include "decompiler/analysis/mips2c.h"
#include "decompiler/analysis/reg_usage.h"
#include "decompiler/analysis/stack_spill.h"
#include "decompiler/analysis/static_refs.h"
#include "decompiler/analysis/symbol_def_map.h"
#include "decompiler/analysis/type_analysis.h"
#include "decompiler/analysis/variable_naming.h"
#include "decompiler/types2/types2.h"

namespace decompiler {

// NOTE(review): the template argument lists in this file (e.g. on std::unordered_set,
// std::optional, std::make_unique) were missing from the reviewed text and have been restored
// from how the values are used. Confirm them against the declarations in ObjectFileDB.h.

/*!
 * Run every per-file IR2 pass on a single object file, then either write the results to
 * output_dir or, when output_dir is empty, store the final output strings on `data`.
 * Afterwards the per-function ir2 data is released (all of it, or everything except top-level
 * functions and inspect methods when config.generate_all_types is set).
 * @param skip_functions forwarded to ir2_final_out for the "with skips" output.
 * @param skip_states    forwarded to run_defstate.
 */
void ObjectFileDB::process_object_file_data(
    ObjectFileData& data,
    const fs::path& output_dir,
    const Config& config,
    const std::unordered_set<std::string>& skip_functions,
    const std::unordered_map<std::string, std::unordered_set<std::string>>& skip_states) {
  Timer file_timer;

  ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config, data);
  ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config, data);
  ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config, data);
  ir2_setup_labels(config, data);

  ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config, data);
  if (data.linked_data.functions_by_seg.size() == 3) {
    // remember which finder is running so the catch block can report the right one.
    enum { DEFPART, DEFSTATE, DEFSKELGROUP } step = DEFPART;
    try {
      run_defpartgroup(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front());
      step = DEFSTATE;
      run_defstate(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front(), skip_states);
      step = DEFSKELGROUP;
      run_defskelgroups(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front());
    } catch (const std::exception& e) {
      switch (step) {
        case DEFPART:
          lg::error("Failed to find defpartgroups: {}", e.what());
          break;
        case DEFSTATE:
          lg::error("Failed to find defstates: {}", e.what());
          break;
        case DEFSKELGROUP:
          lg::error("Failed to find defskelgroups: {}", e.what());
          break;
      }
    }
  }
  ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config, data);
  ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config, data);

  ir2_insert_anonymous_functions(DEBUG_SEGMENT, data);
  ir2_insert_anonymous_functions(MAIN_SEGMENT, data);
  ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT, data);
  ir2_run_mips2c(config, data);
  ir2_symbol_definition_map(data);

  // TODO - insert the game_name into the import line automatically
  // instead of `goal_src/jak1/import/something.gc`
  // just `import/something.gc`
  //
  // Can be relative to the root of the source directory
  const auto imports_it = config.import_deps_by_file.find(data.to_unique_name());
  std::vector<std::string> imports;
  if (imports_it != config.import_deps_by_file.end()) {
    imports = imports_it->second;
  }

  if (!output_dir.string().empty()) {
    ir2_write_results(output_dir, config, imports, data);
  } else {
    data.output_with_skips = ir2_final_out(data, imports, skip_functions);
    data.full_output = ir2_final_out(data, imports, {});
  }

  if (!config.generate_all_types) {
    // this frees ir2 memory, but means future passes can't look back on this function.
    for_each_function_def_order_in_obj(data, [&](Function& f, int) { f.ir2 = {}; });
  } else {
    for_each_function_def_order_in_obj(data, [&](Function& f, int seg) {
      if (seg == TOP_LEVEL_SEGMENT) {
        return;  // keep top-levels
      }
      if (f.guessed_name.kind == FunctionName::FunctionKind::METHOD &&
          f.guessed_name.method_id == GOAL_INSPECT_METHOD) {
        return;  // keep inspects
      }
      // otherwise free memory
      f.ir2 = {};
    });
  }

  lg::info("Done in {:.2f}ms", file_timer.getMs());
}

/*!
 * Main IR2 analysis pass.
 * At this point, we assume that the files are loaded and we've run find_code to locate all
 * functions, but nothing else.
 * The optional callbacks are invoked before (with the file's unique name) and after each file.
 */
void ObjectFileDB::analyze_functions_ir2(
    const fs::path& output_dir,
    const Config& config,
    const std::optional<std::function<void(std::string)>> prefile_callback,
    const std::optional<std::function<void()>> postfile_callback,
    const std::unordered_set<std::string>& skip_functions,
    const std::unordered_map<std::string, std::unordered_set<std::string>>& skip_states) {
  // count files up front so the progress log can print "[idx/total]".
  int total_file_count = 0;
  for (const auto& f : obj_files_by_name) {
    total_file_count += static_cast<int>(f.second.size());
  }

  int file_idx = 1;
  for_each_obj([&](ObjectFileData& data) {
    if (prefile_callback) {
      (*prefile_callback)(data.to_unique_name());
    }
    lg::info("[{:3d}/{}]------ {}", file_idx++, total_file_count, data.to_unique_name());
    process_object_file_data(data, output_dir, config, skip_functions, skip_states);
    if (postfile_callback) {
      (*postfile_callback)();
    }
  });

  lg::info("{}", stats.let.print());

  if (config.generate_symbol_definition_map) {
    lg::info("Generating symbol definition map...");
    map_builder.build_map();
    std::string result = map_builder.convert_to_json();
    file_util::write_text_file(output_dir / "symbol_map.json", result);
  }
}

/*!
 * First group of per-segment passes: basic blocks, stack spill slots, atomic ops.
 */
void ObjectFileDB::ir2_do_segment_analysis_phase1(int seg,
                                                  const Config& config,
                                                  ObjectFileData& data) {
  ir2_basic_block_pass(seg, config, data);
  ir2_stack_spill_slot_pass(seg, data);
  ir2_atomic_op_pass(seg, config, data);
}

/*!
 * Second group of per-segment passes, run in this order: type analysis, register usage,
 * variables, cfg build, expressions, inline asm rewrite, let insertion, store errors.
 */
void ObjectFileDB::ir2_do_segment_analysis_phase2(int seg,
                                                  const Config& config,
                                                  ObjectFileData& data) {
  ir2_type_analysis_pass(seg, config, data);
  ir2_register_usage_pass(seg, data);
  ir2_variable_pass(seg, data);
  ir2_cfg_build_pass(seg, data);
  ir2_build_expressions(seg, config, data);
  ir2_rewrite_inline_asm_instructions(seg, data);
  ir2_insert_lets(seg, data);
  ir2_add_store_errors(seg, data);
}

/*!
 * Build the label database for a 3-segment object file, using any label types the config
 * provides for this file. A failure here is fatal (lg::die).
 */
void ObjectFileDB::ir2_setup_labels(const Config& config, ObjectFileData& data) {
  if (data.linked_data.segments == 3) {
    std::unordered_map<std::string, LabelConfigInfo> config_labels;
    auto config_it = config.label_types.find(data.to_unique_name());
    if (config_it != config.label_types.end()) {
      config_labels = config_it->second;
    }
    try {
      data.linked_data.label_db =
          std::make_unique<LabelDB>(config_labels, data.linked_data.labels, dts);
      analyze_labels(data.linked_data.label_db.get(), &data.linked_data);
    } catch (const std::exception& e) {
      lg::die("Error parsing labels for {}: {}", data.to_unique_name(), e.what());
    }
  }
}

/*!
 * Run the mips2c passes on the functions that the config lists by name.
 */
void ObjectFileDB::ir2_run_mips2c(const Config& config, ObjectFileData& data) {
  for_each_function_def_order_in_obj(data, [&](Function& func, int) {
    const auto name = func.name();
    if (config.hacks.mips2c_functions_by_name.count(name)) {
      lg::info("MIPS2C on {}", name);
      run_mips2c(&func, config.game_version);
    }

    auto it = config.hacks.mips2c_jump_table_functions.find(name);
    if (it != config.hacks.mips2c_jump_table_functions.end()) {
      run_mips2c_jump_table(&func, it->second, config.game_version);
    }
  });
}

/*!
 * Analyze the top level function of each object.
 * - Find global function definitions
 * - Find type definitions
 * - Find method definitions
 * - Warn for non-unique function names.
*/ void ObjectFileDB::ir2_top_level_pass(const Config& config) { Timer timer; int total_functions = 0; int total_named_global_functions = 0; int total_methods = 0; int total_top_levels = 0; int total_unknowns = 0; for_each_obj([&](ObjectFileData& data) { if (data.linked_data.segments == 3) { // the top level segment should have a single function ASSERT(data.linked_data.functions_by_seg.at(2).size() == 1); auto& func = data.linked_data.functions_by_seg.at(2).front(); ASSERT(func.guessed_name.empty()); func.guessed_name.set_as_top_level(data.to_unique_name()); func.find_global_function_defs(data.linked_data, dts); func.find_type_defs(data.linked_data, dts); func.find_method_defs(data.linked_data, dts); } }); // check for function uniqueness. std::unordered_set unique_names; std::unordered_map> duplicated_functions; int uid = 1; for_each_obj([&](ObjectFileData& data) { int func_in_obj = 0; for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) { for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) { func.guessed_name.unique_id = uid++; func.guessed_name.id_in_object = func_in_obj++; func.guessed_name.object_name = data.to_unique_name(); auto name = func.name(); switch (func.guessed_name.kind) { case FunctionName::FunctionKind::METHOD: total_methods++; break; case FunctionName::FunctionKind::GLOBAL: total_named_global_functions++; break; case FunctionName::FunctionKind::TOP_LEVEL_INIT: total_top_levels++; break; case FunctionName::FunctionKind::UNIDENTIFIED: total_unknowns++; break; default: ASSERT(false); } total_functions++; if (unique_names.find(name) != unique_names.end()) { duplicated_functions[name].insert(data.to_unique_name()); } unique_names.insert(name); TypeSpec ts; if (lookup_function_type(func.guessed_name, data.to_unique_name(), config, &ts)) { func.type = ts; } else { func.type = TypeSpec("function"); } if (config.hacks.mips2c_functions_by_name.find(name) != config.hacks.mips2c_functions_by_name.end()) { 
func.warnings.info("Flagged as mips2c by config"); func.suspected_asm = true; } else if (config.hacks.asm_functions_by_name.find(name) != config.hacks.asm_functions_by_name.end()) { func.warnings.error("Flagged as asm by config"); func.suspected_asm = true; } } } }); // we remember duplicates like this so we can warn on all occurances of the duplicate name for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { (void)segment_id; auto name = func.name(); if (duplicated_functions.find(name) != duplicated_functions.end()) { duplicated_functions[name].insert(data.to_unique_name()); func.warnings.info("this function exists in multiple non-identical object files"); } }); lg::info("Found a total of {} functions in {:.2f} ms", total_functions, timer.getMs()); lg::info("{:4d} unknown {:.2f}%", total_unknowns, 100.f * total_unknowns / total_functions); lg::info("{:4d} global {:.2f}%", total_named_global_functions, 100.f * total_named_global_functions / total_functions); lg::info("{:4d} methods {:.2f}%", total_methods, 100.f * total_methods / total_functions); lg::info("{:4d} logins {:.2f}%", total_top_levels, 100.f * total_top_levels / total_functions); } void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file, const std::optional& previous_game_types, const std::unordered_set& bad_types) { auto is_code_file = [](ObjectFileData& data) { return (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())); }; std::unordered_map per_object; DecompilerTypeSystem previous_game_ts(GameVersion::Jak2); // version here doesn't matter. 
if (previous_game_types) { previous_game_ts.parse_type_defs({*previous_game_types}); } TypeInspectorCache ti_cache; // Do a first pass to initialize all types and symbols for_each_obj([&](ObjectFileData& data) { if (is_code_file(data)) { per_object[data.to_unique_name()] = PerObjectAllTypeInfo(); // Go through the top-level segment first to identify the type names associated with each // symbol def for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) { inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, per_object.at(data.to_unique_name())); }); } }); // Guess at non-virtual state type's: // // Collect all type names, since the DTS doesn't know the actual type tree (all-types is empty!) // we can't filter by what is actually a process type (with existing code). std::unordered_map> all_type_names; for (auto& [obj_name, obj_info] : per_object) { for (const auto& type_name : obj_info.type_names_in_order) { if (all_type_names.find(obj_name) == all_type_names.end()) { all_type_names[obj_name] = {}; } all_type_names[obj_name].push_back(type_name); } } std::unordered_map state_to_type_map; for (auto& [obj_name, obj_info] : per_object) { for (const auto& [sym_name, sym_type] : obj_info.symbol_types) { if (sym_type == "state") { int longest_match_length = 0; std::string longest_match = ""; std::string longest_match_object_name = ""; // Make a best effort guess by finding the longest prefix match for (const auto& [obj_name, type_names] : all_type_names) { for (const auto& type_name : type_names) { if (str_util::starts_with(sym_name, type_name) && (int)type_name.length() > longest_match_length) { longest_match_length = type_name.length(); longest_match = type_name; longest_match_object_name = obj_name; } } } if (longest_match != "") { if (per_object.find(longest_match_object_name) != per_object.end()) { per_object.at(longest_match_object_name).non_virtual_state_guesses[sym_name] = longest_match; 
obj_info.already_seen_symbols.insert(sym_name); } } } } } // Then another to actually setup the definitions for_each_obj([&](ObjectFileData& data) { if (is_code_file(data)) { auto& object_result = per_object.at(data.to_unique_name()); // Handle the top level last, which is fine as all symbol_defs are always written after // typedefs for_each_function_def_order_in_obj(data, [&](Function& f, int seg) { if (seg != TOP_LEVEL_SEGMENT) { if (f.is_inspect_method && bad_types.find(f.guessed_name.type_name) == bad_types.end()) { auto deftype_from_inspect = inspect_inspect_method(f, f.guessed_name.type_name, dts, data.linked_data, previous_game_ts, ti_cache, object_result); bool already_seen = object_result.type_info.count(f.guessed_name.type_name) > 0; if (!already_seen) { object_result.type_names_in_order.push_back(f.guessed_name.type_name); } auto& info = object_result.type_info[f.guessed_name.type_name]; info.from_inspect_method = true; info.type_definition = deftype_from_inspect; } else { // no inspect methods // - can we solve custom print methods in a generic way? ie `entity-links` } } }); for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) { object_result.symbol_defs += inspect_top_level_symbol_defines( f, data.linked_data, dts, previous_game_ts, object_result); }); } }); // Output result std::string result; result += ";; All Types\n\n"; for_each_obj([&](ObjectFileData& data) { if (is_code_file(data)) { auto& obj = per_object.at(data.to_unique_name()); result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"); result += fmt::format(";; {:30s} ;;\n", data.name_in_dgo); result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n"); for (const auto& type_name : obj.type_names_in_order) { auto& info = obj.type_info.at(type_name); result += info.type_definition; result += "\n"; } result += obj.symbol_defs; result += "\n"; } }); file_util::write_text_file(output_file, result); } /*! 
* Initial Function Analysis Pass to build the control flow graph. * - Find basic blocks * - Analyze prologue and epilogue * - Build control flow graph */ void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { func.ir2.env.file = &data.linked_data; func.ir2.env.dts = &dts; func.ir2.env.func = &func; // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, seg, func); func.basic_blocks = blocks; if (!func.suspected_asm) { // find the prologue/epilogue so they can be excluded from basic blocks. func.analyze_prologue(data.linked_data); } else { // manually exclude the type tag from the basic block. ASSERT(func.basic_blocks.front().start_word == 0); ASSERT(func.basic_blocks.front().end_word >= 1); func.basic_blocks.front().start_word = 1; } if (!func.suspected_asm) { // run analysis // build a control flow graph, just looking at branch instructions. CondWithElseLengthHack hack; auto lookup = config.hacks.cond_with_else_len_by_func_name.find(func.name()); if (lookup != config.hacks.cond_with_else_len_by_func_name.end()) { hack = lookup->second; } std::unordered_set asm_br_blocks; auto asm_lookup = config.hacks.blocks_ending_in_asm_branch_by_func_name.find(func.name()); if (asm_lookup != config.hacks.blocks_ending_in_asm_branch_by_func_name.end()) { asm_br_blocks = asm_lookup->second; } func.cfg = build_cfg(data.linked_data, seg, func, hack, asm_br_blocks, config.game_version); if (!func.cfg->is_fully_resolved()) { lg::warn("Function {} from {} failed to build control flow graph!", func.name(), data.to_unique_name()); } else { func.cfg_ok = true; } } if (func.suspected_asm) { func.warnings.info("Assembly Function"); } }); } void ObjectFileDB::ir2_stack_spill_slot_pass(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.cfg_ok) { return; } try { auto spill_map = 
build_spill_map(func.instructions, {func.prologue_end, func.epilogue_start}); func.ir2.env.set_stack_spills(spill_map); } catch (std::exception& e) { func.warnings.warning("stack spill failed: {}", e.what()); } }); } /*! * Conversion of MIPS instructions into AtomicOps. The AtomicOps represent what we * think are IR of the original GOAL compiler. */ void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.cfg_ok) { return; } if (!func.suspected_asm) { func.ir2.atomic_ops_attempted = true; try { bool inline_asm = config.hacks.hint_inline_assembly_functions.find(func.name()) != config.hacks.hint_inline_assembly_functions.end(); std::unordered_set blocks_ending_in_asm_branch; auto asm_branch_it = config.hacks.blocks_ending_in_asm_branch_by_func_name.find(func.name()); if (asm_branch_it != config.hacks.blocks_ending_in_asm_branch_by_func_name.end()) { blocks_ending_in_asm_branch = asm_branch_it->second; } auto ops = convert_function_to_atomic_ops(func, data.linked_data.labels, func.warnings, inline_asm, blocks_ending_in_asm_branch, config.game_version); func.ir2.atomic_ops = std::make_shared(std::move(ops)); func.ir2.atomic_ops_succeeded = true; func.ir2.env.set_end_var(func.ir2.atomic_ops->end_op().return_var()); } catch (std::exception& e) { lg::warn("Function {} from {} could not be converted to atomic ops: {}", func.name(), data.to_unique_name(), e.what()); func.warnings.error("Failed to convert to atomic ops: {}", e.what()); } } }); } void ObjectFileDB::ir2_symbol_definition_map(ObjectFileData& data) { map_builder.add_object(data); } template Value try_lookup(const std::unordered_map& map, const Key& key) { auto lookup = map.find(key); if (lookup == map.end()) { return Value(); } else { return lookup->second; } } const std::string* find_file_override_for_art_group(const Config& config, const std::string& obj_name, const std::string& type_name) { // find 
file override for this type auto it_file = config.art_group_file_override.find(obj_name); if (it_file != config.art_group_file_override.end()) { auto it_type = it_file->second.find(type_name); if (it_type != it_file->second.end()) { return &it_type->second; } } return nullptr; } /*! * Analyze registers and determine the type in each register at each instruction. * - Figure out the type of each function, from configs. * - Propagate types. * - NOTE: this will update register info usage more accurately for functions. */ void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config, ObjectFileData& data) { auto obj_name = data.to_unique_name(); for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.suspected_asm) { TypeSpec ts; if (lookup_function_type(func.guessed_name, data.to_unique_name(), config, &ts) && func.ir2.atomic_ops_succeeded) { func.type = ts; // try type analysis here. auto func_name = func.name(); auto register_casts = try_lookup(config.register_type_casts_by_function_by_atomic_op_idx, func_name); func.ir2.env.set_type_casts(register_casts); auto stack_casts = try_lookup(config.stack_type_casts_by_function_by_stack_offset, func_name); func.ir2.env.set_stack_casts(stack_casts); if (config.hacks.pair_functions_by_name.find(func_name) != config.hacks.pair_functions_by_name.end()) { func.ir2.env.set_sloppy_pair_typing(); } if (config.hacks.reject_cond_to_value.find(func_name) != config.hacks.reject_cond_to_value.end()) { func.ir2.env.aggressively_reject_cond_to_value_rewrite = true; } func.ir2.env.set_stack_structure_hints( try_lookup(config.stack_structure_hints_by_function, func_name)); if (config.process_stack_size_overrides.find(func_name) != config.process_stack_size_overrides.end()) { func.process_stack_size = config.process_stack_size_overrides.at(func_name); } if (func.guessed_name.kind == FunctionName::FunctionKind::V_STATE) { if (config.art_group_type_remap.find(func.guessed_name.type_name) != 
config.art_group_type_remap.end()) { auto ag_override = find_file_override_for_art_group(config, obj_name, func.guessed_name.type_name); func.ir2.env.set_art_group( ag_override ? *ag_override : config.art_group_type_remap.at(func.guessed_name.type_name)); } else { func.ir2.env.set_art_group(func.guessed_name.type_name + "-ag"); } } else if (func.guessed_name.kind == FunctionName::FunctionKind::NV_STATE || func.type.try_get_tag("behavior").has_value()) { std::string type = func.type.get_tag("behavior"); if (config.art_group_type_remap.find(type) != config.art_group_type_remap.end()) { auto ag_override = find_file_override_for_art_group(config, obj_name, type); func.ir2.env.set_art_group(ag_override ? *ag_override : config.art_group_type_remap.at(type)); } else { func.ir2.env.set_art_group(type + "-ag"); } } else { func.ir2.env.set_art_group(obj_name + "-ag"); } func.ir2.env.set_jg(func.ir2.env.art_group()); if (config.joint_node_hacks.find(func.ir2.env.art_group()) != config.joint_node_hacks.end()) { func.ir2.env.set_jg(config.joint_node_hacks.at(func.ir2.env.art_group())); } constexpr bool kForceNewTypes = false; if (config.game_version != GameVersion::Jak1 || kForceNewTypes) { // use new types for jak 2/3 always types2::Input in; types2::Output out; in.func = &func; in.function_type = ts; in.dts = &dts; try { types2::run(out, in); func.ir2.env.set_types(out.block_init_types, out.op_end_types, *func.ir2.atomic_ops, ts); } catch (const std::exception& e) { func.warnings.error("Type analysis failed: {}", e.what()); } func.ir2.env.types_succeeded = out.succeeded; } else { // old type pass if (run_type_analysis_ir2(ts, dts, func)) { func.ir2.env.types_succeeded = true; } else { func.warnings.error("Type analysis failed"); } } } else { lg::warn("Function {} didn't know its type", func.name()); func.warnings.error("Function {} has unknown type", func.name()); } } }); } void ObjectFileDB::ir2_register_usage_pass(int seg, ObjectFileData& data) { 
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) { func.ir2.env.set_reg_use(analyze_ir2_register_usage(func)); auto& block_0_start = func.ir2.env.reg_use().block.at(0).input; std::vector dep_regs; for (auto x : block_0_start) { dep_regs.push_back(x); } if (!dep_regs.empty()) { std::sort(dep_regs.begin(), dep_regs.end(), [](const Register& a, const Register& b) { return a.reg_id() < b.reg_id(); }); int end_valid_argument = Register(Reg::GPR, Reg::T3).reg_id() + 1; if (func.type.arg_count() > 0) { // end_valid_argument = Register::get_arg_reg(func.type.arg_count() - 1).reg_id(); end_valid_argument = Register(Reg::GPR, Reg::A0).reg_id() + func.type.arg_count() - 1; } for (auto& x : dep_regs) { if ((x.get_kind() == Reg::VF && x.get_vf() != 0) || x.get_kind() == Reg::SPECIAL) { lg::error("Bad vf dependency on {} in {}", x.to_charp(), func.name()); func.warnings.error("Bad vector register dependency: {}", x.to_string()); continue; } if (x == Register(Reg::GPR, Reg::S6) || x == Register(Reg::GPR, Reg::S7) || x == Register(Reg::GPR, Reg::SP) || x == Register(Reg::VF, 0)) { continue; } if (x.reg_id() < end_valid_argument) { continue; } lg::error("Bad register dependency on {} in {}", x.to_charp(), func.name()); if (x.to_string() == "f31") { func.warnings.warning("Function may read a register that is not set: {}", x.to_string()); } else { func.warnings.error("Function may read a register that is not set: {}", x.to_string()); } } } } }); } void ObjectFileDB::ir2_variable_pass(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.ir2.env.has_type_analysis()) { try { auto result = run_variable_renaming(func, func.ir2.env.reg_use(), *func.ir2.atomic_ops, dts); if (result.has_value()) { func.ir2.env.set_local_vars(*result); } } catch (const std::exception& e) { lg::warn("variable pass 
failed on {}: {}", func.name(), e.what()); } } }); } void ObjectFileDB::ir2_cfg_build_pass(int seg, ObjectFileData& data) { Timer timer; int total = 0; int attempted = 0; int successful = 0; for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; total++; if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.cfg->is_fully_resolved()) { attempted++; try { build_initial_forms(func); } catch (std::exception& e) { func.warnings.error("Failed to structure: {}", e.what()); func.ir2.top_form = nullptr; } } if (func.ir2.top_form) { successful++; } }); } void ObjectFileDB::ir2_build_expressions(int seg, const Config& config, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (func.ir2.top_form && func.ir2.env.has_type_analysis() && func.ir2.env.has_local_vars() && func.ir2.env.types_succeeded) { auto name = func.name(); auto arg_config = config.function_arg_names.find(name); auto var_config = config.function_var_overrides.find(name); if (convert_to_expressions(func.ir2.top_form, *func.ir2.form_pool, func, arg_config != config.function_arg_names.end() ? arg_config->second : std::vector{}, var_config != config.function_var_overrides.end() ? var_config->second : std::unordered_map{}, dts)) { func.ir2.print_debug_forms = true; func.ir2.expressions_succeeded = true; } } }); } void ObjectFileDB::ir2_insert_lets(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (func.ir2.expressions_succeeded) { try { insert_lets(func, func.ir2.env, *func.ir2.form_pool, func.ir2.top_form, stats.let); } catch (const std::exception& e) { const auto err = fmt::format( "Error while inserting lets: {}. 
Make sure that the return type is not " "none if something is actually returned.", e.what()); lg::warn("{}", err); func.warnings.error(err); } } }); } void ObjectFileDB::ir2_add_store_errors(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (func.ir2.expressions_succeeded && !func.warnings.has_errors()) { // print warning about failed store, but only if decompilation passes without any major // errors func.ir2.top_form->apply([&](FormElement* f) { auto as_store = dynamic_cast(f); if (as_store) { func.warnings.error("Failed store: {} at op {}", as_store->to_string(func.ir2.env), as_store->op()->op_id()); } }); } }); } void ObjectFileDB::ir2_rewrite_inline_asm_instructions(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (func.ir2.top_form && func.ir2.env.has_type_analysis()) { if (rewrite_inline_asm_instructions(func.ir2.top_form, *func.ir2.form_pool, func, dts)) { func.ir2.print_debug_forms = true; } } }); } void ObjectFileDB::ir2_insert_anonymous_functions(int seg, ObjectFileData& data) { for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (func.ir2.top_form && func.ir2.env.has_type_analysis()) { try { insert_static_refs(func.ir2.top_form, *func.ir2.form_pool, func, dts); } catch (std::exception& e) { func.warnings.error("Failed static ref finding: {}\n", e.what()); lg::error("Function {} failed static ref: {}\n", func.name(), e.what()); } } }); } void ObjectFileDB::ir2_write_results(const fs::path& output_dir, const Config& config, const std::vector& imports, ObjectFileData& obj) { if (obj.linked_data.has_any_functions()) { auto file_text = ir2_to_file(obj, config); auto file_name = output_dir / (obj.to_unique_name() + "_ir2.asm"); file_util::write_text_file(file_name, file_text); auto unformatted_code = ir2_final_out(obj, imports, {}); auto final_name = output_dir / (obj.to_unique_name() + "_disasm.gc"); if 
(config.format_code) { const auto formatted_code = formatter::format_code(unformatted_code); if (!formatted_code) { lg::error( "Was unable to format the decompiled result of {}, make a github issue. Writing " "unformatted code", obj.to_unique_name()); file_util::write_text_file(final_name, unformatted_code); } else { file_util::write_text_file(final_name, formatted_code.value()); } } else { file_util::write_text_file(final_name, unformatted_code); } } } std::string ObjectFileDB::ir2_to_file(ObjectFileData& data, const Config& config) { std::string result; auto all_types_path = file_util::get_file_path({config.all_types_file}); auto game_version = game_version_names[config.game_version]; result += fmt::format("; ALL_TYPES={}={}\n\n", game_version, all_types_path); const char* segment_names[] = {"main segment", "debug segment", "top-level segment"}; ASSERT(data.linked_data.segments <= 3); for (int seg = data.linked_data.segments; seg-- > 0;) { // segment header result += ";------------------------------------------\n; "; result += segment_names[seg]; result += "\n;------------------------------------------\n\n"; // functions for (auto& func : data.linked_data.functions_by_seg.at(seg)) { try { result += ir2_function_to_string(data, func, seg); } catch (std::exception& e) { result += "Failed to write "; result += func.name(); result += ": "; result += e.what(); result += "\n"; } if (func.ir2.top_form && func.ir2.env.has_local_vars()) { result += "\n;;-*-OpenGOAL-Start-*-\n\n"; if (func.ir2.env.has_local_vars()) { if (!func.ir2.print_debug_forms) { result += ";; expression building failed part way through, function may be weird\n"; } result += final_defun_out(func, func.ir2.env, dts); } else { result += ";; no variable information\n"; result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); } result += "\n\n;;-*-OpenGOAL-End-*-\n\n"; } else if (func.ir2.atomic_ops_succeeded) { auto& ao = func.ir2.atomic_ops; for (size_t i = 0; i < ao->ops.size(); 
i++) { auto& op = ao->ops.at(i); if (!dynamic_cast(op.get())) { auto instr_idx = ao->atomic_op_to_instruction.at(i); // check for a label to print auto label_id = data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4); if (label_id != -1) { result += fmt::format("(label {})\n", data.linked_data.labels.at(label_id).name); } // check for no misaligned labels in code segments. for (int j = 1; j < 4; j++) { ASSERT(data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4 + j) == -1); } // print assembly ops. } // print instruction result += fmt::format(" {}\n", op->to_string(func.ir2.env)); } } // print if it exists, even if it's not okay. if (config.print_cfgs && func.cfg) { result += fmt::format("Control Flow Graph:\n{}\n\n", pretty_print::to_string(func.cfg->to_form())); } if (false && func.ir2.print_debug_forms) { result += '\n'; result += ";; DEBUG OUTPUT BELOW THIS LINE:\n"; result += func.ir2.debug_form_string; result += '\n'; } result += ";; .endfunction\n\n"; } // print data for (size_t i = data.linked_data.offset_of_data_zone_by_seg.at(seg); i < data.linked_data.words_by_seg.at(seg).size(); i++) { for (int j = 0; j < 4; j++) { auto label_id = data.linked_data.get_label_at(seg, i * 4 + j); if (label_id != -1) { result += data.linked_data.labels.at(label_id).name + ":"; if (j != 0) { result += " (offset " + std::to_string(j) + ")"; } result += "\n"; } } auto& word = data.linked_data.words_by_seg[seg][i]; data.linked_data.append_word_to_string(result, word); if (word.kind() == LinkedWord::TYPE_PTR && word.symbol_name() == "string") { result += "; " + data.linked_data.get_goal_string(seg, i) + "\n"; } } result += '\n'; } return result; } namespace { void append_commented(std::string& line, bool& has_comment, const std::string& to_append, int offset = 0) { // minimum length before comment appears. 
constexpr int pre_comment_length = 30; // if comment overflows, how much to indent the next one constexpr int overflow_indent = 30; // pad, and add comment if (!has_comment) { if (line.length() < pre_comment_length) { line.append(pre_comment_length - line.length(), ' '); } line += ";; "; line += to_append; has_comment = true; } else { if (std::max(int(line.length()), offset) + to_append.length() > 120) { line += "\n"; line.append(overflow_indent, ' '); line += ";; "; } else { if (int(line.length()) < offset) { line.append(offset - line.length(), ' '); } line += " "; } line += to_append; } } } // namespace std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& func, int seg) { std::string result; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; result += "; .function " + func.name() + "\n"; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; result += func.prologue.to_string(2) + "\n"; if (func.guessed_name.kind == FunctionName::FunctionKind::NV_STATE || func.guessed_name.kind == FunctionName::FunctionKind::V_STATE) { result += fmt::format(" ;internal_name: {}\n", func.state_handler_as_anon_func); } if (func.warnings.has_warnings()) { result += ";; Warnings:\n" + func.warnings.get_warning_text(true) + "\n"; } /* if (func.ir2.env.has_local_vars()) { result += func.ir2.env.print_local_var_types(func.ir2.top_form); } */ bool print_atomics = func.ir2.atomic_ops_succeeded; // print each instruction in the function. bool in_delay_slot = false; int total_instructions_printed = 0; int last_instr_printed = 0; std::string line; auto print_instr_start = [&](int i) { // check for a label to print auto label_id = data.linked_data.get_label_at(seg, (func.start_word + i) * 4); if (label_id != -1) { result += data.linked_data.labels.at(label_id).name + ":\n"; } // check for no misaligned labels in code segments. 
for (int j = 1; j < 4; j++) { ASSERT(data.linked_data.get_label_at(seg, (func.start_word + i) * 4 + j) == -1); } // print the assembly instruction auto& instr = func.instructions.at(i); line = " " + instr.to_string(data.linked_data.labels); }; auto print_instr_end = [&](int i) { auto& instr = func.instructions.at(i); result += line; result += "\n"; // print delay slot gap if (in_delay_slot) { result += "\n"; in_delay_slot = false; } // for next time... if (gOpcodeInfo[(int)instr.kind].has_delay_slot) { in_delay_slot = true; } total_instructions_printed++; ASSERT(last_instr_printed + 1 == i); last_instr_printed = i; }; // first, print the prologue. we start at word 1 because word 0 is the type tag for (int i = 1; i < func.basic_blocks.front().start_word; i++) { print_instr_start(i); print_instr_end(i); } // next, print each basic block int end_idx = func.basic_blocks.front().start_word; for (int block_id = 0; block_id < int(func.basic_blocks.size()); block_id++) { // block number result += "B" + std::to_string(block_id) + ":\n"; auto& block = func.basic_blocks.at(block_id); const TypeState* init_types = nullptr; if (func.ir2.env.has_type_analysis()) { init_types = &func.ir2.env.get_types_at_block_entry(block_id); } int start_word = block.start_word; // if we have no prologue, skip the type tag. 
if (start_word == 0) { start_word = 1; } for (int instr_id = start_word; instr_id < block.end_word; instr_id++) { print_instr_start(instr_id); bool printed_comment = false; // print atomic op int op_id = -1; if (print_atomics && func.instr_starts_atomic_op(instr_id)) { auto& op = func.get_atomic_op_at_instr(instr_id); op_id = func.ir2.atomic_ops->instruction_to_atomic_op.at(instr_id); append_commented(line, printed_comment, fmt::format("[{:3d}] {}", op_id, op.to_form(data.linked_data.labels, func.ir2.env).print())); if (func.ir2.env.has_type_analysis()) { append_commented( line, printed_comment, op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50); } /*if (func.ir2.env.has_reg_use()) { std::string regs; for (auto r : func.ir2.env.reg_use().op.at(op_id).live_in) { regs += r.to_charp(); regs += ' '; } if (!regs.empty()) { append_commented(line, printed_comment, "lvi: " + regs, 50); } }*/ } auto& instr = func.instructions.at(instr_id); // print linked strings for (int iidx = 0; iidx < instr.n_src; iidx++) { if (instr.get_src(iidx).is_label()) { auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label()); if (data.linked_data.is_string(lab.target_segment, lab.offset)) { append_commented( line, printed_comment, data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1)); } } } print_instr_end(instr_id); if (print_atomics && func.ir2.env.has_type_analysis() && func.instr_starts_atomic_op(instr_id)) { init_types = &func.ir2.env.get_types_after_op(op_id); } } end_idx = block.end_word; } for (int i = end_idx; i < func.end_word - func.start_word; i++) { print_instr_start(i); print_instr_end(i); } if (func.cfg) { if (!func.cfg->is_fully_resolved()) { result += func.cfg->to_form_string(); result += "\n"; result += func.cfg->to_dot(); result += "\n"; } } if (func.mips2c_output) { result += ";;-*-MIPS2C-Start-*-\n"; result += *func.mips2c_output; result += ";;-*-MIPS2C-End-*-\n"; } result += "\n"; 
ASSERT(total_instructions_printed == (func.end_word - func.start_word - 1)); return result; } /*! * Try to look up the type of a function. Looks at the decompiler type info, the hints files, * and other GOAL rules. */ bool ObjectFileDB::lookup_function_type(const FunctionName& name, const std::string& obj_name, const Config& config, TypeSpec* result) { // don't return function types that are explictly flagged as bad in config. if (config.hacks.no_type_analysis_functions_by_name.find(name.to_string()) != config.hacks.no_type_analysis_functions_by_name.end()) { return false; } if (name.kind == FunctionName::FunctionKind::GLOBAL) { // global GOAL function. auto kv = dts.symbol_types.find(name.function_name); if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { if (kv->second.base_type() != "function") { lg::die("Found a function named {} but the symbol has type {}", name.to_string(), kv->second.print()); } // good, found a global function with full type information. *result = kv->second; return true; } } else if (name.kind == FunctionName::FunctionKind::METHOD) { MethodInfo info; if (dts.ts.try_lookup_method(name.type_name, name.method_id, &info)) { if (info.type.arg_count() >= 1) { if (info.type.base_type() != "function") { lg::die("Found a method named {} but the symbol has type {}", name.to_string(), info.type.print()); } // substitute the _type_ for the correct type. 
*result = info.type.substitute_for_method_call(name.type_name); return true; } } } else if (name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) { *result = dts.ts.make_function_typespec({}, "none"); return true; } else if (name.kind == FunctionName::FunctionKind::UNIDENTIFIED) { // try looking up the object const auto& map = config.anon_function_types_by_obj_by_id; auto obj_kv = map.find(obj_name); if (obj_kv != map.end()) { auto func_kv = obj_kv->second.find(name.get_anon_id()); if (func_kv != obj_kv->second.end()) { *result = dts.parse_type_spec(func_kv->second); return true; } } } else if (name.kind == FunctionName::FunctionKind::NV_STATE) { auto sym_type = dts.symbol_types.find(name.state_name); if (sym_type == dts.symbol_types.end()) { lg::error("Could not find symbol with name {} for state. This is likely a decompiler bug.", name.state_name); return false; } *result = get_state_handler_type(name.handler_kind, sym_type->second); return true; } else if (name.kind == FunctionName::FunctionKind::V_STATE) { auto mi = dts.ts.lookup_method(name.type_name, name.state_name); *result = get_state_handler_type(name.handler_kind, mi.type.substitute_for_method_call(name.type_name)); return true; } else { ASSERT(false); } return false; } std::string ObjectFileDB::ir2_final_out(ObjectFileData& data, const std::vector& imports, const std::unordered_set& skip_functions) { if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) { std::string result; result += ";;-*-Lisp-*-\n"; result += "(in-package goal)\n\n"; ASSERT(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).size() == 1); auto top_level = data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).at(0); result += write_from_top_level(top_level, dts, data.linked_data, imports, skip_functions); result += "\n\n"; return result; } else { return ";; not a code file."; } } } // namespace decompiler