jak-project/decompiler/ObjectFile/ObjectFileDB_IR2.cpp

1297 lines
48 KiB
C++
Raw Normal View History

/*!
* @file ObjectFileDB_IR2.cpp
* This runs the IR2 analysis passes.
*/
#include "ObjectFileDB.h"
#include "common/formatter/formatter.h"
#include "common/goos/PrettyPrinter.h"
#include "common/link_types.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/Timer.h"
#include "common/util/string_util.h"
#include "decompiler/IR2/Form.h"
#include "decompiler/analysis/analyze_inspect_method.h"
#include "decompiler/analysis/cfg_builder.h"
#include "decompiler/analysis/expression_build.h"
#include "decompiler/analysis/final_output.h"
#include "decompiler/analysis/find_defpartgroup.h"
#include "decompiler/analysis/find_defstates.h"
#include "decompiler/analysis/find_skelgroups.h"
#include "decompiler/analysis/inline_asm_rewrite.h"
#include "decompiler/analysis/insert_lets.h"
#include "decompiler/analysis/label_types.h"
#include "decompiler/analysis/mips2c.h"
#include "decompiler/analysis/reg_usage.h"
#include "decompiler/analysis/stack_spill.h"
#include "decompiler/analysis/static_refs.h"
#include "decompiler/analysis/symbol_def_map.h"
#include "decompiler/analysis/type_analysis.h"
#include "decompiler/analysis/variable_naming.h"
#include "decompiler/types2/types2.h"
namespace decompiler {
/*!
 * Run all of the per-object IR2 passes on a single object file, then either write the results to
 * disk or keep the final output text on the object.
 * - output_dir: when non-empty, results go through ir2_write_results. When empty, the output is
 *   stored in data.output_with_skips and data.full_output instead.
 * - skip_functions: passed to ir2_final_out for the "with skips" output.
 * - skip_states: passed to run_defstate.
 */
void ObjectFileDB::process_object_file_data(
    ObjectFileData& data,
    const fs::path& output_dir,
    const Config& config,
    const std::unordered_set<std::string>& skip_functions,
    const std::unordered_map<std::string, std::unordered_set<std::string>>& skip_states) {
  Timer file_timer;

  // phase 1 on every segment, then labels, then phase 2 on the top-level only.
  ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config, data);
  ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config, data);
  ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config, data);
  ir2_setup_labels(config, data);
  ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config, data);

  if (data.linked_data.functions_by_seg.size() == 3) {
    // remembers which finder is running so the handler can name the one that threw.
    // all three share one try: once one fails, the later ones are not attempted.
    enum { DEFPART, DEFSTATE, DEFSKELGROUP } stage = DEFPART;
    try {
      run_defpartgroup(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front());
      stage = DEFSTATE;
      run_defstate(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front(), skip_states);
      stage = DEFSKELGROUP;
      run_defskelgroups(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front());
    } catch (const std::exception& e) {
      if (stage == DEFPART) {
        lg::error("Failed to find defpartgroups: {}", e.what());
      } else if (stage == DEFSTATE) {
        lg::error("Failed to find defstates: {}", e.what());
      } else {
        lg::error("Failed to find defskelgroups: {}", e.what());
      }
    }
  }

  // the remaining segments get phase 2 after the top-level finders have run.
  ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config, data);
  ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config, data);
  ir2_insert_anonymous_functions(DEBUG_SEGMENT, data);
  ir2_insert_anonymous_functions(MAIN_SEGMENT, data);
  ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT, data);
  ir2_run_mips2c(config, data);
  ir2_symbol_definition_map(data);

  // TODO - insert the game_name into the import line automatically
  // instead of `goal_src/jak1/import/something.gc`
  // just `import/something.gc`
  //
  // Can be relative to the root of the source directory
  std::vector<std::string> imports;
  if (const auto found = config.import_deps_by_file.find(data.to_unique_name());
      found != config.import_deps_by_file.end()) {
    imports = found->second;
  }

  if (output_dir.string().empty()) {
    // no output directory: keep both versions of the output in memory.
    data.output_with_skips = ir2_final_out(data, imports, skip_functions);
    data.full_output = ir2_final_out(data, imports, {});
  } else {
    ir2_write_results(output_dir, config, imports, data);
  }

  // this frees ir2 memory, but means future passes can't look back on this function.
  for_each_function_def_order_in_obj(data, [&](Function& f, int seg) {
    if (config.generate_all_types) {
      // when generating all-types, top-levels and inspect methods are kept.
      const bool is_top_level = seg == TOP_LEVEL_SEGMENT;
      const bool is_inspect = f.guessed_name.kind == FunctionName::FunctionKind::METHOD &&
                              f.guessed_name.method_id == GOAL_INSPECT_METHOD;
      if (is_top_level || is_inspect) {
        return;
      }
    }
    f.ir2 = {};
  });

  lg::info("Done in {:.2f}ms", file_timer.getMs());
}
/*!
* Main IR2 analysis pass.
* At this point, we assume that the files are loaded and we've run find_code to locate all
* functions, but nothing else.
*/
decomp: finish _almost all of_ the remaining camera code (#845) * decomp: mostly finish `cam-master` * decomp/scripts: lots of work in cam-states * stash * Merge remote-tracking branch 'water111/master' into decomp/camera-master Updated submodule third-party/googletest * decompiler: Add support for non power of 2 offsets for inline arr access * decomp: mostly finish `cam-states` need to fix a macro issue * blocked: `cam-master` decompiler crash when adding casts * decomp: finish `cam-states-dbg` * decomp: mostly finish `pov-camera` with the exception of joint-related code * decomp: `cam-debug` finished decompiling, no way does this compile yet though * decomp: considerable work done in `cam-layout` * decomp: `cam-layout` almost done! * decomp: `pov-camera` finished, TC tests will fail for now * decomp: working on resolving issues * decomp: cam-layout decompiling * fixing more issues in cam-master...one event handler remains * skip problematic function in `cam-master` for now * gsrc: update res macros * decomp: finish `cam-states` * decomp: giving up on `cam-debug` * tests: allow skipping state handlers in ref tests * decomp: working through cam-layout bugs * decomp: allow for shifting non-integers * decomp: finalize `cam-layout` and `cam-master` * decomp: finalize `cam-states` * cleanup: bi-annual formatting of the casting files * formatting * address feedback - leave the float labels alone for now * address feedback * linting/formatting * update gsrc and ref tests Co-authored-by: ManDude <7569514+ManDude@users.noreply.github.com>
2021-10-16 21:01:23 -04:00
void ObjectFileDB::analyze_functions_ir2(
const fs::path& output_dir,
decomp: finish _almost all of_ the remaining camera code (#845) * decomp: mostly finish `cam-master` * decomp/scripts: lots of work in cam-states * stash * Merge remote-tracking branch 'water111/master' into decomp/camera-master Updated submodule third-party/googletest * decompiler: Add support for non power of 2 offsets for inline arr access * decomp: mostly finish `cam-states` need to fix a macro issue * blocked: `cam-master` decompiler crash when adding casts * decomp: finish `cam-states-dbg` * decomp: mostly finish `pov-camera` with the exception of joint-related code * decomp: `cam-debug` finished decompiling, no way does this compile yet though * decomp: considerable work done in `cam-layout` * decomp: `cam-layout` almost done! * decomp: `pov-camera` finished, TC tests will fail for now * decomp: working on resolving issues * decomp: cam-layout decompiling * fixing more issues in cam-master...one event handler remains * skip problematic function in `cam-master` for now * gsrc: update res macros * decomp: finish `cam-states` * decomp: giving up on `cam-debug` * tests: allow skipping state handlers in ref tests * decomp: working through cam-layout bugs * decomp: allow for shifting non-integers * decomp: finalize `cam-layout` and `cam-master` * decomp: finalize `cam-states` * cleanup: bi-annual formatting of the casting files * formatting * address feedback - leave the float labels alone for now * address feedback * linting/formatting * update gsrc and ref tests Co-authored-by: ManDude <7569514+ManDude@users.noreply.github.com>
2021-10-16 21:01:23 -04:00
const Config& config,
const std::optional<std::function<void(std::string)>> prefile_callback,
const std::optional<std::function<void()>> postfile_callback,
decomp: finish _almost all of_ the remaining camera code (#845) * decomp: mostly finish `cam-master` * decomp/scripts: lots of work in cam-states * stash * Merge remote-tracking branch 'water111/master' into decomp/camera-master Updated submodule third-party/googletest * decompiler: Add support for non power of 2 offsets for inline arr access * decomp: mostly finish `cam-states` need to fix a macro issue * blocked: `cam-master` decompiler crash when adding casts * decomp: finish `cam-states-dbg` * decomp: mostly finish `pov-camera` with the exception of joint-related code * decomp: `cam-debug` finished decompiling, no way does this compile yet though * decomp: considerable work done in `cam-layout` * decomp: `cam-layout` almost done! * decomp: `pov-camera` finished, TC tests will fail for now * decomp: working on resolving issues * decomp: cam-layout decompiling * fixing more issues in cam-master...one event handler remains * skip problematic function in `cam-master` for now * gsrc: update res macros * decomp: finish `cam-states` * decomp: giving up on `cam-debug` * tests: allow skipping state handlers in ref tests * decomp: working through cam-layout bugs * decomp: allow for shifting non-integers * decomp: finalize `cam-layout` and `cam-master` * decomp: finalize `cam-states` * cleanup: bi-annual formatting of the casting files * formatting * address feedback - leave the float labels alone for now * address feedback * linting/formatting * update gsrc and ref tests Co-authored-by: ManDude <7569514+ManDude@users.noreply.github.com>
2021-10-16 21:01:23 -04:00
const std::unordered_set<std::string>& skip_functions,
const std::unordered_map<std::string, std::unordered_set<std::string>>& skip_states) {
int total_file_count = 0;
for (auto& f : obj_files_by_name) {
total_file_count += f.second.size();
}
int file_idx = 1;
for_each_obj([&](ObjectFileData& data) {
if (prefile_callback) {
prefile_callback.value()(data.to_unique_name());
}
lg::info("[{:3d}/{}]------ {}", file_idx++, total_file_count, data.to_unique_name());
process_object_file_data(data, output_dir, config, skip_functions, skip_states);
if (postfile_callback) {
postfile_callback.value()();
}
});
lg::info("{}", stats.let.print());
if (config.generate_symbol_definition_map) {
lg::info("Generating symbol definition map...");
map_builder.build_map();
std::string result = map_builder.convert_to_json();
file_util::write_text_file(output_dir / "symbol_map.json", result);
}
}
/*!
 * First group of passes for one segment of an object file, run in this order:
 * basic blocks, stack spill slots, atomic ops.
 * process_object_file_data runs this on every segment before ir2_setup_labels.
 */
void ObjectFileDB::ir2_do_segment_analysis_phase1(int seg,
const Config& config,
ObjectFileData& data) {
ir2_basic_block_pass(seg, config, data);
ir2_stack_spill_slot_pass(seg, data);
ir2_atomic_op_pass(seg, config, data);
}
/*!
 * Second group of passes for one segment of an object file, run in this order:
 * type analysis, register usage, variables, cfg build, expressions, inline asm rewrite,
 * let insertion, store errors.
 * process_object_file_data runs this after ir2_setup_labels.
 */
void ObjectFileDB::ir2_do_segment_analysis_phase2(int seg,
const Config& config,
ObjectFileData& data) {
ir2_type_analysis_pass(seg, config, data);
ir2_register_usage_pass(seg, data);
ir2_variable_pass(seg, data);
ir2_cfg_build_pass(seg, data);
ir2_build_expressions(seg, config, data);
ir2_rewrite_inline_asm_instructions(seg, data);
ir2_insert_lets(seg, data);
ir2_add_store_errors(seg, data);
}
/*!
 * Create the LabelDB for an object file and run analyze_labels on it.
 * Label types for this object are taken from config.label_types when present.
 * Nothing is done unless the object has 3 segments. Any exception is reported through lg::die.
 */
void ObjectFileDB::ir2_setup_labels(const Config& config, ObjectFileData& data) {
  if (data.linked_data.segments != 3) {
    return;
  }

  // start with no label config and fill it in only if the config has an entry for this object.
  std::unordered_map<std::string, LabelConfigInfo> config_labels;
  if (const auto found = config.label_types.find(data.to_unique_name());
      found != config.label_types.end()) {
    config_labels = found->second;
  }

  try {
    auto& linked = data.linked_data;
    linked.label_db = std::make_unique<LabelDB>(config_labels, linked.labels, dts);
    analyze_labels(linked.label_db.get(), &linked);
  } catch (const std::exception& e) {
    lg::die("Error parsing labels for {}: {}", data.to_unique_name(), e.what());
  }
}
/*!
 * Run mips2c on the functions of this object that the config asks for:
 * - functions named in config.hacks.mips2c_functions_by_name get run_mips2c
 * - functions named in config.hacks.mips2c_jump_table_functions get run_mips2c_jump_table
 * The two checks are independent of each other.
 */
void ObjectFileDB::ir2_run_mips2c(const Config& config, ObjectFileData& data) {
  const auto& hacks = config.hacks;
  for_each_function_def_order_in_obj(data, [&](Function& func, int) {
    const bool listed_by_name = hacks.mips2c_functions_by_name.find(func.name()) !=
                                hacks.mips2c_functions_by_name.end();
    if (listed_by_name) {
      lg::info("MIPS2C on {}", func.name());
      run_mips2c(&func, config.game_version);
    }

    const auto jump_table = hacks.mips2c_jump_table_functions.find(func.name());
    if (jump_table != hacks.mips2c_jump_table_functions.end()) {
      run_mips2c_jump_table(&func, jump_table->second, config.game_version);
    }
  });
}
/*!
 * Analyze the top level function of each object.
 * - Find global function definitions
 * - Find type definitions
 * - Find method definitions
 * - Warn for non-unique function names.
 * Every function also gets its ids, a type (falling back to "function"), and the asm/mips2c
 * flags from the config. A summary of the function kinds is logged at the end.
 */
void ObjectFileDB::ir2_top_level_pass(const Config& config) {
  Timer timer;
  int total_functions = 0;
  int total_named_global_functions = 0;
  int total_methods = 0;
  int total_top_levels = 0;
  int total_unknowns = 0;

  for_each_obj([&](ObjectFileData& data) {
    if (data.linked_data.segments != 3) {
      return;
    }
    // the top level segment should have a single function
    auto& top_level_funcs = data.linked_data.functions_by_seg.at(2);
    ASSERT(top_level_funcs.size() == 1);
    auto& func = top_level_funcs.front();
    ASSERT(func.guessed_name.empty());
    func.guessed_name.set_as_top_level(data.to_unique_name());
    func.find_global_function_defs(data.linked_data, dts);
    func.find_type_defs(data.linked_data, dts);
    func.find_method_defs(data.linked_data, dts);
  });

  // check for function uniqueness.
  std::unordered_set<std::string> unique_names;
  std::unordered_map<std::string, std::unordered_set<std::string>> duplicated_functions;
  int uid = 1;
  for_each_obj([&](ObjectFileData& data) {
    int func_in_obj = 0;
    for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) {
      for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) {
        func.guessed_name.unique_id = uid++;
        func.guessed_name.id_in_object = func_in_obj++;
        func.guessed_name.object_name = data.to_unique_name();
        const auto name = func.name();

        switch (func.guessed_name.kind) {
          case FunctionName::FunctionKind::METHOD:
            total_methods++;
            break;
          case FunctionName::FunctionKind::GLOBAL:
            total_named_global_functions++;
            break;
          case FunctionName::FunctionKind::TOP_LEVEL_INIT:
            total_top_levels++;
            break;
          case FunctionName::FunctionKind::UNIDENTIFIED:
            total_unknowns++;
            break;
          default:
            ASSERT(false);
        }
        total_functions++;

        // insert() reports whether the name was new; a repeat means this object has a duplicate.
        if (!unique_names.insert(name).second) {
          duplicated_functions[name].insert(data.to_unique_name());
        }

        TypeSpec ts;
        const bool type_known =
            lookup_function_type(func.guessed_name, data.to_unique_name(), config, &ts);
        func.type = type_known ? ts : TypeSpec("function");

        // mips2c takes priority over asm if a function is listed as both.
        if (config.hacks.mips2c_functions_by_name.count(name) > 0) {
          func.warnings.info("Flagged as mips2c by config");
          func.suspected_asm = true;
        } else if (config.hacks.asm_functions_by_name.count(name) > 0) {
          func.warnings.error("Flagged as asm by config");
          func.suspected_asm = true;
        }
      }
    }
  });

  // we remember duplicates like this so we can warn on all occurrences of the duplicate name
  for_each_function([&](Function& func, int, ObjectFileData& data) {
    const auto name = func.name();
    const auto dup = duplicated_functions.find(name);
    if (dup != duplicated_functions.end()) {
      dup->second.insert(data.to_unique_name());
      func.warnings.info("this function exists in multiple non-identical object files");
    }
  });

  // share of all functions, as a percentage.
  const auto percent = [&](int count) { return 100.f * count / total_functions; };

  lg::info("Found a total of {} functions in {:.2f} ms", total_functions, timer.getMs());
  lg::info("{:4d} unknown {:.2f}%", total_unknowns, percent(total_unknowns));
  lg::info("{:4d} global {:.2f}%", total_named_global_functions,
           percent(total_named_global_functions));
  lg::info("{:4d} methods {:.2f}%", total_methods, percent(total_methods));
  lg::info("{:4d} logins {:.2f}%", total_top_levels, percent(total_top_levels));
}
/*!
 * Generate an "all types" file from the code object files and write it to output_file.
 * - previous_game_types: if set, parsed into a separate type system that is handed to the
 *   inspection helpers.
 * - bad_types: inspect methods for these type names are not processed.
 *
 * Steps:
 * 1. inspect each top-level function for metadata (type names and symbols)
 * 2. guess the type of each non-virtual state from the longest type name that prefixes it
 * 3. build type definitions from inspect methods, then the symbol definitions
 * 4. write everything out, grouped by object file
 */
void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
                                         const std::optional<std::string>& previous_game_types,
                                         const std::unordered_set<std::string>& bad_types) {
  auto is_code_file = [](ObjectFileData& data) {
    return (data.obj_version == 3 ||
            (data.obj_version == 5 && data.linked_data.has_any_functions()));
  };

  std::unordered_map<std::string, PerObjectAllTypeInfo> per_object;

  DecompilerTypeSystem previous_game_ts(GameVersion::Jak2);  // version here doesn't matter.
  if (previous_game_types) {
    previous_game_ts.parse_type_defs({*previous_game_types});
  }

  TypeInspectorCache ti_cache;

  // Do a first pass to initialize all types and symbols
  for_each_obj([&](ObjectFileData& data) {
    if (!is_code_file(data)) {
      return;
    }
    auto& object_info = per_object[data.to_unique_name()];
    object_info = PerObjectAllTypeInfo();
    // Go through the top-level segment first to identify the type names associated with each
    // symbol def
    for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) {
      inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, object_info);
    });
  });

  // Guess at non-virtual state type's:
  //
  // Collect all type names, since the DTS doesn't know the actual type tree (all-types is empty!)
  // we can't filter by what is actually a process type (with existing code).
  std::unordered_map<std::string, std::vector<std::string>> all_type_names;
  for (const auto& [obj_name, obj_info] : per_object) {
    for (const auto& type_name : obj_info.type_names_in_order) {
      // operator[] creates the entry on first use, so objects without types get no entry.
      all_type_names[obj_name].push_back(type_name);
    }
  }

  for (auto& [obj_name, obj_info] : per_object) {
    for (const auto& [sym_name, sym_type] : obj_info.symbol_types) {
      if (sym_type != "state") {
        continue;
      }
      int longest_match_length = 0;
      std::string longest_match;
      std::string longest_match_object_name;
      // Make a best effort guess by finding the longest prefix match
      for (const auto& [candidate_obj_name, type_names] : all_type_names) {
        for (const auto& type_name : type_names) {
          if (str_util::starts_with(sym_name, type_name) &&
              static_cast<int>(type_name.length()) > longest_match_length) {
            longest_match_length = static_cast<int>(type_name.length());
            longest_match = type_name;
            longest_match_object_name = candidate_obj_name;
          }
        }
      }
      if (longest_match.empty()) {
        continue;
      }
      // the guess is stored on the object that defines the type, but the symbol is marked as
      // seen on the object that defines the state.
      const auto owner = per_object.find(longest_match_object_name);
      if (owner != per_object.end()) {
        owner->second.non_virtual_state_guesses[sym_name] = longest_match;
        obj_info.already_seen_symbols.insert(sym_name);
      }
    }
  }

  // Then another to actually setup the definitions
  for_each_obj([&](ObjectFileData& data) {
    if (!is_code_file(data)) {
      return;
    }
    auto& object_result = per_object.at(data.to_unique_name());

    // Handle the top level last, which is fine as all symbol_defs are always written after
    // typedefs
    for_each_function_def_order_in_obj(data, [&](Function& f, int seg) {
      if (seg == TOP_LEVEL_SEGMENT) {
        return;
      }
      if (f.is_inspect_method && bad_types.find(f.guessed_name.type_name) == bad_types.end()) {
        auto deftype_from_inspect =
            inspect_inspect_method(f, f.guessed_name.type_name, dts, data.linked_data,
                                   previous_game_ts, ti_cache, object_result);
        // only record the name the first time this type is seen, to keep the order stable.
        bool already_seen = object_result.type_info.count(f.guessed_name.type_name) > 0;
        if (!already_seen) {
          object_result.type_names_in_order.push_back(f.guessed_name.type_name);
        }
        auto& info = object_result.type_info[f.guessed_name.type_name];
        info.from_inspect_method = true;
        info.type_definition = deftype_from_inspect;
      } else {
        // no inspect methods
        // - can we solve custom print methods in a generic way? ie `entity-links`
      }
    });

    for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) {
      object_result.symbol_defs += inspect_top_level_symbol_defines(
          f, data.linked_data, dts, previous_game_ts, object_result);
    });
  });

  // Output result
  std::string result;
  result += ";; All Types\n\n";
  for_each_obj([&](ObjectFileData& data) {
    if (!is_code_file(data)) {
      return;
    }
    auto& obj = per_object.at(data.to_unique_name());
    result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
    result += fmt::format(";; {:30s} ;;\n", data.name_in_dgo);
    result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n");
    for (const auto& type_name : obj.type_names_in_order) {
      auto& info = obj.type_info.at(type_name);
      result += info.type_definition;
      result += "\n";
    }
    result += obj.symbol_defs;
    result += "\n";
  });

  file_util::write_text_file(output_file, result);
}
/*!
* Initial Function Analysis Pass to build the control flow graph.
* - Find basic blocks
* - Analyze prologue and epilogue
* - Build control flow graph
*/
void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config, ObjectFileData& data) {
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
func.ir2.env.file = &data.linked_data;
func.ir2.env.dts = &dts;
func.ir2.env.func = &func;
// first, find basic blocks.
auto blocks = find_blocks_in_function(data.linked_data, seg, func);
func.basic_blocks = blocks;
if (!func.suspected_asm) {
// find the prologue/epilogue so they can be excluded from basic blocks.
func.analyze_prologue(data.linked_data);
} else {
// manually exclude the type tag from the basic block.
ASSERT(func.basic_blocks.front().start_word == 0);
ASSERT(func.basic_blocks.front().end_word >= 1);
func.basic_blocks.front().start_word = 1;
}
if (!func.suspected_asm) {
// run analysis
// build a control flow graph, just looking at branch instructions.
CondWithElseLengthHack hack;
auto lookup = config.hacks.cond_with_else_len_by_func_name.find(func.name());
if (lookup != config.hacks.cond_with_else_len_by_func_name.end()) {
hack = lookup->second;
}
std::unordered_set<int> asm_br_blocks;
auto asm_lookup = config.hacks.blocks_ending_in_asm_branch_by_func_name.find(func.name());
if (asm_lookup != config.hacks.blocks_ending_in_asm_branch_by_func_name.end()) {
asm_br_blocks = asm_lookup->second;
}
func.cfg = build_cfg(data.linked_data, seg, func, hack, asm_br_blocks, config.game_version);
if (!func.cfg->is_fully_resolved()) {
lg::warn("Function {} from {} failed to build control flow graph!", func.name(),
data.to_unique_name());
} else {
func.cfg_ok = true;
}
}
if (func.suspected_asm) {
func.warnings.info("Assembly Function");
}
});
}
/*!
 * Build the stack spill map for each function in the segment that has a good cfg.
 * The instructions are scanned between prologue_end and epilogue_start.
 * A failure is recorded as a warning on the function.
 */
void ObjectFileDB::ir2_stack_spill_slot_pass(int seg, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    if (func.cfg_ok) {
      try {
        func.ir2.env.set_stack_spills(
            build_spill_map(func.instructions, {func.prologue_end, func.epilogue_start}));
      } catch (const std::exception& e) {
        func.warnings.warning("stack spill failed: {}", e.what());
      }
    }
  });
}
/*!
 * Conversion of MIPS instructions into AtomicOps. The AtomicOps represent what we
 * think are IR of the original GOAL compiler.
 * Only runs on functions with a good cfg that are not flagged as asm. A failure is logged and
 * recorded as an error on the function, leaving atomic_ops_succeeded unset.
 */
void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    if (!func.cfg_ok || func.suspected_asm) {
      return;
    }

    // set before the attempt, so a failure is visible as "attempted but not succeeded".
    func.ir2.atomic_ops_attempted = true;
    try {
      const auto& hacks = config.hacks;
      const bool inline_asm = hacks.hint_inline_assembly_functions.find(func.name()) !=
                              hacks.hint_inline_assembly_functions.end();

      std::unordered_set<int> blocks_ending_in_asm_branch;
      const auto asm_branch_it = hacks.blocks_ending_in_asm_branch_by_func_name.find(func.name());
      if (asm_branch_it != hacks.blocks_ending_in_asm_branch_by_func_name.end()) {
        blocks_ending_in_asm_branch = asm_branch_it->second;
      }

      auto ops =
          convert_function_to_atomic_ops(func, data.linked_data.labels, func.warnings, inline_asm,
                                         blocks_ending_in_asm_branch, config.game_version);
      func.ir2.atomic_ops = std::make_shared<FunctionAtomicOps>(std::move(ops));
      func.ir2.atomic_ops_succeeded = true;
      func.ir2.env.set_end_var(func.ir2.atomic_ops->end_op().return_var());
    } catch (const std::exception& e) {
      lg::warn("Function {} from {} could not be converted to atomic ops: {}", func.name(),
               data.to_unique_name(), e.what());
      func.warnings.error("Failed to convert to atomic ops: {}", e.what());
    }
  });
}
/*!
 * Add this object file to the symbol definition map builder.
 */
void ObjectFileDB::ir2_symbol_definition_map(ObjectFileData& data) {
map_builder.add_object(data);
}
/*!
 * Look up key in map and return a copy of the stored value.
 * If the key is not present, a value-initialized Value is returned instead.
 */
template <typename Key, typename Value>
Value try_lookup(const std::unordered_map<Key, Value>& map, const Key& key) {
  const auto entry = map.find(key);
  return entry != map.end() ? entry->second : Value();
}
/*!
 * Find the art group override for a type in a specific object file.
 * Returns a pointer to the entry in config.art_group_file_override[obj_name][type_name], or
 * nullptr if there is no entry for the object or for the type.
 */
const std::string* find_file_override_for_art_group(const Config& config,
                                                    const std::string& obj_name,
                                                    const std::string& type_name) {
  const auto& overrides = config.art_group_file_override;

  const auto per_file = overrides.find(obj_name);
  if (per_file == overrides.end()) {
    return nullptr;
  }

  // find file override for this type
  const auto per_type = per_file->second.find(type_name);
  if (per_type == per_file->second.end()) {
    return nullptr;
  }
  return &per_type->second;
}
/*!
* Analyze registers and determine the type in each register at each instruction.
* - Figure out the type of each function, from configs.
* - Propagate types.
2021-01-12 19:20:08 -05:00
* - NOTE: this will update register info usage more accurately for functions.
*/
void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config, ObjectFileData& data) {
auto obj_name = data.to_unique_name();
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
if (!func.suspected_asm) {
TypeSpec ts;
if (lookup_function_type(func.guessed_name, data.to_unique_name(), config, &ts) &&
func.ir2.atomic_ops_succeeded) {
func.type = ts;
// try type analysis here.
auto func_name = func.name();
auto register_casts =
try_lookup(config.register_type_casts_by_function_by_atomic_op_idx, func_name);
func.ir2.env.set_type_casts(register_casts);
auto stack_casts =
try_lookup(config.stack_type_casts_by_function_by_stack_offset, func_name);
func.ir2.env.set_stack_casts(stack_casts);
if (config.hacks.pair_functions_by_name.find(func_name) !=
config.hacks.pair_functions_by_name.end()) {
func.ir2.env.set_sloppy_pair_typing();
}
if (config.hacks.reject_cond_to_value.find(func_name) !=
config.hacks.reject_cond_to_value.end()) {
func.ir2.env.aggressively_reject_cond_to_value_rewrite = true;
}
func.ir2.env.set_stack_structure_hints(
try_lookup(config.stack_structure_hints_by_function, func_name));
decomp3: more engine stuff, detect non-virtual state inheritance (#3377) - `speech` - `ambient` - `water-h` - `vol-h` - `generic-obs` - `carry-h` - `pilot-h` - `board-h` - `gun-h` - `flut-h` - `indax-h` - `lightjak-h` - `darkjak-h` - `target-util` - `history` - `collide-reaction-target` - `logic-target` - `sidekick` - `projectile` - `voicebox` - `ragdoll-edit` - most of `ragdoll` (not added to gsrc yet) - `curves` - `find-nearest` - `lightjak-wings` - `target-handler` - `target-anim` - `target` - `target2` - `target-swim` - `target-lightjak` - `target-invisible` - `target-death` - `target-gun` - `gun-util` - `board-util` - `target-board` - `board-states` - `mech-h` - `vol` - `vent` - `viewer` - `gem-pool` - `collectables` - `crates` - `secrets-menu` Additionally: - Detection of non-virtual state inheritance - Added a config file that allows overriding the process stack size set by `stack-size-set!` calls - Fix for integer multiplication with `r0` - Fixed detection for the following macros: - `static-attack-info` - `defpart` and `defpartgroup` (probably still needs adjustments, uses Jak 2 implementation at the moment) - `sound-play` (Jak 3 seems to always call `sound-play-by-name` with a `sound-group` of 0, so the macro has been temporarily defaulted to use that) One somewhat significant change made here that should be noted is that the return type of `process::init-from-entity!` was changed to `object`. I've been thinking about this for a while, since it looks a bit nicer without the `(none)` at the end and I have recently encountered init methods that early return `0`.
2024-03-03 15:15:27 -05:00
if (config.process_stack_size_overrides.find(func_name) !=
config.process_stack_size_overrides.end()) {
func.process_stack_size = config.process_stack_size_overrides.at(func_name);
}
if (func.guessed_name.kind == FunctionName::FunctionKind::V_STATE) {
if (config.art_group_type_remap.find(func.guessed_name.type_name) !=
config.art_group_type_remap.end()) {
auto ag_override =
find_file_override_for_art_group(config, obj_name, func.guessed_name.type_name);
func.ir2.env.set_art_group(
ag_override ? *ag_override
: config.art_group_type_remap.at(func.guessed_name.type_name));
} else {
func.ir2.env.set_art_group(func.guessed_name.type_name + "-ag");
}
} else if (func.guessed_name.kind == FunctionName::FunctionKind::NV_STATE ||
func.type.try_get_tag("behavior").has_value()) {
std::string type = func.type.get_tag("behavior");
if (config.art_group_type_remap.find(type) != config.art_group_type_remap.end()) {
auto ag_override = find_file_override_for_art_group(config, obj_name, type);
func.ir2.env.set_art_group(ag_override ? *ag_override
: config.art_group_type_remap.at(type));
} else {
func.ir2.env.set_art_group(type + "-ag");
}
} else {
func.ir2.env.set_art_group(obj_name + "-ag");
}
func.ir2.env.set_jg(func.ir2.env.art_group());
if (config.joint_node_hacks.find(func.ir2.env.art_group()) !=
config.joint_node_hacks.end()) {
func.ir2.env.set_jg(config.joint_node_hacks.at(func.ir2.env.art_group()));
}
constexpr bool kForceNewTypes = false;
if (config.game_version != GameVersion::Jak1 || kForceNewTypes) {
// use new types for jak 2/3 always
types2::Input in;
types2::Output out;
in.func = &func;
in.function_type = ts;
in.dts = &dts;
try {
types2::run(out, in);
func.ir2.env.set_types(out.block_init_types, out.op_end_types, *func.ir2.atomic_ops,
ts);
} catch (const std::exception& e) {
func.warnings.error("Type analysis failed: {}", e.what());
}
func.ir2.env.types_succeeded = out.succeeded;
} else {
// old type pass
if (run_type_analysis_ir2(ts, dts, func)) {
func.ir2.env.types_succeeded = true;
} else {
func.warnings.error("Type analysis failed");
}
}
} else {
lg::warn("Function {} didn't know its type", func.name());
func.warnings.error("Function {} has unknown type", func.name());
}
}
});
}
void ObjectFileDB::ir2_register_usage_pass(int seg, ObjectFileData& data) {
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
2021-01-12 19:20:08 -05:00
if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) {
func.ir2.env.set_reg_use(analyze_ir2_register_usage(func));
auto& block_0_start = func.ir2.env.reg_use().block.at(0).input;
std::vector<Register> dep_regs;
for (auto x : block_0_start) {
dep_regs.push_back(x);
}
if (!dep_regs.empty()) {
std::sort(dep_regs.begin(), dep_regs.end(),
[](const Register& a, const Register& b) { return a.reg_id() < b.reg_id(); });
int end_valid_argument = Register(Reg::GPR, Reg::T3).reg_id() + 1;
if (func.type.arg_count() > 0) {
// end_valid_argument = Register::get_arg_reg(func.type.arg_count() - 1).reg_id();
end_valid_argument = Register(Reg::GPR, Reg::A0).reg_id() + func.type.arg_count() - 1;
}
for (auto& x : dep_regs) {
if ((x.get_kind() == Reg::VF && x.get_vf() != 0) || x.get_kind() == Reg::SPECIAL) {
lg::error("Bad vf dependency on {} in {}", x.to_charp(), func.name());
func.warnings.error("Bad vector register dependency: {}", x.to_string());
continue;
}
if (x == Register(Reg::GPR, Reg::S6) || x == Register(Reg::GPR, Reg::S7) ||
x == Register(Reg::GPR, Reg::SP) || x == Register(Reg::VF, 0)) {
continue;
}
if (x.reg_id() < end_valid_argument) {
continue;
}
lg::error("Bad register dependency on {} in {}", x.to_charp(), func.name());
if (x.to_string() == "f31") {
func.warnings.warning("Function may read a register that is not set: {}",
x.to_string());
} else {
func.warnings.error("Function may read a register that is not set: {}", x.to_string());
}
}
}
2021-01-12 19:20:08 -05:00
}
});
}
/*!
 * Run variable renaming on each function in the segment and store the resulting local variables.
 * Only runs on functions that are not flagged as asm, have atomic ops, and have type analysis.
 * A failure is logged and the function is left without local variables.
 */
void ObjectFileDB::ir2_variable_pass(int seg, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    if (func.suspected_asm || !func.ir2.atomic_ops_succeeded ||
        !func.ir2.env.has_type_analysis()) {
      return;
    }
    try {
      auto result =
          run_variable_renaming(func, func.ir2.env.reg_use(), *func.ir2.atomic_ops, dts);
      if (result.has_value()) {
        func.ir2.env.set_local_vars(*result);
      }
    } catch (const std::exception& e) {
      lg::warn("variable pass failed on {}: {}", func.name(), e.what());
    }
  });
}
/*!
 * Build the initial forms for each function in the segment.
 * Only runs on functions that are not flagged as asm, have atomic ops, and have a fully resolved
 * cfg. On failure, an error is recorded on the function and its top_form is cleared.
 */
void ObjectFileDB::ir2_cfg_build_pass(int seg, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    // func.cfg is only looked at after the first two checks pass.
    if (func.suspected_asm || !func.ir2.atomic_ops_succeeded || !func.cfg->is_fully_resolved()) {
      return;
    }
    try {
      build_initial_forms(func);
    } catch (const std::exception& e) {
      func.warnings.error("Failed to structure: {}", e.what());
      func.ir2.top_form = nullptr;
    }
  });
}
void ObjectFileDB::ir2_build_expressions(int seg, const Config& config, ObjectFileData& data) {
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
2021-01-22 20:50:37 -05:00
(void)data;
if (func.ir2.top_form && func.ir2.env.has_type_analysis() && func.ir2.env.has_local_vars() &&
func.ir2.env.types_succeeded) {
auto name = func.name();
auto arg_config = config.function_arg_names.find(name);
auto var_config = config.function_var_overrides.find(name);
if (convert_to_expressions(func.ir2.top_form, *func.ir2.form_pool, func,
arg_config != config.function_arg_names.end()
? arg_config->second
: std::vector<std::string>{},
var_config != config.function_var_overrides.end()
? var_config->second
: std::unordered_map<std::string, LocalVarOverride>{},
dts)) {
2021-01-22 20:50:37 -05:00
func.ir2.print_debug_forms = true;
func.ir2.expressions_succeeded = true;
2021-01-22 20:50:37 -05:00
}
}
});
}
void ObjectFileDB::ir2_insert_lets(int seg, ObjectFileData& data) {
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
if (func.ir2.expressions_succeeded) {
try {
insert_lets(func, func.ir2.env, *func.ir2.form_pool, func.ir2.top_form, stats.let);
} catch (const std::exception& e) {
const auto err = fmt::format(
"Error while inserting lets: {}. Make sure that the return type is not "
"none if something is actually returned.",
e.what());
lg::warn("{}", err);
func.warnings.error(err);
}
}
});
}
void ObjectFileDB::ir2_add_store_errors(int seg, ObjectFileData& data) {
for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
if (func.ir2.expressions_succeeded && !func.warnings.has_errors()) {
// print warning about failed store, but only if decompilation passes without any major
// errors
func.ir2.top_form->apply([&](FormElement* f) {
auto as_store = dynamic_cast<StoreElement*>(f);
if (as_store) {
func.warnings.error("Failed store: {} at op {}", as_store->to_string(func.ir2.env),
as_store->op()->op_id());
}
});
}
});
}
/*!
 * Run the inline assembly rewrite on every function of one segment that has a top-level form
 * and type analysis. If the rewrite reports success, debug form printing is enabled for the
 * function.
 */
void ObjectFileDB::ir2_rewrite_inline_asm_instructions(int seg, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    const bool ready = func.ir2.top_form && func.ir2.env.has_type_analysis();
    if (ready &&
        rewrite_inline_asm_instructions(func.ir2.top_form, *func.ir2.form_pool, func, dts)) {
      func.ir2.print_debug_forms = true;
    }
  });
}
/*!
 * Run the static reference insertion pass on every function of one segment that has a
 * top-level form and type analysis. A failure is recorded in the function's warnings and
 * logged as an error.
 */
void ObjectFileDB::ir2_insert_anonymous_functions(int seg, ObjectFileData& data) {
  for_each_function_in_seg_in_obj(seg, data, [&](Function& func) {
    if (!func.ir2.top_form || !func.ir2.env.has_type_analysis()) {
      return;
    }

    try {
      insert_static_refs(func.ir2.top_form, *func.ir2.form_pool, func, dts);
    } catch (const std::exception& e) {
      func.warnings.error("Failed static ref finding: {}\n", e.what());
      lg::error("Function {} failed static ref: {}\n", func.name(), e.what());
    }
  });
}
void ObjectFileDB::ir2_write_results(const fs::path& output_dir,
const Config& config,
const std::vector<std::string>& imports,
ObjectFileData& obj) {
if (obj.linked_data.has_any_functions()) {
auto file_text = ir2_to_file(obj, config);
auto file_name = output_dir / (obj.to_unique_name() + "_ir2.asm");
file_util::write_text_file(file_name, file_text);
g/j1: Cleanup all main issues in the formatter and format all of `goal_src/jak1` (#3535) This PR does two main things: 1. Work through the main low-hanging fruit issues in the formatter keeping it from feeling mature and usable 2. Iterate and prove that point by formatting all of the Jak 1 code base. **This has removed around 100K lines in total.** - The decompiler will now format it's results for jak 1 to keep things from drifting back to where they were. This is controlled by a new config flag `format_code`. How am I confident this hasn't broken anything?: - I compiled the entire project and stored it's `out/jak1/obj` files separately - I then recompiled the project after formatting and wrote a script that md5's each file and compares it (`compare-compilation-outputs.py` - The results (eventually) were the same: ![Screenshot 2024-05-25 132900](https://github.com/open-goal/jak-project/assets/13153231/015e6f20-8d19-49b7-9951-97fa88ddc6c2) > This proves that the only difference before and after is non-critical whitespace for all code/macros that is actually in use. I'm still aware of improvements that could be made to the formatter, as well as general optimization of it's performance. But in general these are for rare or non-critical situations in my opinion and I'll work through them before doing Jak 2. The vast majority looks great and is working properly at this point. Those known issues are the following if you are curious: ![image](https://github.com/open-goal/jak-project/assets/13153231/0edfaba1-6d36-40f5-ab23-0642209867c4)
2024-06-05 22:17:31 -04:00
auto unformatted_code = ir2_final_out(obj, imports, {});
auto final_name = output_dir / (obj.to_unique_name() + "_disasm.gc");
g/j1: Cleanup all main issues in the formatter and format all of `goal_src/jak1` (#3535) This PR does two main things: 1. Work through the main low-hanging fruit issues in the formatter keeping it from feeling mature and usable 2. Iterate and prove that point by formatting all of the Jak 1 code base. **This has removed around 100K lines in total.** - The decompiler will now format it's results for jak 1 to keep things from drifting back to where they were. This is controlled by a new config flag `format_code`. How am I confident this hasn't broken anything?: - I compiled the entire project and stored it's `out/jak1/obj` files separately - I then recompiled the project after formatting and wrote a script that md5's each file and compares it (`compare-compilation-outputs.py` - The results (eventually) were the same: ![Screenshot 2024-05-25 132900](https://github.com/open-goal/jak-project/assets/13153231/015e6f20-8d19-49b7-9951-97fa88ddc6c2) > This proves that the only difference before and after is non-critical whitespace for all code/macros that is actually in use. I'm still aware of improvements that could be made to the formatter, as well as general optimization of it's performance. But in general these are for rare or non-critical situations in my opinion and I'll work through them before doing Jak 2. The vast majority looks great and is working properly at this point. Those known issues are the following if you are curious: ![image](https://github.com/open-goal/jak-project/assets/13153231/0edfaba1-6d36-40f5-ab23-0642209867c4)
2024-06-05 22:17:31 -04:00
if (config.format_code) {
const auto formatted_code = formatter::format_code(unformatted_code);
if (!formatted_code) {
lg::error(
"Was unable to format the decompiled result of {}, make a github issue. Writing "
"unformatted code",
obj.to_unique_name());
file_util::write_text_file(final_name, unformatted_code);
} else {
file_util::write_text_file(final_name, formatted_code.value());
}
} else {
file_util::write_text_file(final_name, unformatted_code);
}
}
}
/*!
 * Build the text dump of an object file that ir2_write_results stores as "_ir2.asm".
 * The result starts with an ALL_TYPES header line. Then, for each segment (from the highest
 * segment index down to 0), it contains:
 *  - a segment header
 *  - for each function: the output of ir2_function_to_string, followed by either the final
 *    defun output (if there is a top-level form and local variables) or a listing of the atomic
 *    ops (if only atomic ops succeeded), and optionally the control flow graph
 *  - the words of the segment's data zone, with their labels
 */
std::string ObjectFileDB::ir2_to_file(ObjectFileData& data, const Config& config) {
  std::string result;
  auto all_types_path = file_util::get_file_path({config.all_types_file});
  auto game_version = game_version_names[config.game_version];
  result += fmt::format("; ALL_TYPES={}={}\n\n", game_version, all_types_path);

  const char* segment_names[] = {"main segment", "debug segment", "top-level segment"};
  // seg is used as an index into segment_names below.
  ASSERT(data.linked_data.segments <= 3);

  for (int seg = data.linked_data.segments; seg-- > 0;) {
    // segment header
    result += ";------------------------------------------\n;  ";
    result += segment_names[seg];
    result += "\n;------------------------------------------\n\n";

    // functions
    for (auto& func : data.linked_data.functions_by_seg.at(seg)) {
      try {
        result += ir2_function_to_string(data, func, seg);
      } catch (const std::exception& e) {
        // keep going with the rest of the file, but leave a note in the output.
        result += "Failed to write ";
        result += func.name();
        result += ": ";
        result += e.what();
        result += "\n";
      }

      if (func.ir2.top_form && func.ir2.env.has_local_vars()) {
        // local variables are known to exist here, so the final output can always be used.
        result += "\n;;-*-OpenGOAL-Start-*-\n\n";
        if (!func.ir2.print_debug_forms) {
          result += ";; expression building failed part way through, function may be weird\n";
        }
        result += final_defun_out(func, func.ir2.env, dts);
        result += "\n\n;;-*-OpenGOAL-End-*-\n\n";
      } else if (func.ir2.atomic_ops_succeeded) {
        // no forms to print: fall back to listing the atomic ops.
        auto& ao = func.ir2.atomic_ops;
        for (size_t i = 0; i < ao->ops.size(); i++) {
          auto& op = ao->ops.at(i);

          // the function end op gets no label check.
          if (!dynamic_cast<FunctionEndOp*>(op.get())) {
            auto instr_idx = ao->atomic_op_to_instruction.at(i);

            // check for a label to print
            auto label_id = data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4);
            if (label_id != -1) {
              result += fmt::format("(label {})\n", data.linked_data.labels.at(label_id).name);
            }

            // check for no misaligned labels in code segments.
            for (int j = 1; j < 4; j++) {
              ASSERT(data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4 + j) ==
                     -1);
            }
          }

          // print the atomic op
          result += fmt::format(" {}\n", op->to_string(func.ir2.env));
        }
      }

      // print if it exists, even if it's not okay.
      if (config.print_cfgs && func.cfg) {
        result += fmt::format("Control Flow Graph:\n{}\n\n",
                              pretty_print::to_string(func.cfg->to_form()));
      }

      // the debug form output is currently disabled by the constant false.
      if (false && func.ir2.print_debug_forms) {
        result += '\n';
        result += ";; DEBUG OUTPUT BELOW THIS LINE:\n";
        result += func.ir2.debug_form_string;
        result += '\n';
      }

      result += ";; .endfunction\n\n";
    }

    // print data
    for (size_t i = data.linked_data.offset_of_data_zone_by_seg.at(seg);
         i < data.linked_data.words_by_seg.at(seg).size(); i++) {
      // a label may point at any of the 4 bytes of the word.
      for (int j = 0; j < 4; j++) {
        auto label_id = data.linked_data.get_label_at(seg, i * 4 + j);
        if (label_id != -1) {
          result += data.linked_data.labels.at(label_id).name + ":";
          if (j != 0) {
            result += " (offset " + std::to_string(j) + ")";
          }
          result += "\n";
        }
      }

      auto& word = data.linked_data.words_by_seg[seg][i];
      data.linked_data.append_word_to_string(result, word);

      // a type pointer to "string" gets the string contents as a comment.
      if (word.kind() == LinkedWord::TYPE_PTR && word.symbol_name() == "string") {
        result += "; " + data.linked_data.get_goal_string(seg, i) + "\n";
      }
    }
    result += '\n';
  }

  return result;
}
namespace {
/*!
 * Add a piece of comment text to a line of output.
 * The first comment on a line (has_comment is false) is placed after a ";; " marker, with the
 * line padded with spaces to at least 30 characters first; has_comment is then set.
 * Later comments are appended after a single space, with the line first padded with spaces up
 * to "offset" if it is shorter. If that would make the text go past 120 characters, the comment
 * is instead put on a new line, indented by 30 spaces and started with its own ";; " marker.
 */
void append_commented(std::string& line,
                      bool& has_comment,
                      const std::string& to_append,
                      int offset = 0) {
  // minimum length before comment appears.
  constexpr int pre_comment_length = 30;
  // if comment overflows, how much to indent the next one
  constexpr int overflow_indent = 30;
  // longest text allowed before a comment is moved to a new line
  constexpr size_t max_length = 120;

  if (!has_comment) {
    // first comment: pad, then start the comment.
    if (line.length() < pre_comment_length) {
      line.resize(pre_comment_length, ' ');
    }
    line.append(";; ").append(to_append);
    has_comment = true;
    return;
  }

  const int current_length = static_cast<int>(line.length());
  const size_t start_column = static_cast<size_t>(std::max(current_length, offset));
  if (start_column + to_append.length() > max_length) {
    // doesn't fit, continue the comment on a new line.
    line += '\n';
    line.append(overflow_indent, ' ');
    line.append(";; ");
  } else {
    // fits, align to the requested offset if we aren't there yet.
    if (current_length < offset) {
      line.resize(offset, ' ');
    }
    line += ' ';
  }
  line.append(to_append);
}
}  // namespace
/*!
 * Build the annotated disassembly of a single function.
 * The output contains a header with the function name, the prologue info, any warnings, and
 * then every instruction of the function (except word 0, the type tag), grouped by basic block.
 * If atomic ops succeeded, the instruction that starts an atomic op is commented with the op,
 * and with register type info if type analysis is available. Strings referenced through labels
 * are added as comments too. An unresolved control flow graph and MIPS2C output are appended if
 * present.
 * Asserts that every instruction after the type tag was printed exactly once, in order.
 */
std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& func, int seg) {
  std::string result;
  result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
  result += "; .function " + func.name() + "\n";
  result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
  result += func.prologue.to_string(2) + "\n";
  if (func.guessed_name.kind == FunctionName::FunctionKind::NV_STATE ||
      func.guessed_name.kind == FunctionName::FunctionKind::V_STATE) {
    result += fmt::format(" ;internal_name: {}\n", func.state_handler_as_anon_func);
  }

  if (func.warnings.has_warnings()) {
    result += ";; Warnings:\n" + func.warnings.get_warning_text(true) + "\n";
  }

  /*
  if (func.ir2.env.has_local_vars()) {
    result += func.ir2.env.print_local_var_types(func.ir2.top_form);
  }
   */

  bool print_atomics = func.ir2.atomic_ops_succeeded;
  // print each instruction in the function.
  bool in_delay_slot = false;
  int total_instructions_printed = 0;
  int last_instr_printed = 0;

  // the instruction line currently being built. Started by print_instr_start, optionally
  // extended with comments, and flushed to result by print_instr_end.
  std::string line;

  auto print_instr_start = [&](int i) {
    // check for a label to print
    auto label_id = data.linked_data.get_label_at(seg, (func.start_word + i) * 4);
    if (label_id != -1) {
      result += data.linked_data.labels.at(label_id).name + ":\n";
    }

    // check for no misaligned labels in code segments.
    for (int j = 1; j < 4; j++) {
      ASSERT(data.linked_data.get_label_at(seg, (func.start_word + i) * 4 + j) == -1);
    }

    // print the assembly instruction
    auto& instr = func.instructions.at(i);
    line = " " + instr.to_string(data.linked_data.labels);
  };

  auto print_instr_end = [&](int i) {
    auto& instr = func.instructions.at(i);
    result += line;
    result += "\n";

    // print delay slot gap
    if (in_delay_slot) {
      result += "\n";
      in_delay_slot = false;
    }

    // for next time...
    if (gOpcodeInfo[(int)instr.kind].has_delay_slot) {
      in_delay_slot = true;
    }
    total_instructions_printed++;
    // instructions must be printed in order, without gaps.
    ASSERT(last_instr_printed + 1 == i);
    last_instr_printed = i;
  };

  // first, print the prologue. we start at word 1 because word 0 is the type tag
  for (int i = 1; i < func.basic_blocks.front().start_word; i++) {
    print_instr_start(i);
    print_instr_end(i);
  }

  // next, print each basic block
  int end_idx = func.basic_blocks.front().start_word;
  for (int block_id = 0; block_id < int(func.basic_blocks.size()); block_id++) {
    // block number
    result += "B" + std::to_string(block_id) + ":\n";
    auto& block = func.basic_blocks.at(block_id);

    // types before the next atomic op. Only set (and only used) if there is type analysis.
    const TypeState* init_types = nullptr;
    if (func.ir2.env.has_type_analysis()) {
      init_types = &func.ir2.env.get_types_at_block_entry(block_id);
    }

    int start_word = block.start_word;
    // if we have no prologue, skip the type tag.
    if (start_word == 0) {
      start_word = 1;
    }

    for (int instr_id = start_word; instr_id < block.end_word; instr_id++) {
      print_instr_start(instr_id);
      bool printed_comment = false;

      // print atomic op
      int op_id = -1;
      if (print_atomics && func.instr_starts_atomic_op(instr_id)) {
        auto& op = func.get_atomic_op_at_instr(instr_id);
        op_id = func.ir2.atomic_ops->instruction_to_atomic_op.at(instr_id);
        append_commented(line, printed_comment,
                         fmt::format("[{:3d}] {}", op_id,
                                     op.to_form(data.linked_data.labels, func.ir2.env).print()));

        if (func.ir2.env.has_type_analysis()) {
          append_commented(
              line, printed_comment,
              op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50);
        }

        /*if (func.ir2.env.has_reg_use()) {
          std::string regs;
          for (auto r : func.ir2.env.reg_use().op.at(op_id).live_in) {
            regs += r.to_charp();
            regs += ' ';
          }
          if (!regs.empty()) {
            append_commented(line, printed_comment, "lvi: " + regs, 50);
          }
        }*/
      }

      auto& instr = func.instructions.at(instr_id);
      // print linked strings
      for (int iidx = 0; iidx < instr.n_src; iidx++) {
        if (instr.get_src(iidx).is_label()) {
          // only read from the label, so refer to it instead of copying it.
          const auto& lab = data.linked_data.labels.at(instr.get_src(iidx).get_label());
          if (data.linked_data.is_string(lab.target_segment, lab.offset)) {
            append_commented(
                line, printed_comment,
                data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1));
          }
        }
      }
      print_instr_end(instr_id);

      // the types after this op are the types before the next one.
      if (print_atomics && func.ir2.env.has_type_analysis() &&
          func.instr_starts_atomic_op(instr_id)) {
        init_types = &func.ir2.env.get_types_after_op(op_id);
      }
    }
    end_idx = block.end_word;
  }

  // finally, print whatever comes after the last basic block.
  for (int i = end_idx; i < func.end_word - func.start_word; i++) {
    print_instr_start(i);
    print_instr_end(i);
  }

  // if the CFG could not be fully resolved, dump it for debugging.
  if (func.cfg) {
    if (!func.cfg->is_fully_resolved()) {
      result += func.cfg->to_form_string();
      result += "\n";
      result += func.cfg->to_dot();
      result += "\n";
    }
  }

  if (func.mips2c_output) {
    result += ";;-*-MIPS2C-Start-*-\n";
    result += *func.mips2c_output;
    result += ";;-*-MIPS2C-End-*-\n";
  }

  result += "\n";

  // everything but the type tag should have been printed.
  ASSERT(total_instructions_printed == (func.end_word - func.start_word - 1));
  return result;
}
/*!
* Try to look up the type of a function. Looks at the decompiler type info, the hints files,
* and other GOAL rules.
*/
bool ObjectFileDB::lookup_function_type(const FunctionName& name,
const std::string& obj_name,
const Config& config,
TypeSpec* result) {
// don't return function types that are explictly flagged as bad in config.
if (config.hacks.no_type_analysis_functions_by_name.find(name.to_string()) !=
config.hacks.no_type_analysis_functions_by_name.end()) {
return false;
}
if (name.kind == FunctionName::FunctionKind::GLOBAL) {
// global GOAL function.
auto kv = dts.symbol_types.find(name.function_name);
if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) {
if (kv->second.base_type() != "function") {
lg::die("Found a function named {} but the symbol has type {}", name.to_string(),
kv->second.print());
}
// good, found a global function with full type information.
*result = kv->second;
return true;
}
} else if (name.kind == FunctionName::FunctionKind::METHOD) {
MethodInfo info;
if (dts.ts.try_lookup_method(name.type_name, name.method_id, &info)) {
if (info.type.arg_count() >= 1) {
if (info.type.base_type() != "function") {
lg::die("Found a method named {} but the symbol has type {}", name.to_string(),
info.type.print());
}
// substitute the _type_ for the correct type.
*result = info.type.substitute_for_method_call(name.type_name);
return true;
}
}
} else if (name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) {
*result = dts.ts.make_function_typespec({}, "none");
return true;
} else if (name.kind == FunctionName::FunctionKind::UNIDENTIFIED) {
// try looking up the object
const auto& map = config.anon_function_types_by_obj_by_id;
auto obj_kv = map.find(obj_name);
if (obj_kv != map.end()) {
auto func_kv = obj_kv->second.find(name.get_anon_id());
if (func_kv != obj_kv->second.end()) {
*result = dts.parse_type_spec(func_kv->second);
return true;
}
}
} else if (name.kind == FunctionName::FunctionKind::NV_STATE) {
auto sym_type = dts.symbol_types.find(name.state_name);
if (sym_type == dts.symbol_types.end()) {
lg::error("Could not find symbol with name {} for state. This is likely a decompiler bug.",
name.state_name);
return false;
}
*result = get_state_handler_type(name.handler_kind, sym_type->second);
return true;
} else if (name.kind == FunctionName::FunctionKind::V_STATE) {
auto mi = dts.ts.lookup_method(name.type_name, name.state_name);
*result = get_state_handler_type(name.handler_kind,
mi.type.substitute_for_method_call(name.type_name));
return true;
} else {
ASSERT(false);
}
return false;
}
/*!
 * Build the final decompiled source text for an object file.
 * Only object files of version 3, or of version 5 with at least one function, are treated as
 * code files. For these, the result is a short header followed by the output of
 * write_from_top_level for the single top-level function. Everything else produces a
 * placeholder comment.
 * Asserts that the top-level segment contains exactly one function.
 */
std::string ObjectFileDB::ir2_final_out(ObjectFileData& data,
                                        const std::vector<std::string>& imports,
                                        const std::unordered_set<std::string>& skip_functions) {
  const bool is_code_file =
      data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions());
  if (!is_code_file) {
    return ";; not a code file.";
  }

  std::string result;
  result += ";;-*-Lisp-*-\n";
  result += "(in-package goal)\n\n";

  const auto& top_level_functions = data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT);
  ASSERT(top_level_functions.size() == 1);
  // refer to the function in place: it is only passed on, so there is no need to copy it.
  const auto& top_level = top_level_functions.at(0);
  result += write_from_top_level(top_level, dts, data.linked_data, imports, skip_functions);
  result += "\n\n";
  return result;
}
} // namespace decompiler