add more utils

This commit is contained in:
water 2020-11-27 16:38:36 -05:00
parent 4fb8381105
commit 921ae50669
13 changed files with 389 additions and 60 deletions

View file

@ -4,6 +4,7 @@
#include <memory>
#include "CfgVtx.h"
#include "decompiler/util/DecompilerTypeSystem.h"
class LinkedObjectFile;
class Function;
@ -11,6 +12,12 @@ class Function;
struct BasicBlock {
int start_word;
int end_word;
TypeState init_types;
int start_basic_op = -1;
int end_basic_op = -1;
std::string label_name;
std::vector<int> pred;
int succ_ft = -1;

View file

@ -1,6 +1,7 @@
#include "IR.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "common/goos/PrettyPrinter.h"
#include "third-party/fmt/core.h"
std::vector<std::shared_ptr<IR>> IR::get_all_ir(LinkedObjectFile& file) const {
(void)file;
@ -34,6 +35,69 @@ bool IR::update_types(TypeMap& reg_types, DecompilerTypeSystem& dts, LinkedObjec
return false;
}
namespace {
void add_regs_to_str(const std::vector<Register>& regs, std::string& str) {
bool first = true;
for (auto& reg : regs) {
if (first) {
first = false;
} else {
str.push_back(' ');
}
str.append(reg.to_charp());
}
}
/*!
 * Build a bitmask of the GPRs that appear in regs: bit N is set when a register of kind
 * Reg::GPR with get_gpr() == N is present. Registers of any other kind are ignored.
 * NOTE(review): assumes get_gpr() is always in [0, 31] for GPRs - confirm against Register.
 */
u32 regs_to_gpr_mask(const std::vector<Register>& regs) {
  u32 result = 0;
  for (const auto& reg : regs) {
    if (reg.get_kind() == Reg::GPR) {
      // Shift an unsigned one: shifting a signed int 1 by 31 would move it into the sign bit.
      result |= (static_cast<u32>(1) << reg.get_gpr());
    }
  }
  return result;
}
} // namespace
std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const {
std::string result = print(file);
if (result.length() < 40) {
result.append(40 - result.length(), ' ');
}
result += " ;;";
if (!write_regs.empty()) {
result += "write: [";
add_regs_to_str(write_regs, result);
result += "] ";
}
if (!read_regs.empty()) {
result += "read: [";
add_regs_to_str(read_regs, result);
result += "] ";
}
if (!clobber_regs.empty()) {
result += "clobber: [";
add_regs_to_str(clobber_regs, result);
result += "] ";
}
return result;
}
/*!
 * Print this op, padded with spaces to at least 40 characters, followed by a ";;" comment of
 * the form "[before] -> [after]".
 * "before" shows the types in init_types of the GPRs this op reads; "after" shows the types in
 * this op's end_types of the GPRs it writes.
 */
std::string IR_Atomic::print_with_types(const TypeState& init_types,
                                        const LinkedObjectFile& file) const {
  std::string text = print(file);
  if (text.length() < 40) {
    text.resize(40, ' ');
  }
  text += " ;; ";

  const std::string before = init_types.print_gpr_masked(regs_to_gpr_mask(read_regs));
  const std::string after = end_types.print_gpr_masked(regs_to_gpr_mask(write_regs));
  text += fmt::format("[{}] -> [{}]", before, after);
  return text;
}
goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const {
(void)file;
return pretty_print::build_list("INVALID-OPERATION");

View file

@ -7,6 +7,7 @@
#include <unordered_map>
#include "decompiler/Disasm/Register.h"
#include "common/type_system/TypeSpec.h"
#include "decompiler/util/DecompilerTypeSystem.h"
class LinkedObjectFile;
class DecompilerTypeSystem;
@ -39,6 +40,11 @@ class IR_Atomic : public virtual IR {
public:
std::vector<Register> read_regs, write_regs, clobber_regs;
bool reg_info_set = false;
TypeState end_types; // types at the end of this instruction
std::string print_with_types(const TypeState& init_types, const LinkedObjectFile& file) const;
std::string print_with_reguse(const LinkedObjectFile& file) const;
};
class IR_Failed : public virtual IR {

View file

@ -793,6 +793,44 @@ std::string LinkedObjectFile::print_disassembly() {
return result;
}
std::string LinkedObjectFile::print_type_analysis_debug() {
std::string result;
assert(segments <= 3);
for (int seg = segments; seg-- > 0;) {
// segment header
result += ";------------------------------------------\n; ";
result += segment_names[seg];
result += "\n;------------------------------------------\n\n";
// functions
for (auto& func : functions_by_seg.at(seg)) {
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
result += "; .function " + func.guessed_name.to_string() + "\n";
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
for (auto& block : func.basic_blocks) {
result += "\n";
if (!block.label_name.empty()) {
result += block.label_name + ":\n";
}
TypeState* init_types = &block.init_types;
for (int i = block.start_basic_op; i < block.end_basic_op; i++) {
result += " ";
// result += func.basic_ops.at(i)->print_with_reguse(*this);
// result += func.basic_ops.at(i)->print(*this);
result += func.basic_ops.at(i)->print_with_types(*init_types, *this);
result += "\n";
init_types = &func.basic_ops.at(i)->end_types;
}
}
}
}
return result;
}
/*!
* Hacky way to get a GOAL string object
*/

View file

@ -59,6 +59,7 @@ class LinkedObjectFile {
void process_fp_relative_links();
std::string print_scripts();
std::string print_disassembly();
std::string print_type_analysis_debug();
bool has_any_functions();
void append_word_to_string(std::string& dest, const LinkedWord& word) const;
std::string to_asm_json(const std::string& obj_file_name);

View file

@ -560,11 +560,35 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d
// printf("\n");
}
/*!
 * For every object file that has any functions, write its type analysis debug dump to
 * "<unique object name>_db.asm" inside output_dir. Logs the number of files written, the total
 * size and the elapsed time when done.
 */
void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) {
  spdlog::info("- Writing debug type analysis...");
  Timer timer;
  uint32_t total_bytes = 0;
  uint32_t total_files = 0;

  for_each_obj([&](ObjectFileData& obj) {
    // objects without functions have nothing to dump
    if (!obj.linked_data.has_any_functions()) {
      return;
    }

    const auto dump_text = obj.linked_data.print_type_analysis_debug();
    const auto dump_path = file_util::combine_path(output_dir, obj.to_unique_name() + "_db.asm");
    total_bytes += dump_text.size();
    file_util::write_text_file(dump_path, dump_text);
    total_files++;
  });

  spdlog::info("Wrote functions dumps:");
  spdlog::info(" Total {} files", total_files);
  spdlog::info(" Total {} MB", total_bytes / ((float)(1u << 20u)));
  spdlog::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(),
               total_bytes / ((1u << 20u) * timer.getSeconds()));
}
/*!
* Dump disassembly for object files containing code. Data zones will also be dumped.
*/
void ObjectFileDB::write_disassembly(const std::string& output_dir,
bool disassemble_objects_without_functions) {
bool disassemble_objects_without_functions,
bool write_json) {
spdlog::info("- Writing functions...");
Timer timer;
uint32_t total_bytes = 0, total_files = 0;
@ -577,7 +601,7 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir,
asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name());
auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".asm");
if (get_config().analyze_functions) {
if (get_config().analyze_functions && write_json) {
auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name());
auto json_asm_file_name =
file_util::combine_path(output_dir, obj.to_unique_name() + "_asm.json");
@ -736,6 +760,9 @@ std::string ObjectFileDB::process_game_count() {
return result;
}
/*!
* This is the main decompiler routine which runs after we've identified functions.
*/
void ObjectFileDB::analyze_functions() {
spdlog::info("- Analyzing Functions...");
Timer timer;
@ -744,68 +771,70 @@ void ObjectFileDB::analyze_functions() {
int resolved_cfg_functions = 0;
const auto& config = get_config();
{
timer.start();
for_each_obj([&](ObjectFileData& data) {
if (data.linked_data.segments == 3) {
// the top level segment should have a single function
assert(data.linked_data.functions_by_seg.at(2).size() == 1);
// Step 1 - analyze the "top level" or "login" code for each object file.
// this will give us type definitions, method definitions, and function definitions...
spdlog::info(" - Processing top levels...");
auto& func = data.linked_data.functions_by_seg.at(2).front();
assert(func.guessed_name.empty());
func.guessed_name.set_as_top_level();
func.find_global_function_defs(data.linked_data, dts);
func.find_type_defs(data.linked_data, dts);
func.find_method_defs(data.linked_data, dts);
}
});
timer.start();
for_each_obj([&](ObjectFileData& data) {
if (data.linked_data.segments == 3) {
// the top level segment should have a single function
assert(data.linked_data.functions_by_seg.at(2).size() == 1);
// check for function uniqueness.
std::unordered_set<std::string> unique_names;
std::unordered_map<std::string, std::unordered_set<std::string>> duplicated_functions;
auto& func = data.linked_data.functions_by_seg.at(2).front();
assert(func.guessed_name.empty());
func.guessed_name.set_as_top_level();
func.find_global_function_defs(data.linked_data, dts);
func.find_type_defs(data.linked_data, dts);
func.find_method_defs(data.linked_data, dts);
}
});
int uid = 1;
for_each_obj([&](ObjectFileData& data) {
int func_in_obj = 0;
for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) {
for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) {
func.guessed_name.unique_id = uid++;
func.guessed_name.id_in_object = func_in_obj++;
func.guessed_name.object_name = data.to_unique_name();
auto name = func.guessed_name.to_string();
// check for function uniqueness.
std::unordered_set<std::string> unique_names;
std::unordered_map<std::string, std::unordered_set<std::string>> duplicated_functions;
if (unique_names.find(name) != unique_names.end()) {
duplicated_functions[name].insert(data.to_unique_name());
}
int uid = 1;
for_each_obj([&](ObjectFileData& data) {
int func_in_obj = 0;
for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) {
for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) {
func.guessed_name.unique_id = uid++;
func.guessed_name.id_in_object = func_in_obj++;
func.guessed_name.object_name = data.to_unique_name();
auto name = func.guessed_name.to_string();
unique_names.insert(name);
if (unique_names.find(name) != unique_names.end()) {
duplicated_functions[name].insert(data.to_unique_name());
}
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
func.warnings += "flagged as asm by config\n";
func.suspected_asm = true;
}
unique_names.insert(name);
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
func.warnings += "flagged as asm by config\n";
func.suspected_asm = true;
}
}
});
}
});
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
(void)segment_id;
auto name = func.guessed_name.to_string();
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
(void)segment_id;
auto name = func.guessed_name.to_string();
if (duplicated_functions.find(name) != duplicated_functions.end()) {
duplicated_functions[name].insert(data.to_unique_name());
func.warnings += "this function exists in multiple non-identical object files";
}
});
/*
for (const auto& kv : duplicated_functions) {
printf("Function %s is found in non-identical object files:\n", kv.first.c_str());
for (const auto& obj : kv.second) {
printf(" %s\n", obj.c_str());
}
if (duplicated_functions.find(name) != duplicated_functions.end()) {
duplicated_functions[name].insert(data.to_unique_name());
func.warnings += "this function exists in multiple non-identical object files";
}
});
/*
for (const auto& kv : duplicated_functions) {
printf("Function %s is found in non-identical object files:\n", kv.first.c_str());
for (const auto& obj : kv.second) {
printf(" %s\n", obj.c_str());
}
*/
}
}
*/
int total_trivial_cfg_functions = 0;
int total_named_functions = 0;
@ -822,14 +851,19 @@ void ObjectFileDB::analyze_functions() {
timer.start();
int total_basic_blocks = 0;
// Main Pass over each function...
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
total_functions++;
// printf("in %s from %s\n", func.guessed_name.to_string().c_str(),
// data.to_unique_name().c_str());
// first, find basic blocks.
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
total_basic_blocks += blocks.size();
func.basic_blocks = blocks;
total_functions++;
// analyze the prologue
if (!func.suspected_asm) {
// first, find the prologue/epilogue
func.analyze_prologue(data.linked_data);
@ -838,19 +872,28 @@ void ObjectFileDB::analyze_functions() {
if (!func.suspected_asm) {
// run analysis
// build a control flow graph
// build a control flow graph, just looking at branch instructions.
func.cfg = build_cfg(data.linked_data, segment_id, func);
// convert individual basic blocks to sequences of IR Basic Ops
for (auto& block : func.basic_blocks) {
if (block.end_word > block.start_word) {
auto label_id =
data.linked_data.get_label_at(segment_id, (func.start_word + block.start_word) * 4);
if (label_id != -1) {
block.label_name = data.linked_data.get_label_name(label_id);
}
block.start_basic_op = func.basic_ops.size();
add_basic_ops_to_block(&func, block, &data.linked_data);
block.end_basic_op = func.basic_ops.size();
}
}
total_basic_ops += func.get_basic_op_count();
total_failed_basic_ops += func.get_failed_basic_op_count();
total_reginfo_ops += func.get_reginfo_basic_op_count();
// if we got an inspect method, inspect it.
if (func.is_inspect_method) {
auto result = inspect_inspect_method(func, func.method_of_type, dts, data.linked_data);
all_type_defs += ";; " + data.to_unique_name() + "\n";

View file

@ -58,7 +58,11 @@ class ObjectFileDB {
void dump_raw_objects(const std::string& output_dir);
void write_object_file_words(const std::string& output_dir, bool dump_v3_only);
void write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions);
void write_disassembly(const std::string& output_dir,
bool disassemble_objects_without_functions,
bool write_json);
void write_debug_type_analysis(const std::string& output_dir);
void analyze_functions();
void process_tpages();
std::string process_game_count();

View file

@ -32,6 +32,7 @@ void set_config(const std::string& path_to_config_file) {
gConfig.process_game_text = cfg.at("process_game_text").get<bool>();
gConfig.process_game_count = cfg.at("process_game_count").get<bool>();
gConfig.dump_objs = cfg.at("dump_objs").get<bool>();
gConfig.write_func_json = cfg.at("write_func_json").get<bool>();
std::vector<std::string> asm_functions_by_name =
cfg.at("asm_functions_by_name").get<std::vector<std::string>>();

View file

@ -25,6 +25,7 @@ struct Config {
bool process_game_text = false;
bool process_game_count = false;
bool dump_objs = false;
bool write_func_json = false;
std::unordered_set<std::string> asm_functions_by_name;
// ...
};

View file

@ -62,6 +62,7 @@
"process_game_text":true,
"process_game_count":true,
"dump_objs":false,
"write_func_json":false,
// to write out data of each object file
"write_hexdump":false,

View file

@ -83,7 +83,9 @@ int main(int argc, char** argv) {
}
if (get_config().write_disassembly) {
db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions);
db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions,
get_config().write_func_json);
db.write_debug_type_analysis(out_folder);
}
// todo print type summary

View file

@ -1,6 +1,7 @@
#include "DecompilerTypeSystem.h"
#include "common/goos/Reader.h"
#include "common/type_system/deftype.h"
#include "decompiler/Disasm/Register.h"
#include "third-party/spdlog/include/spdlog/spdlog.h"
DecompilerTypeSystem::DecompilerTypeSystem() {
@ -146,4 +147,151 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t
throw std::runtime_error("Type redefinition");
}
}
}
/*!
 * Convert this type to a string for debug output:
 *  - OBJECT_OF_TYPE: the printed TypeSpec
 *  - TYPE_OBJECT:    the printed TypeSpec wrapped in square brackets
 *  - FALSE:          "[#f]"
 *  - NONE:           "[none]"
 * An unknown kind trips an assert; if asserts are compiled out it throws std::runtime_error
 * instead of running off the end of the function.
 */
std::string TP_Type::print() const {
  switch (kind) {
    case OBJECT_OF_TYPE:
      return ts.print();
    case TYPE_OBJECT:
      return fmt::format("[{}]", ts.print());
    case FALSE:
      return "[#f]";
    case NONE:
      return "[none]";
    default:
      assert(false);
  }
  // Only reachable with an invalid kind when assert is disabled (NDEBUG).
  throw std::runtime_error("TP_Type::print: invalid kind");
}
/*!
 * Print the types of the GPRs selected by mask.
 * Bit N of mask selects gpr_types[N]. Each selected register is printed as
 * "<register name>: <type> " (note the trailing space), in increasing register order.
 * Returns an empty string when no bit is set.
 */
std::string TypeState::print_gpr_masked(u32 mask) const {
  std::string result;
  for (int i = 0; i < 32; i++) {
    // Test with an unsigned bit: a signed "1 << 31" would shift into the sign bit.
    const u32 bit = static_cast<u32>(1) << i;
    if (mask & bit) {
      result += Register(Reg::GPR, i).to_charp();
      result += ": ";
      result += gpr_types[i].print();
      result += " ";
    }
  }
  return result;
}
/*!
 * Merge the type "add" into the type "existing" and return the merged type.
 * *changed is set to true when the returned type should replace "existing", false otherwise.
 * changed must not be null.
 *
 * Merge rules, by existing kind / add kind:
 *  - OBJECT_OF_TYPE / OBJECT_OF_TYPE: lowest common ancestor of the two TypeSpecs.
 *  - OBJECT_OF_TYPE / TYPE_OBJECT:    the type object is treated as a plain "type".
 *  - TYPE_OBJECT / OBJECT_OF_TYPE:    becomes an OBJECT_OF_TYPE, always reported as changed.
 *  - TYPE_OBJECT / TYPE_OBJECT:       stays a TYPE_OBJECT holding the lowest common ancestor.
 *  - anything / FALSE or NONE:        existing is kept unchanged.
 *  - FALSE / OBJECT_OF_TYPE or TYPE_OBJECT: replaced by add.
 *  - NONE / anything:                 existing (NONE) is kept unchanged.
 *
 * An unknown kind trips an assert; if asserts are compiled out it throws std::runtime_error
 * instead of running off the end of the function.
 */
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) {
  switch (existing.kind) {
    case TP_Type::OBJECT_OF_TYPE:
      switch (add.kind) {
        case TP_Type::OBJECT_OF_TYPE: {
          // two normal types, do LCA as normal.
          TP_Type result;
          result.kind = TP_Type::OBJECT_OF_TYPE;
          result.ts = ts.lowest_common_ancestor(existing.ts, add.ts);
          *changed = (result.ts != existing.ts);
          return result;
        }
        case TP_Type::TYPE_OBJECT: {
          // normal, [type object]. Change type object to less specific "type".
          TP_Type result;
          result.kind = TP_Type::OBJECT_OF_TYPE;
          result.ts = ts.lowest_common_ancestor(existing.ts, ts.make_typespec("type"));
          *changed = (result.ts != existing.ts);
          return result;
        }
        case TP_Type::FALSE:  // allow #f anywhere
        case TP_Type::NONE:   // allow possibly undefined.
          *changed = false;
          return existing;
        default:
          assert(false);
      }
      break;

    case TP_Type::TYPE_OBJECT:
      switch (add.kind) {
        case TP_Type::OBJECT_OF_TYPE: {
          TP_Type result;
          result.kind = TP_Type::OBJECT_OF_TYPE;
          result.ts = ts.lowest_common_ancestor(ts.make_typespec("type"), add.ts);
          *changed = true;  // changed type
          return result;
        }
        case TP_Type::TYPE_OBJECT: {
          // two type objects.
          TP_Type result;
          result.kind = TP_Type::TYPE_OBJECT;
          result.ts = ts.lowest_common_ancestor(existing.ts, add.ts);
          *changed = (result.ts != existing.ts);
          return result;
        }
        case TP_Type::FALSE:  // allow #f anywhere
        case TP_Type::NONE:   // allow possibly undefined.
          *changed = false;
          return existing;
        default:
          assert(false);
      }
      break;

    case TP_Type::FALSE:
      switch (add.kind) {
        case TP_Type::OBJECT_OF_TYPE:
        case TP_Type::TYPE_OBJECT:
          *changed = true;
          return add;
        case TP_Type::FALSE:
        case TP_Type::NONE:
          *changed = false;
          return existing;
        default:
          assert(false);
      }
      break;

    case TP_Type::NONE:
      switch (add.kind) {
        case TP_Type::OBJECT_OF_TYPE:
        case TP_Type::TYPE_OBJECT:
        case TP_Type::FALSE:
        case TP_Type::NONE:
          *changed = false;
          return existing;
        default:
          assert(false);
      }
      break;

    default:
      assert(false);
  }

  // Only reachable with an invalid kind when assert is disabled (NDEBUG).
  throw std::runtime_error("DecompilerTypeSystem::tp_lca: invalid TP_Type kind");
}
/*!
 * Merge the register types in "add" into "combined", one register at a time, using the
 * single-type tp_lca. All 32 GPR entries are merged first, then all 32 FPR entries.
 * An entry of combined is only overwritten when the single-type merge reports a change.
 * Returns true if at least one entry of combined was updated.
 */
bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) {
  bool any_changed = false;

  // Merges one bank of 32 register types from src into dst.
  const auto merge_bank = [&](TP_Type* dst, const TP_Type* src) {
    for (int reg = 0; reg < 32; reg++) {
      bool reg_changed = false;
      auto merged = tp_lca(dst[reg], src[reg], &reg_changed);
      if (reg_changed) {
        any_changed = true;
        dst[reg] = merged;
      }
    }
  };

  merge_bank(combined->gpr_types, add.gpr_types);
  merge_bank(combined->fpr_types, add.fpr_types);
  return any_changed;
}

View file

@ -3,6 +3,20 @@
#include "common/type_system/TypeSystem.h"
/*!
 * A single type entry as stored per register in TypeState.
 * The kind selects how ts is interpreted; a default-constructed TP_Type has kind NONE.
 */
struct TP_Type {
// OBJECT_OF_TYPE and TYPE_OBJECT both carry a TypeSpec in ts (see the note on ts below).
// FALSE and NONE do not use ts when printed (they print as "[#f]" and "[none]").
// NOTE(review): presumably FALSE means the value is #f and NONE means nothing is known
// yet - confirm against the type analysis pass.
enum Kind { OBJECT_OF_TYPE, TYPE_OBJECT, FALSE, NONE } kind = NONE;
// For TYPE_OBJECT, ts just stores the name of the type as a single-argument TypeSpec.
TypeSpec ts;
// Convert to a string for debug output.
std::string print() const;
};
/*!
 * One TP_Type for each of 32 GPRs and 32 FPRs.
 * NOTE(review): presumably a snapshot of register types at one point in a function - confirm.
 */
struct TypeState {
// indexed by GPR number
TP_Type gpr_types[32];
// NOTE(review): presumably indexed by FPR number - confirm
TP_Type fpr_types[32];
// Print the GPR entries selected by mask (bit N selects gpr_types[N]).
std::string print_gpr_masked(u32 mask) const;
};
class DecompilerTypeSystem {
public:
DecompilerTypeSystem();
@ -25,15 +39,14 @@ class DecompilerTypeSystem {
}
void add_symbol(const std::string& name, const TypeSpec& type_spec);
void parse_type_defs(const std::vector<std::string>& file_path);
void add_type_flags(const std::string& name, u64 flags);
void add_type_parent(const std::string& child, const std::string& parent);
std::string dump_symbol_types();
std::string lookup_parent_from_inspects(const std::string& child) const;
bool lookup_flags(const std::string& type, u64* dest) const;
TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed);
bool tp_lca(TypeState* combined, const TypeState& add);
};
#endif // JAK_DECOMPILERTYPESYSTEM_H