2020-09-13 21:32:55 -04:00
|
|
|
#pragma once
|
|
|
|
|
2020-08-22 23:30:17 -04:00
|
|
|
/*!
|
|
|
|
* @file ObjectFileDB.h
|
|
|
|
* A "database" of object files found in DGO files.
|
|
|
|
* Eliminates duplicate object files, and also assigns unique names to all object files
|
|
|
|
* (there may be different object files with the same name sometimes)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "common/common_types.h"
#include "LinkedObjectFile.h"
#include "third-party/fmt/core.h"
#include "decompiler/util/DecompilerTypeSystem.h"
#include "decompiler/data/TextureDB.h"
#include "decompiler/analysis/symbol_def_map.h"
#include "common/util/Assert.h"
|
2020-08-22 23:30:17 -04:00
|
|
|
|
2021-01-06 20:04:15 -05:00
|
|
|
namespace decompiler {
|
2020-08-22 23:30:17 -04:00
|
|
|
/*!
|
|
|
|
* A "record" which can be used to identify an object file.
|
|
|
|
*/
|
|
|
|
/*!
 * Lightweight key identifying one object file: its name, a version number and a hash.
 * A default-constructed record has an empty name, version -1 and hash 0.
 */
struct ObjectFileRecord {
  // Object name. Includes the "-ag" part, but not the dgo suffix.
  std::string name;
  // Version number of this object; -1 until one is assigned.
  int version{-1};
  // Hash associated with this object; 0 until one is assigned.
  uint32_t hash{0};
};
|
|
|
|
|
|
|
|
/*!
|
|
|
|
* All of the data for a single object file
|
|
|
|
*/
|
|
|
|
struct ObjectFileData {
|
|
|
|
std::vector<uint8_t> data; // raw bytes
|
|
|
|
LinkedObjectFile linked_data; // data including linking annotations
|
|
|
|
ObjectFileRecord record; // name
|
2020-09-03 20:11:31 -04:00
|
|
|
std::vector<std::string> dgo_names;
|
|
|
|
int obj_version = -1;
|
|
|
|
bool has_multiple_versions = false;
|
|
|
|
std::string name_in_dgo;
|
2020-10-24 14:27:50 -04:00
|
|
|
std::string name_from_map;
|
2020-09-03 20:11:31 -04:00
|
|
|
std::string to_unique_name() const;
|
2020-08-22 23:30:17 -04:00
|
|
|
uint32_t reference_count = 0; // number of times its used.
|
2021-10-12 20:33:26 -04:00
|
|
|
|
|
|
|
std::string full_output;
|
|
|
|
std::string output_with_skips;
|
2020-08-22 23:30:17 -04:00
|
|
|
};
|
|
|
|
|
2022-05-19 21:30:14 -04:00
|
|
|
/*!
|
|
|
|
* Stats structure for let rewriting.
|
|
|
|
*/
|
|
|
|
/*!
 * Stats structure for let rewriting.
 * Holds one counter per rewrite category. Every counter starts at zero, so a
 * default-constructed instance can be incremented and summed without first
 * being cleared by hand.
 */
struct LetRewriteStats {
  int dotimes = 0;
  int countdown = 0;
  int abs = 0;
  int abs2 = 0;
  int unused = 0;
  int ja = 0;
  int case_no_else = 0;
  int case_with_else = 0;
  int set_vector = 0;
  int set_vector2 = 0;
  int send_event = 0;

  /*!
   * @return the sum of all counters in this structure.
   */
  int total() const {
    return dotimes + countdown + abs + abs2 + unused + ja + case_no_else + case_with_else +
           set_vector + set_vector2 + send_event;
  }
};
|
|
|
|
|
2020-08-22 23:30:17 -04:00
|
|
|
class ObjectFileDB {
|
|
|
|
public:
|
2020-11-19 21:22:16 -05:00
|
|
|
ObjectFileDB(const std::vector<std::string>& _dgos,
|
|
|
|
const std::string& obj_file_name_map_file,
|
2020-11-21 15:58:51 -05:00
|
|
|
const std::vector<std::string>& object_files,
|
2021-05-11 20:49:54 -04:00
|
|
|
const std::vector<std::string>& str_files,
|
|
|
|
const Config& config);
|
2020-08-22 23:30:17 -04:00
|
|
|
std::string generate_dgo_listing();
|
2022-01-04 20:32:34 -05:00
|
|
|
std::string generate_obj_listing(const std::unordered_set<std::string>& merged_objs);
|
2021-05-11 20:49:54 -04:00
|
|
|
void process_link_data(const Config& config);
|
2020-08-22 23:30:17 -04:00
|
|
|
void process_labels();
|
2021-05-11 20:49:54 -04:00
|
|
|
void find_code(const Config& config);
|
2020-08-22 23:30:17 -04:00
|
|
|
void find_and_write_scripts(const std::string& output_dir);
|
2022-05-19 21:30:14 -04:00
|
|
|
void extract_art_info();
|
|
|
|
void dump_art_info(const std::string& output_dir);
|
2020-11-24 20:48:38 -05:00
|
|
|
void dump_raw_objects(const std::string& output_dir);
|
2020-08-22 23:30:17 -04:00
|
|
|
|
2021-05-11 19:19:23 -04:00
|
|
|
void write_object_file_words(const std::string& output_dir, bool dump_data, bool dump_code);
|
2020-11-27 16:38:36 -05:00
|
|
|
void write_disassembly(const std::string& output_dir,
|
2021-05-11 19:19:23 -04:00
|
|
|
bool disassemble_data,
|
2021-05-11 20:49:54 -04:00
|
|
|
bool disassemble_code,
|
|
|
|
bool print_hex);
|
2020-11-27 16:38:36 -05:00
|
|
|
|
2021-10-16 21:01:23 -04:00
|
|
|
void analyze_functions_ir2(
|
|
|
|
const std::string& output_dir,
|
|
|
|
const Config& config,
|
|
|
|
const std::unordered_set<std::string>& skip_functions,
|
|
|
|
const std::unordered_map<std::string, std::unordered_set<std::string>>& skip_states = {});
|
2021-05-11 20:49:54 -04:00
|
|
|
void ir2_top_level_pass(const Config& config);
|
2021-10-12 20:33:26 -04:00
|
|
|
void ir2_stack_spill_slot_pass(int seg, ObjectFileData& data);
|
|
|
|
void ir2_basic_block_pass(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_atomic_op_pass(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_type_analysis_pass(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_register_usage_pass(int seg, ObjectFileData& data);
|
|
|
|
void ir2_variable_pass(int seg, ObjectFileData& data);
|
|
|
|
void ir2_cfg_build_pass(int seg, ObjectFileData& data);
|
2022-01-04 20:32:34 -05:00
|
|
|
// void ir2_store_current_forms(int seg);
|
2021-10-12 20:33:26 -04:00
|
|
|
void ir2_build_expressions(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_insert_lets(int seg, ObjectFileData& data);
|
|
|
|
void ir2_rewrite_inline_asm_instructions(int seg, ObjectFileData& data);
|
|
|
|
void ir2_insert_anonymous_functions(int seg, ObjectFileData& data);
|
|
|
|
void ir2_symbol_definition_map(ObjectFileData& data);
|
2022-05-23 18:53:02 -04:00
|
|
|
void ir2_write_results(const std::string& output_dir,
|
|
|
|
const Config& config,
|
|
|
|
const std::vector<std::string>& imports,
|
|
|
|
ObjectFileData& data);
|
2021-10-12 20:33:26 -04:00
|
|
|
void ir2_do_segment_analysis_phase1(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_do_segment_analysis_phase2(int seg, const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_setup_labels(const Config& config, ObjectFileData& data);
|
|
|
|
void ir2_run_mips2c(const Config& config, ObjectFileData& data);
|
2021-05-24 19:52:19 -04:00
|
|
|
std::string ir2_to_file(ObjectFileData& data, const Config& config);
|
2021-01-09 20:01:48 -05:00
|
|
|
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
|
2021-03-03 15:42:55 -05:00
|
|
|
std::string ir2_final_out(ObjectFileData& data,
|
2022-05-23 18:53:02 -04:00
|
|
|
const std::vector<std::string>& imports,
|
|
|
|
const std::unordered_set<std::string>& skip_functions);
|
2021-01-09 20:01:48 -05:00
|
|
|
|
2021-12-04 12:33:18 -05:00
|
|
|
std::string process_tpages(TextureDB& tex_db);
|
2021-01-06 20:04:15 -05:00
|
|
|
std::string process_game_count_file();
|
2022-03-10 19:25:01 -05:00
|
|
|
std::string process_game_text_files(const Config& cfg);
|
2020-11-16 19:57:45 -05:00
|
|
|
|
2022-04-15 20:40:10 -04:00
|
|
|
const ObjectFileData& lookup_record(const ObjectFileRecord& rec) const;
|
2020-09-29 20:24:15 -04:00
|
|
|
DecompilerTypeSystem dts;
|
2020-08-22 23:30:17 -04:00
|
|
|
|
2021-01-10 20:46:49 -05:00
|
|
|
bool lookup_function_type(const FunctionName& name,
|
|
|
|
const std::string& obj_name,
|
2021-05-11 20:49:54 -04:00
|
|
|
const Config& config,
|
2021-01-10 20:46:49 -05:00
|
|
|
TypeSpec* result);
|
|
|
|
|
2021-02-06 09:54:23 -05:00
|
|
|
public:
|
2020-10-24 14:27:50 -04:00
|
|
|
void load_map_file(const std::string& map_data);
|
2021-05-11 20:49:54 -04:00
|
|
|
void get_objs_from_dgo(const std::string& filename, const Config& config);
|
2020-08-22 23:30:17 -04:00
|
|
|
void add_obj_from_dgo(const std::string& obj_name,
|
2020-09-03 20:11:31 -04:00
|
|
|
const std::string& name_in_dgo,
|
2020-11-21 15:58:51 -05:00
|
|
|
const uint8_t* obj_data,
|
2020-08-22 23:30:17 -04:00
|
|
|
uint32_t obj_size,
|
2021-05-11 20:49:54 -04:00
|
|
|
const std::string& dgo_name,
|
|
|
|
const Config& config);
|
2020-08-22 23:30:17 -04:00
|
|
|
|
|
|
|
/*!
|
|
|
|
* Apply f to all ObjectFileData's. Does it in the right order.
|
|
|
|
*/
|
|
|
|
template <typename Func>
|
|
|
|
void for_each_obj(Func f) {
|
2022-02-08 19:02:47 -05:00
|
|
|
ASSERT(obj_files_by_name.size() == obj_file_order.size());
|
2020-08-26 01:21:33 -04:00
|
|
|
for (const auto& name : obj_file_order) {
|
|
|
|
for (auto& obj : obj_files_by_name.at(name)) {
|
2021-10-15 18:17:51 -04:00
|
|
|
// lg::info("{}...", name);
|
2020-08-22 23:30:17 -04:00
|
|
|
f(obj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*!
|
|
|
|
* Apply f to all functions
|
|
|
|
* takes (Function, segment, linked_data)
|
|
|
|
* Does it in the right order.
|
|
|
|
*/
|
|
|
|
template <typename Func>
|
|
|
|
void for_each_function(Func f) {
|
|
|
|
for_each_obj([&](ObjectFileData& data) {
|
|
|
|
for (int i = 0; i < int(data.linked_data.segments); i++) {
|
|
|
|
int fn = 0;
|
|
|
|
for (auto& goal_func : data.linked_data.functions_by_seg.at(i)) {
|
|
|
|
f(goal_func, i, data);
|
|
|
|
fn++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-10-24 22:51:40 -04:00
|
|
|
template <typename Func>
|
|
|
|
void for_each_function_def_order(Func f) {
|
|
|
|
for_each_obj([&](ObjectFileData& data) {
|
|
|
|
for (int i = 0; i < int(data.linked_data.segments); i++) {
|
|
|
|
int fn = 0;
|
|
|
|
for (size_t j = data.linked_data.functions_by_seg.at(i).size(); j-- > 0;) {
|
|
|
|
f(data.linked_data.functions_by_seg.at(i).at(j), i, data);
|
|
|
|
fn++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-10-12 20:33:26 -04:00
|
|
|
template <typename Func>
|
|
|
|
void for_each_function_def_order_in_obj(ObjectFileData& data, Func f) {
|
|
|
|
for (int i = 0; i < int(data.linked_data.segments); i++) {
|
|
|
|
int fn = 0;
|
|
|
|
for (size_t j = data.linked_data.functions_by_seg.at(i).size(); j-- > 0;) {
|
|
|
|
f(data.linked_data.functions_by_seg.at(i).at(j), i);
|
|
|
|
fn++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-17 20:54:03 -04:00
|
|
|
template <typename Func>
|
|
|
|
void for_each_function_in_seg(int seg, Func f) {
|
|
|
|
for_each_obj([&](ObjectFileData& data) {
|
|
|
|
int fn = 0;
|
2021-08-19 20:35:12 -04:00
|
|
|
if (data.linked_data.segments == 3) {
|
|
|
|
for (size_t j = data.linked_data.functions_by_seg.at(seg).size(); j-- > 0;) {
|
|
|
|
f(data.linked_data.functions_by_seg.at(seg).at(j), data);
|
|
|
|
fn++;
|
|
|
|
}
|
2021-08-17 20:54:03 -04:00
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-10-12 20:33:26 -04:00
|
|
|
template <typename Func>
|
|
|
|
void for_each_function_in_seg_in_obj(int seg, ObjectFileData& data, Func f) {
|
|
|
|
int fn = 0;
|
|
|
|
if (data.linked_data.segments == 3) {
|
|
|
|
for (size_t j = data.linked_data.functions_by_seg.at(seg).size(); j-- > 0;) {
|
|
|
|
f(data.linked_data.functions_by_seg.at(seg).at(j));
|
|
|
|
fn++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-22 23:30:17 -04:00
|
|
|
// Danger: after adding all object files, we assume that the vector never reallocates.
|
|
|
|
std::unordered_map<std::string, std::vector<ObjectFileData>> obj_files_by_name;
|
|
|
|
std::unordered_map<std::string, std::vector<ObjectFileRecord>> obj_files_by_dgo;
|
|
|
|
|
|
|
|
std::vector<std::string> obj_file_order;
|
2020-10-24 14:27:50 -04:00
|
|
|
std::unordered_map<std::string, std::unordered_map<std::string, std::string>> dgo_obj_name_map;
|
2020-08-22 23:30:17 -04:00
|
|
|
|
2021-10-12 20:33:26 -04:00
|
|
|
SymbolMapBuilder map_builder;
|
|
|
|
|
2020-08-22 23:30:17 -04:00
|
|
|
struct {
|
2022-05-19 21:30:14 -04:00
|
|
|
LetRewriteStats let;
|
2020-08-22 23:30:17 -04:00
|
|
|
uint32_t total_dgo_bytes = 0;
|
|
|
|
uint32_t total_obj_files = 0;
|
|
|
|
uint32_t unique_obj_files = 0;
|
|
|
|
uint32_t unique_obj_bytes = 0;
|
|
|
|
} stats;
|
|
|
|
};
|
2022-05-19 21:30:14 -04:00
|
|
|
|
|
|
|
// Free function declared here and defined elsewhere. Takes an art group name, an element
// name and an index, and returns a string.
// NOTE(review): presumably produces the text for one art element used by the art-info
// dump - confirm the exact format against the definition.
std::string print_art_elt_for_dump(const std::string& group_name, const std::string& name, int idx);
|
2021-01-06 20:04:15 -05:00
|
|
|
} // namespace decompiler
|