jak-project/test/offline/offline_test_main.cpp
water111 ddd60fca48
[decompiler] handle pointer to symbol value, clean up prints on offline test (#1978)
- fix issue described in
https://github.com/open-goal/jak-project/issues/1939
- fix `text`, which was manually patched with the wrong offset (was
reading the symbol value off by one byte)
- clean up some random useless prints
- make the offline tests keep trying if there's a comparison error,
clean up the output a bit so the diffs are all at the end.
2022-10-16 18:19:59 -04:00

602 lines
20 KiB
C++

#include <future>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "common/common_types.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/Timer.h"
#include "common/util/diff.h"
#include "common/util/json_util.h"
#include <common/util/unicode_util.h>
#include "decompiler/ObjectFile/ObjectFileDB.h"
#include "goalc/compiler/Compiler.h"
#include "third-party/CLI11.hpp"
#include "third-party/fmt/format.h"
// json config file data (previously was in source of offline_test_main.cpp)
struct OfflineTestConfig {
std::vector<std::string> dgos;
std::unordered_set<std::string> skip_compile_files;
std::unordered_set<std::string> skip_compile_functions;
std::unordered_map<std::string, std::unordered_set<std::string>> skip_compile_states;
};
struct DecompilerFile {
fs::path path;
std::string name_in_dgo;
std::string unique_name;
std::string reference;
};
struct DecompilerArtFile {
std::string name_in_dgo;
std::string unique_name;
};
struct Decompiler {
std::unique_ptr<decompiler::ObjectFileDB> db;
std::unique_ptr<decompiler::Config> config;
};
// TODO - this should probably go somewhere common when it's needed eventually
std::unordered_map<std::string, std::string> game_name_to_config = {
{"jak1", "jak1_ntsc_black_label.jsonc"},
{"jak2", "jak2_ntsc_v1.jsonc"}};
// TODO - i think these should be partitioned by game name instead of it being in the filename
// (and the names not being consistent)
std::unordered_map<std::string, std::string> game_name_to_all_types = {
{"jak1", "all-types.gc"},
{"jak2", "jak2/all-types.gc"}};
Decompiler setup_decompiler(const std::vector<DecompilerFile>& files,
const std::vector<DecompilerArtFile>& art_files,
const fs::path& iso_data_path,
const OfflineTestConfig& offline_config,
const std::string& game_name) {
// TODO - pull out extractor logic to determine release into common and use here
Decompiler dc;
decompiler::init_opcode_info();
dc.config = std::make_unique<decompiler::Config>(decompiler::read_config_file(
(file_util::get_jak_project_dir() / "decompiler" / "config" / game_name_to_config[game_name])
.string()));
// modify the config
std::unordered_set<std::string> object_files;
for (auto& file : files) {
object_files.insert(file.name_in_dgo); // todo, make this work with unique_name
}
for (auto& file : art_files) {
object_files.insert(file.unique_name);
}
dc.config->allowed_objects = object_files;
// don't try to do this because we can't write the file
dc.config->generate_symbol_definition_map = false;
std::vector<fs::path> dgo_paths;
for (auto& x : offline_config.dgos) {
dgo_paths.push_back(iso_data_path / x);
}
dc.db = std::make_unique<decompiler::ObjectFileDB>(dgo_paths, dc.config->obj_file_name_map_file,
std::vector<fs::path>{},
std::vector<fs::path>{}, *dc.config);
std::unordered_set<std::string> db_files;
for (auto& files_by_name : dc.db->obj_files_by_name) {
for (auto& f : files_by_name.second) {
db_files.insert(f.to_unique_name());
}
}
if (db_files.size() != files.size() + art_files.size()) {
lg::error("DB file error: {} {} {}", db_files.size(), files.size(), art_files.size());
for (auto& f : files) {
if (!db_files.count(f.unique_name)) {
lg::error(
"didn't find {}, make sure it's part of the DGO inputs and not in the banned objects "
"list\n",
f.unique_name);
}
}
for (auto& f : art_files) {
if (!db_files.count(f.unique_name)) {
lg::error("didn't find {}\n", f.unique_name);
}
}
exit(1);
}
return dc;
}
void disassemble(Decompiler& dc) {
dc.db->process_link_data(*dc.config);
dc.db->find_code(*dc.config);
dc.db->process_labels();
}
void decompile(Decompiler& dc, const OfflineTestConfig& config) {
dc.db->extract_art_info();
dc.db->ir2_top_level_pass(*dc.config);
dc.db->analyze_functions_ir2({}, *dc.config, config.skip_compile_functions,
config.skip_compile_states);
}
/// @brief Removes trailing new-lines and comment lines
std::string clean_decompilation_code(const std::string& in) {
std::vector<std::string> lines = split_string(in);
// Remove all lines that are comments
// comments are added only by us, meaning this _should_ be consistent
std::vector<std::string>::iterator line_itr = lines.begin();
while (line_itr != lines.end()) {
if (line_itr->rfind(";", 0) == 0) {
// remove comment line
line_itr = lines.erase(line_itr);
} else {
// iterate
line_itr++;
}
}
std::string out = fmt::format("{}", fmt::join(lines, "\n"));
while (!out.empty() && out.back() == '\n') {
out.pop_back();
}
return out;
}
decompiler::ObjectFileData& get_data(Decompiler& dc,
const std::string& unique_name,
const std::string& name_in_dgo) {
auto& files = dc.db->obj_files_by_name.at(name_in_dgo);
auto it = std::find_if(files.begin(), files.end(), [&](const decompiler::ObjectFileData& data) {
return data.to_unique_name() == unique_name;
});
ASSERT(it != files.end());
return *it;
}
int line_count(const std::string& str) {
int result = 0;
for (auto& c : str) {
if (c == '\n') {
result++;
}
}
return result;
}
struct CompareResult {
struct Fail {
std::string filename;
std::string diff;
};
std::vector<Fail> failing_files;
int total_files = 0;
int ok_files = 0;
int total_lines = 0;
bool total_pass = true;
void add(const CompareResult& other) {
failing_files.insert(failing_files.end(), other.failing_files.begin(),
other.failing_files.end());
total_files += other.total_files;
ok_files += other.ok_files;
total_lines += other.total_lines;
if (!other.total_pass) {
total_pass = false;
}
}
};
CompareResult compare(Decompiler& dc, const std::vector<DecompilerFile>& refs, bool dump_mode) {
CompareResult compare_result;
for (const auto& file : refs) {
auto& data = get_data(dc, file.unique_name, file.name_in_dgo);
std::string result = clean_decompilation_code(data.full_output);
std::string ref = clean_decompilation_code(file_util::read_text_file(file.path.string()));
compare_result.total_files++;
compare_result.total_lines += line_count(result);
if (result != ref) {
compare_result.failing_files.push_back({file.unique_name, diff_strings(ref, result)});
compare_result.total_pass = false;
if (dump_mode) {
auto failure_dir = file_util::get_jak_project_dir() / "failures";
file_util::create_dir_if_needed(failure_dir);
file_util::write_text_file(failure_dir / fmt::format("{}_REF.gc", file.unique_name),
result);
}
} else {
compare_result.ok_files++;
}
}
return compare_result;
}
struct CompileResult {
bool ok = true;
struct Fail {
std::string filename;
std::string error;
};
std::vector<Fail> failing_files;
int num_lines = 0;
void add(const CompileResult& other) {
failing_files.insert(failing_files.end(), other.failing_files.begin(),
other.failing_files.end());
num_lines += other.num_lines;
if (!other.ok) {
ok = false;
}
}
};
CompileResult compile(Decompiler& dc,
const std::vector<DecompilerFile>& refs,
const OfflineTestConfig& config,
const std::string& game_name) {
CompileResult result;
Compiler compiler(game_name_to_version(game_name));
compiler.run_front_end_on_file({"decompiler", "config", game_name_to_all_types[game_name]});
compiler.run_front_end_on_file(
{"test", "decompiler", "reference", game_name, "decompiler-macros.gc"});
int total_lines = 0;
for (const auto& file : refs) {
if (config.skip_compile_files.count(file.name_in_dgo)) {
fmt::print("Skipping {}\n", file.name_in_dgo);
continue;
}
fmt::print("Compiling {}...\n", file.unique_name);
auto& data = get_data(dc, file.unique_name, file.name_in_dgo);
try {
const auto& src = data.output_with_skips;
total_lines += line_count(src);
compiler.run_full_compiler_on_string_no_save(src, file.name_in_dgo);
} catch (const std::exception& e) {
result.ok = false;
result.failing_files.push_back({file.name_in_dgo, e.what()});
}
}
result.num_lines = total_lines;
return result;
}
std::vector<DecompilerArtFile> find_art_files(const std::string& game_name,
const std::vector<std::string>& dgos) {
std::vector<DecompilerArtFile> result;
// use the all_objs.json file to place them in the correct build order
auto obj_json = parse_commented_json(
file_util::read_text_file(
(file_util::get_jak_project_dir() / "goal_src" / game_name / "build" / "all_objs.json")
.string()),
"all_objs.json");
for (const auto& x : obj_json) {
auto unique_name = x[0].get<std::string>();
auto version = x[2].get<int>();
std::vector<std::string> dgoList = x[3].get<std::vector<std::string>>();
if (version == 4) {
bool skip_this = false;
// Check to see if we've included atleast one of the DGO/CGOs in our hardcoded list
// If not BLOW UP
bool dgoValidated = false;
for (int i = 0; i < (int)dgoList.size(); i++) {
std::string& dgo = dgoList.at(i);
if (dgo == "NO-XGO") {
skip_this = true;
break;
}
// can either be in the DGO or CGO folder, and can either end with .CGO or .DGO
// TODO - Jak 2 Folder structure will be different!
if (std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.DGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.CGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.DGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.CGO", dgo)) != dgos.end()) {
dgoValidated = true;
}
}
if (skip_this) {
continue;
}
if (!dgoValidated) {
lg::error(
"File [{}] is in the following DGOs [{}], and not one of these is in our list! Add "
"it!",
unique_name, fmt::join(dgoList, ", "));
exit(1);
}
DecompilerArtFile file;
file.unique_name = unique_name;
file.name_in_dgo = x[1];
result.push_back(file);
}
}
return result;
}
std::vector<DecompilerFile> find_files(const std::string& game_name,
const std::vector<std::string>& dgos,
const std::string& single_file) {
std::vector<DecompilerFile> result;
auto base_dir =
file_util::get_jak_project_dir() / "test" / "decompiler" / "reference" / game_name;
auto ref_file_paths = file_util::find_files_recursively(base_dir, std::regex(".*_REF\\..*"));
std::unordered_map<std::string, fs::path> ref_file_names = {};
for (const auto& path : ref_file_paths) {
auto ref_name = path.filename().replace_extension().string();
ref_name.erase(ref_name.begin() + ref_name.find("_REF"), ref_name.end());
if (single_file.empty() || ref_name == single_file) {
ref_file_names[ref_name] = path;
}
}
lg::info("Found {} reference files", ref_file_paths.size());
// use the all_objs.json file to place them in the correct build order
auto obj_json = parse_commented_json(
file_util::read_text_file(
(file_util::get_jak_project_dir() / "goal_src" / game_name / "build" / "all_objs.json")
.string()),
"all_objs.json");
std::unordered_set<std::string> matched_files;
for (auto& x : obj_json) {
auto unique_name = x[0].get<std::string>();
std::vector<std::string> dgoList = x[3].get<std::vector<std::string>>();
auto it = ref_file_names.find(unique_name);
if (it != ref_file_names.end()) {
// Check to see if we've included atleast one of the DGO/CGOs in our hardcoded list
// If not BLOW UP
bool dgoValidated = false;
for (int i = 0; i < (int)dgoList.size(); i++) {
std::string& dgo = dgoList.at(i);
// can either be in the DGO or CGO folder, and can either end with .CGO or .DGO
// TODO - Jak 2 Folder structure will be different!
if (std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.DGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.CGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.DGO", dgo)) != dgos.end() ||
std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.CGO", dgo)) != dgos.end()) {
dgoValidated = true;
}
}
if (!dgoValidated) {
lg::error(
"File [{}] is in the following DGOs [{}], and not one of these is in our list! Add "
"it!",
unique_name, fmt::join(dgoList, ", "));
exit(1);
}
DecompilerFile file;
file.path = it->second;
file.unique_name = it->first;
file.name_in_dgo = x[1];
result.push_back(file);
matched_files.insert(unique_name);
}
}
if (matched_files.size() != ref_file_names.size()) {
lg::error("Some REF files were not matched to files in all_objs.json:");
for (const auto& [path, flag] : ref_file_names) {
if (matched_files.count(path) == 0) {
lg::error("- '{}'", path);
}
}
exit(1);
}
return result;
}
/*!
* Read and parse the json config file, config.json, located in test/offline
*/
std::optional<OfflineTestConfig> parse_config(const std::string_view& game_name) {
lg::info("Reading Configuration...");
auto json_file_path =
file_util::get_jak_project_dir() / "test" / "offline" / "config" / game_name / "config.jsonc";
if (!fs::exists(json_file_path)) {
lg::error("Couldn't load configuration, '{}' doesn't exist", json_file_path.string());
return {};
}
auto json = parse_commented_json(file_util::read_text_file(json_file_path.string()),
json_file_path.string());
OfflineTestConfig result;
result.dgos = json["dgos"].get<std::vector<std::string>>();
result.skip_compile_files = json["skip_compile_files"].get<std::unordered_set<std::string>>();
result.skip_compile_functions =
json["skip_compile_functions"].get<std::unordered_set<std::string>>();
result.skip_compile_states =
json["skip_compile_states"]
.get<std::unordered_map<std::string, std::unordered_set<std::string>>>();
return std::make_optional(result);
}
/// @brief A simple struct to contain the reason for failure from a thread
struct OfflineTestResult {
int exit_code = 0;
std::string reason;
float time_spent_compiling = 0;
float time_spent_decompiling = 0;
float total_time = 0;
CompareResult compare;
CompileResult compile;
void add(const OfflineTestResult& other) {
if (other.exit_code) {
exit_code = other.exit_code;
}
time_spent_compiling += other.time_spent_compiling;
time_spent_decompiling += other.time_spent_decompiling;
total_time += other.total_time;
compare.add(other.compare);
compile.add(other.compile);
}
};
int main(int argc, char* argv[]) {
ArgumentGuard u8_guard(argc, argv);
lg::initialize();
bool dump_current_output = false;
std::string iso_data_path;
std::string game_name;
// Useful for testing in debug mode (dont have to wait for everything to finish)
int max_files = -1;
std::string single_file = "";
uint32_t num_threads = 1;
CLI::App app{"OpenGOAL - Offline Reference Test Runner"};
app.add_option("--iso_data_path", iso_data_path, "The path to the folder with the ISO data files")
->check(CLI::ExistingPath)
->required();
app.add_option("--game", game_name, "The game name, for example 'jak1'")->required();
app.add_flag("-d,--dump_current_output", dump_current_output,
"Output the current output to a folder, use in conjunction with the reference test "
"files update script");
app.add_option("-m,--max_files", max_files,
"Limit the amount of files ran in a single test, picks the first N");
app.add_option("-t,--num_threads", num_threads,
"The number of threads to partition the offline test work between");
app.add_option("-f,--file", single_file,
"Limit the offline test routine to a single file to decompile/compile -- useful "
"when you are just iterating on a single file");
app.validate_positionals();
CLI11_PARSE(app, argc, argv);
if (!file_util::setup_project_path(std::nullopt)) {
lg::error("Couldn't setup project path, tool is supposed to be ran in the jak-project repo!");
return 1;
}
auto config = parse_config(game_name);
if (!config.has_value()) {
return 1;
}
lg::info("Finding files...");
auto files = find_files(game_name, config->dgos, single_file);
if (max_files > 0 && max_files < (int)files.size()) {
files.erase(files.begin() + max_files, files.end());
}
std::vector<DecompilerArtFile> art_files;
if (game_name == "jak1") {
art_files = find_art_files(game_name, config->dgos);
}
// Create a bunch of threads to disassemble/decompile/compile the files
if (num_threads < 1) {
num_threads = 1;
} else if (num_threads > 1) {
num_threads = std::min(num_threads, std::thread::hardware_concurrency());
}
// First, prepare our batches of files to be processed
std::vector<std::vector<DecompilerFile>> work_groups = {};
for (size_t i = 0; i < num_threads; i++) {
work_groups.push_back({});
}
int total_added = 0;
for (auto& file : files) {
work_groups.at(total_added % num_threads).push_back(file);
total_added++;
}
// TODO - nicer printing, very messy with dozens of threads processing the job
// Now we create a thread to process each group of work, and then await them
std::vector<std::future<OfflineTestResult>> threads;
decompiler::init_opcode_info();
for (const auto& work_group : work_groups) {
threads.push_back(std::async(std::launch::async, [&]() {
OfflineTestResult result;
Timer total_timer;
Timer decompiler_timer;
auto decompiler = setup_decompiler(work_group, art_files, fs::path(iso_data_path),
config.value(), game_name);
disassemble(decompiler);
decompile(decompiler, config.value());
// It's about 100ms per file to decompile on average
// meaning that when we have all 900 files, a full offline test will take 1.5 minutes
result.time_spent_decompiling = decompiler_timer.getSeconds();
result.compare = compare(decompiler, work_group, dump_current_output);
if (!result.compare.total_pass) {
result.exit_code = 1;
}
Timer compile_timer;
result.compile = compile(decompiler, work_group, config.value(), game_name);
result.time_spent_compiling = compile_timer.getSeconds();
if (!result.compile.ok) {
result.exit_code = 1;
}
result.total_time = total_timer.getSeconds();
return result;
}));
}
// summarize results:
OfflineTestResult total;
for (auto& thread : threads) {
auto ret = thread.get();
total.add(ret);
}
if (!total.compare.total_pass) {
lg::error("Comparison failed.");
for (auto& f : total.compare.failing_files) {
fmt::print("{}\n", f.diff);
}
lg::error("Failing files:");
for (auto& f : total.compare.failing_files) {
lg::error("- {}", f.filename);
}
}
if (!total.compile.ok) {
for (auto& f : total.compile.failing_files) {
lg::error("{}", f.filename);
fmt::print("{}\n", f.error);
}
}
fmt::print("Compiled {} lines in {:.3f}s ({} lines/sec)\n", total.compile.num_lines,
total.time_spent_compiling,
(int)(total.compile.num_lines / total.time_spent_compiling));
if (!total.exit_code) {
fmt::print("pass!\n");
}
return total.exit_code;
}