add config option for changing cond splitting behavior (#522)

This commit is contained in:
water111 2021-05-24 19:52:19 -04:00 committed by GitHub
parent d719bf5677
commit 0f0902eabf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 115 additions and 46 deletions

View file

@ -376,17 +376,18 @@ Object Reader::read_list(TextStream& ts, bool expect_close_paren) {
auto tok = get_next_token(ts);
// reader macro thing:
bool got_reader_macro = false;
std::string reader_macro_string;
std::vector<std::string> reader_macro_string_stack;
auto kv = reader_macros.find(tok.text);
if (kv != reader_macros.end()) {
// we found a reader macro! Remember this, and get the next token.
got_reader_macro = true;
reader_macro_string = kv->second;
tok = get_next_token(ts);
while (kv != reader_macros.end()) {
// build a stack of reader macros to apply to this form.
reader_macro_string_stack.push_back(kv->second);
tok = get_next_token(ts);
kv = reader_macros.find(tok.text);
}
} else {
// no reader macro
// only look for the dot when we aren't following a quote.
// this makes '. work.
if (tok.text == ".") {
// list dot notation (ex, (1 . 2))
if (got_dot) {
@ -402,17 +403,22 @@ Object Reader::read_list(TextStream& ts, bool expect_close_paren) {
}
// inserter function, used to properly insert a next object
auto insert_object = [&](Object o) {
auto insert_object = [&](const Object& o) {
if (got_thing_after_dot) {
throw_reader_error(ts, "A list cannot have multiple entries after the dot", -1);
}
// create child list if we got a reader macro (ex 'x -> (quote x))
if (got_reader_macro) {
objects.push_back(
build_list({SymbolObject::make_new(symbolTable, reader_macro_string), o}));
} else {
if (reader_macro_string_stack.empty()) {
objects.push_back(o);
} else {
Object to_push_back = o;
while (!reader_macro_string_stack.empty()) {
to_push_back =
build_list({SymbolObject::make_new(symbolTable, reader_macro_string_stack.back()),
to_push_back});
reader_macro_string_stack.pop_back();
}
objects.push_back(to_push_back);
}
// remember if we got an object after the dot

View file

@ -1124,7 +1124,7 @@ bool is_found_after(CfgVtx* a, CfgVtx* b) {
} // namespace
bool ControlFlowGraph::find_cond_w_else() {
bool ControlFlowGraph::find_cond_w_else(const CondWithElseLengthHack& hack) {
bool found = false;
for_each_top_level_vtx([&](CfgVtx* vtx) {
@ -1261,6 +1261,13 @@ bool ControlFlowGraph::find_cond_w_else() {
}
}
auto hack_lookup = hack.max_length_by_start_block.find(c0->to_form().print());
if (hack_lookup != hack.max_length_by_start_block.end()) {
if ((int)entries.size() > hack_lookup->second) {
return true;
}
}
// now we need to add it
// printf("got cwe\n");
auto new_cwe = alloc<CondWithElse>();
@ -1967,7 +1974,10 @@ CfgVtx::DelaySlotKind get_delay_slot(const Instruction& i) {
/*!
* Build and resolve a Control Flow Graph as much as possible.
*/
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int seg, Function& func) {
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file,
int seg,
Function& func,
const CondWithElseLengthHack& cond_with_else_hack) {
// fmt::print("START {}\n", func.guessed_name.to_string());
auto cfg = std::make_shared<ControlFlowGraph>();
@ -2121,7 +2131,7 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
// todo - should we lower the priority of the conds?
changed = changed || cfg->find_cond_w_else();
changed = changed || cfg->find_cond_w_else(cond_with_else_hack);
changed = changed || cfg->find_while_loop_top_level();
changed = changed || cfg->find_seq_top_level();

View file

@ -317,7 +317,7 @@ class ControlFlowGraph {
void link_fall_through(BlockVtx* first, BlockVtx* second, std::vector<BasicBlock>& blocks);
void link_fall_through_likely(BlockVtx* first, BlockVtx* second, std::vector<BasicBlock>& blocks);
void link_branch(BlockVtx* first, BlockVtx* second, std::vector<BasicBlock>& blocks);
bool find_cond_w_else();
bool find_cond_w_else(const CondWithElseLengthHack& hacks);
bool find_cond_w_empty_else();
bool find_cond_n_else();
@ -382,6 +382,9 @@ class ControlFlowGraph {
class LinkedObjectFile;
class Function;
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int seg, Function& func);
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file,
int seg,
Function& func,
const CondWithElseLengthHack& cond_with_else_hack);
} // namespace decompiler
#endif // JAK_DISASSEMBLER_CFGVTX_H

View file

@ -717,7 +717,7 @@ void ObjectFileDB::analyze_functions_ir1(const Config& config) {
// run analysis
// build a control flow graph, just looking at branch instructions.
func.cfg = build_cfg(data.linked_data, segment_id, func);
func.cfg = build_cfg(data.linked_data, segment_id, func, {});
// convert individual basic blocks to sequences of IR Basic Ops
for (auto& block : func.basic_blocks) {

View file

@ -69,7 +69,7 @@ class ObjectFileDB {
void analyze_functions_ir2(const std::string& output_dir, const Config& config);
void ir2_top_level_pass(const Config& config);
void ir2_stack_spill_slot_pass();
void ir2_basic_block_pass();
void ir2_basic_block_pass(const Config& config);
void ir2_atomic_op_pass(const Config& config);
void ir2_type_analysis_pass(const Config& config);
void ir2_register_usage_pass();
@ -80,8 +80,8 @@ class ObjectFileDB {
void ir2_insert_lets();
void ir2_rewrite_inline_asm_instructions();
void ir2_insert_anonymous_functions();
void ir2_write_results(const std::string& output_dir);
std::string ir2_to_file(ObjectFileData& data);
void ir2_write_results(const std::string& output_dir, const Config& config);
std::string ir2_to_file(ObjectFileData& data, const Config& config);
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
std::string ir2_final_out(ObjectFileData& data,
const std::unordered_set<std::string>& skip_functions = {});

View file

@ -34,7 +34,7 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir, const Co
lg::info("Processing top-level functions...");
ir2_top_level_pass(config);
lg::info("Processing basic blocks and control flow graph...");
ir2_basic_block_pass();
ir2_basic_block_pass(config);
lg::info("Finding stack spills...");
ir2_stack_spill_slot_pass();
lg::info("Converting to atomic ops...");
@ -63,7 +63,7 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir, const Co
if (!output_dir.empty()) {
lg::info("Writing results...");
ir2_write_results(output_dir);
ir2_write_results(output_dir, config);
}
}
@ -168,7 +168,7 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) {
* - Analyze prologue and epilogue
* - Build control flow graph
*/
void ObjectFileDB::ir2_basic_block_pass() {
void ObjectFileDB::ir2_basic_block_pass(const Config& config) {
Timer timer;
// Main Pass over each function...
int total_basic_blocks = 0;
@ -206,7 +206,13 @@ void ObjectFileDB::ir2_basic_block_pass() {
// run analysis
// build a control flow graph, just looking at branch instructions.
func.cfg = build_cfg(data.linked_data, segment_id, func);
CondWithElseLengthHack hack;
auto lookup =
config.hacks.cond_with_else_len_by_func_name.find(func.guessed_name.to_string());
if (lookup != config.hacks.cond_with_else_len_by_func_name.end()) {
hack = lookup->second;
}
func.cfg = build_cfg(data.linked_data, segment_id, func, hack);
if (!func.cfg->is_fully_resolved()) {
lg::warn("Function {} from {} failed to build control flow graph!",
func.guessed_name.to_string(), data.to_unique_name());
@ -543,7 +549,7 @@ void ObjectFileDB::ir2_insert_anonymous_functions() {
lg::info("Inserted {} anonymous functions in {:.2f} ms\n", total, timer.getMs());
}
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
void ObjectFileDB::ir2_write_results(const std::string& output_dir, const Config& config) {
Timer timer;
lg::info("Writing IR2 results to file...");
int total_files = 0;
@ -552,7 +558,7 @@ void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
if (obj.linked_data.has_any_functions()) {
// todo
total_files++;
auto file_text = ir2_to_file(obj);
auto file_text = ir2_to_file(obj, config);
total_bytes += file_text.length();
auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_ir2.asm");
file_util::write_text_file(file_name, file_text);
@ -566,7 +572,7 @@ void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
timer.getMs());
}
std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) {
std::string ObjectFileDB::ir2_to_file(ObjectFileData& data, const Config& config) {
std::string result;
const char* segment_names[] = {"main segment", "debug segment", "top-level segment"};
@ -627,6 +633,12 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) {
}
}
// print if it exists, even if it's not okay.
if (config.print_cfgs && func.cfg) {
result += fmt::format("Control Flow Graph:\n{}\n\n",
pretty_print::to_string(func.cfg->to_form()));
}
if (false && func.ir2.print_debug_forms) {
result += '\n';
result += ";; DEBUG OUTPUT BELOW THIS LINE:\n";

View file

@ -419,7 +419,9 @@ SSA make_rc_ssa(const Function& function, const RegUsageInfo& rui, const Functio
if (succ != -1) {
for (auto reg : end_op_info.live) {
// only update phis for variables that are actually live at the next block.
ssa.add_source_to_phi(succ, reg, current_regs.at(reg));
if (reg.get_kind() != Reg::VF) {
ssa.add_source_to_phi(succ, reg, current_regs.at(reg));
}
}
}
}

View file

@ -49,6 +49,7 @@ Config read_config_file(const std::string& path_to_config_file) {
config.hexdump_code = cfg.at("hexdump_code").get<bool>();
config.hexdump_data = cfg.at("hexdump_data").get<bool>();
config.dump_objs = cfg.at("dump_objs").get<bool>();
config.print_cfgs = cfg.at("print_cfgs").get<bool>();
auto allowed = cfg.at("allowed_objects").get<std::vector<std::string>>();
for (const auto& x : allowed) {
@ -142,6 +143,14 @@ Config read_config_file(const std::string& path_to_config_file) {
hacks_json.at("no_type_analysis_functions_by_name").get<std::unordered_set<std::string>>();
config.hacks.types_with_bad_inspect_methods =
hacks_json.at("types_with_bad_inspect_methods").get<std::unordered_set<std::string>>();
for (auto& entry : hacks_json.at("cond_with_else_max_lengths")) {
auto func_name = entry.at(0).get<std::string>();
auto cond_name = entry.at(1).get<std::string>();
auto max_len = entry.at(2).get<int>();
config.hacks.cond_with_else_len_by_func_name[func_name].max_length_by_start_block[cond_name] =
max_len;
}
return config;
}

View file

@ -42,12 +42,17 @@ struct StackVariableHint {
int stack_offset = 0; // where it's located on the stack (relative to sp after prologue)
};
struct CondWithElseLengthHack {
std::unordered_map<std::string, int> max_length_by_start_block;
};
struct DecompileHacks {
std::unordered_set<std::string> types_with_bad_inspect_methods;
std::unordered_set<std::string> no_type_analysis_functions_by_name;
std::unordered_set<std::string> hint_inline_assembly_functions;
std::unordered_set<std::string> asm_functions_by_name;
std::unordered_set<std::string> pair_functions_by_name;
std::unordered_map<std::string, CondWithElseLengthHack> cond_with_else_len_by_func_name;
};
struct Config {
@ -71,6 +76,7 @@ struct Config {
bool hexdump_code = false;
bool hexdump_data = false;
bool dump_objs = false;
bool print_cfgs = false;
std::unordered_set<std::string> allowed_objects;
std::unordered_map<std::string, std::unordered_map<int, std::vector<TypeCast>>>

View file

@ -52,6 +52,8 @@
"hexdump_data": false,
// dump raw obj files
"dump_objs": false,
// print control flow graph
"print_cfgs": false,
////////////////////////////
// CONFIG FILES

View file

@ -12,6 +12,13 @@
"no_type_analysis_functions_by_name": [],
// this limits the number of cases in a cond. The first argument is the name of the function.
// the second argument is the name of the first condition in the cond. Use print_cfg to find it out.
// The third argument is the number of cases. If you set it too small it may fail to build the CFG.
"cond_with_else_max_lengths":[
["(method 20 res-lump)", "b0", 2]
],
// this provides a hint to the decompiler that these functions will have a lot of inline assembly.
"hint_inline_assembly_functions": ["matrix-transpose!"],

View file

@ -40,12 +40,10 @@
- The `&+` form now works on `inline-array` and `structure`.
- In the case where the type system would use a result type of `lca(none, x)`, the result type is now `none` instead of compiler abort.
- The "none value" is now `(none)` instead of `none`
- Creating a field of 128-bit value type no longer causes a compiler crash
- 128-bit fields are inspected as `<cannot-print>`
- Static fields can now contain floating point values
- Fixed a bug where loading a float from an object and immediately using it math would cause a compiler crash
- Arrays of value types can be created on the stack with `new`.
## V0.2
@ -158,4 +156,6 @@
- Support dynamic construction of 128-bit bitfield values
## V0.8 New Calling Convention for 128-bit
- 128-bit values may now be used in function arguments and return values.
- 128-bit values may now be used in function arguments and return values.
- Fixed a bug where reader errors in `goal-lib.gc` or any error in `goos-lib.gs` would cause a crash
- Fixed a bug where `''a` or similar repeated reader macros would generate a reader error.

View file

@ -46,19 +46,23 @@ int main(int argc, char** argv) {
lg::info("OpenGOAL Compiler {}.{}", versions::GOAL_VERSION_MAJOR, versions::GOAL_VERSION_MINOR);
// Init REPL
std::unique_ptr<Compiler> compiler = std::make_unique<Compiler>();
if (argument.empty()) {
ReplStatus status = ReplStatus::WANT_RELOAD;
while (status == ReplStatus::WANT_RELOAD) {
compiler = std::make_unique<Compiler>(std::make_unique<ReplWrapper>());
status = compiler->execute_repl(auto_listen);
if (status == ReplStatus::WANT_RELOAD) {
fmt::print("Reloading compiler...\n");
// the compiler may throw an exception if it fails to load its standard library.
try {
std::unique_ptr<Compiler> compiler = std::make_unique<Compiler>();
if (argument.empty()) {
ReplStatus status = ReplStatus::WANT_RELOAD;
while (status == ReplStatus::WANT_RELOAD) {
compiler = std::make_unique<Compiler>(std::make_unique<ReplWrapper>());
status = compiler->execute_repl(auto_listen);
if (status == ReplStatus::WANT_RELOAD) {
fmt::print("Reloading compiler...\n");
}
}
} else {
compiler->run_front_end_on_string(argument);
}
} else {
compiler->run_front_end_on_string(argument);
} catch (std::exception& e) {
fmt::print("Compiler Fatal Error: {}\n", e.what());
}
return 0;

View file

@ -149,7 +149,7 @@ std::unique_ptr<FormRegressionTest::TestData> FormRegressionTest::make_function(
// analyze function prologue/epilogue
test->func.analyze_prologue(test->file);
// build control flow graph
test->func.cfg = build_cfg(test->file, 0, test->func);
test->func.cfg = build_cfg(test->file, 0, test->func, {});
EXPECT_TRUE(test->func.cfg->is_fully_resolved());
if (!test->func.cfg->is_fully_resolved()) {
fmt::print("CFG:\n{}\n", test->func.cfg->to_dot());

View file

@ -1255,6 +1255,14 @@ TEST(GoosSpecialForms, Quote) {
}
}
TEST(GoosSpecialForms, DoubleQuote) {
Interpreter i;
e(i, "(define x ''y)");
EXPECT_EQ(e(i, "x"), "(quote y)");
e(i, "(define x `'`y)");
EXPECT_EQ(e(i, "x"), "(quote (quasiquote y))");
}
TEST(GoosSpecialForms, QuasiQuote) {
Interpreter i;
e(i, "(define x 'y)");