From 0f0902eabf72df3f61c507a4edee772153edf2f5 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Mon, 24 May 2021 19:52:19 -0400 Subject: [PATCH] add config option for changing cond splitting behavior (#522) --- common/goos/Reader.cpp | 34 +++++++++++-------- decompiler/Function/CfgVtx.cpp | 16 +++++++-- decompiler/Function/CfgVtx.h | 7 ++-- decompiler/ObjectFile/ObjectFileDB.cpp | 2 +- decompiler/ObjectFile/ObjectFileDB.h | 6 ++-- decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 26 ++++++++++---- decompiler/analysis/variable_naming.cpp | 4 ++- decompiler/config.cpp | 9 +++++ decompiler/config.h | 6 ++++ decompiler/config/jak1_ntsc_black_label.jsonc | 2 ++ .../config/jak1_ntsc_black_label/hacks.jsonc | 7 ++++ docs/markdown/progress-notes/changelog.md | 6 ++-- goalc/main.cpp | 26 ++++++++------ test/decompiler/FormRegressionTest.cpp | 2 +- test/test_goos.cpp | 8 +++++ 15 files changed, 115 insertions(+), 46 deletions(-) diff --git a/common/goos/Reader.cpp b/common/goos/Reader.cpp index ef69a2352..bdc358d43 100644 --- a/common/goos/Reader.cpp +++ b/common/goos/Reader.cpp @@ -376,17 +376,18 @@ Object Reader::read_list(TextStream& ts, bool expect_close_paren) { auto tok = get_next_token(ts); // reader macro thing: - bool got_reader_macro = false; - - std::string reader_macro_string; + std::vector reader_macro_string_stack; auto kv = reader_macros.find(tok.text); if (kv != reader_macros.end()) { - // we found a reader macro! Remember this, and get the next token. - got_reader_macro = true; - reader_macro_string = kv->second; - tok = get_next_token(ts); + while (kv != reader_macros.end()) { + // build a stack of reader macros to apply to this form. + reader_macro_string_stack.push_back(kv->second); + tok = get_next_token(ts); + kv = reader_macros.find(tok.text); + } } else { - // no reader macro + // only look for the dot when we aren't following a quote. + // this makes '. work. if (tok.text == ".") { // list dot notation (ex, (1 . 2)) if (got_dot) { @@ -402,17 +403,22 @@ Object Reader::read_list(TextStream& ts, bool expect_close_paren) { } // inserter function, used to properly insert a next object - auto insert_object = [&](Object o) { + auto insert_object = [&](const Object& o) { if (got_thing_after_dot) { throw_reader_error(ts, "A list cannot have multiple entries after the dot", -1); } - // create child list if we got a reader macro (ex 'x -> (quote x)) - if (got_reader_macro) { - objects.push_back( - build_list({SymbolObject::make_new(symbolTable, reader_macro_string), o})); - } else { + if (reader_macro_string_stack.empty()) { objects.push_back(o); + } else { + Object to_push_back = o; + while (!reader_macro_string_stack.empty()) { + to_push_back = + build_list({SymbolObject::make_new(symbolTable, reader_macro_string_stack.back()), + to_push_back}); + reader_macro_string_stack.pop_back(); + } + objects.push_back(to_push_back); } // remember if we got an object after the dot diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 2054bf983..3f17a9853 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -1124,7 +1124,7 @@ bool is_found_after(CfgVtx* a, CfgVtx* b) { } // namespace -bool ControlFlowGraph::find_cond_w_else() { +bool ControlFlowGraph::find_cond_w_else(const CondWithElseLengthHack& hack) { bool found = false; for_each_top_level_vtx([&](CfgVtx* vtx) { @@ -1261,6 +1261,13 @@ bool ControlFlowGraph::find_cond_w_else() { } } + auto hack_lookup = hack.max_length_by_start_block.find(c0->to_form().print()); + if (hack_lookup != hack.max_length_by_start_block.end()) { + if ((int)entries.size() > hack_lookup->second) { + return true; + } + } + // now we need to add it // printf("got cwe\n"); auto new_cwe = alloc(); @@ -1967,7 +1974,10 @@ CfgVtx::DelaySlotKind get_delay_slot(const Instruction& i) { /*! * Build and resolve a Control Flow Graph as much as possible. */ -std::shared_ptr build_cfg(const LinkedObjectFile& file, int seg, Function& func) { +std::shared_ptr build_cfg(const LinkedObjectFile& file, + int seg, + Function& func, + const CondWithElseLengthHack& cond_with_else_hack) { // fmt::print("START {}\n", func.guessed_name.to_string()); auto cfg = std::make_shared(); @@ -2121,7 +2131,7 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se // todo - should we lower the priority of the conds? - changed = changed || cfg->find_cond_w_else(); + changed = changed || cfg->find_cond_w_else(cond_with_else_hack); changed = changed || cfg->find_while_loop_top_level(); changed = changed || cfg->find_seq_top_level(); diff --git a/decompiler/Function/CfgVtx.h b/decompiler/Function/CfgVtx.h index 7567d75fb..8f43ec0fd 100644 --- a/decompiler/Function/CfgVtx.h +++ b/decompiler/Function/CfgVtx.h @@ -317,7 +317,7 @@ class ControlFlowGraph { void link_fall_through(BlockVtx* first, BlockVtx* second, std::vector& blocks); void link_fall_through_likely(BlockVtx* first, BlockVtx* second, std::vector& blocks); void link_branch(BlockVtx* first, BlockVtx* second, std::vector& blocks); - bool find_cond_w_else(); + bool find_cond_w_else(const CondWithElseLengthHack& hacks); bool find_cond_w_empty_else(); bool find_cond_n_else(); @@ -382,6 +382,9 @@ class ControlFlowGraph { class LinkedObjectFile; class Function; -std::shared_ptr build_cfg(const LinkedObjectFile& file, int seg, Function& func); +std::shared_ptr build_cfg(const LinkedObjectFile& file, + int seg, + Function& func, + const CondWithElseLengthHack& cond_with_else_hack); } // namespace decompiler #endif // JAK_DISASSEMBLER_CFGVTX_H diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 3e41e6e93..8b1d4ea98 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -717,7 +717,7 @@ void ObjectFileDB::analyze_functions_ir1(const Config& config) { // run analysis // build a control flow graph, just looking at branch instructions. - func.cfg = build_cfg(data.linked_data, segment_id, func); + func.cfg = build_cfg(data.linked_data, segment_id, func, {}); // convert individual basic blocks to sequences of IR Basic Ops for (auto& block : func.basic_blocks) { diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 9b8e33f79..be55a0375 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -69,7 +69,7 @@ class ObjectFileDB { void analyze_functions_ir2(const std::string& output_dir, const Config& config); void ir2_top_level_pass(const Config& config); void ir2_stack_spill_slot_pass(); - void ir2_basic_block_pass(); + void ir2_basic_block_pass(const Config& config); void ir2_atomic_op_pass(const Config& config); void ir2_type_analysis_pass(const Config& config); void ir2_register_usage_pass(); @@ -80,8 +80,8 @@ class ObjectFileDB { void ir2_insert_lets(); void ir2_rewrite_inline_asm_instructions(); void ir2_insert_anonymous_functions(); - void ir2_write_results(const std::string& output_dir); - std::string ir2_to_file(ObjectFileData& data); + void ir2_write_results(const std::string& output_dir, const Config& config); + std::string ir2_to_file(ObjectFileData& data, const Config& config); std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); std::string ir2_final_out(ObjectFileData& data, const std::unordered_set& skip_functions = {}); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 39ec8389e..c2ab4b334 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -34,7 +34,7 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir, const Co lg::info("Processing top-level functions..."); ir2_top_level_pass(config); lg::info("Processing basic blocks and control flow graph..."); - ir2_basic_block_pass(); + ir2_basic_block_pass(config); lg::info("Finding stack spills..."); ir2_stack_spill_slot_pass(); lg::info("Converting to atomic ops..."); @@ -63,7 +63,7 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir, const Co if (!output_dir.empty()) { lg::info("Writing results..."); - ir2_write_results(output_dir); + ir2_write_results(output_dir, config); } } @@ -168,7 +168,7 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) { * - Analyze prologue and epilogue * - Build control flow graph */ -void ObjectFileDB::ir2_basic_block_pass() { +void ObjectFileDB::ir2_basic_block_pass(const Config& config) { Timer timer; // Main Pass over each function... int total_basic_blocks = 0; @@ -206,7 +206,13 @@ void ObjectFileDB::ir2_basic_block_pass() { // run analysis // build a control flow graph, just looking at branch instructions. - func.cfg = build_cfg(data.linked_data, segment_id, func); + CondWithElseLengthHack hack; + auto lookup = + config.hacks.cond_with_else_len_by_func_name.find(func.guessed_name.to_string()); + if (lookup != config.hacks.cond_with_else_len_by_func_name.end()) { + hack = lookup->second; + } + func.cfg = build_cfg(data.linked_data, segment_id, func, hack); if (!func.cfg->is_fully_resolved()) { lg::warn("Function {} from {} failed to build control flow graph!", func.guessed_name.to_string(), data.to_unique_name()); @@ -543,7 +549,7 @@ void ObjectFileDB::ir2_insert_anonymous_functions() { lg::info("Inserted {} anonymous functions in {:.2f} ms\n", total, timer.getMs()); } -void ObjectFileDB::ir2_write_results(const std::string& output_dir) { +void ObjectFileDB::ir2_write_results(const std::string& output_dir, const Config& config) { Timer timer; lg::info("Writing IR2 results to file..."); int total_files = 0; @@ -552,7 +558,7 @@ void ObjectFileDB::ir2_write_results(const std::string& output_dir) { if (obj.linked_data.has_any_functions()) { // todo total_files++; - auto file_text = ir2_to_file(obj); + auto file_text = ir2_to_file(obj, config); total_bytes += file_text.length(); auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_ir2.asm"); file_util::write_text_file(file_name, file_text); @@ -566,7 +572,7 @@ void ObjectFileDB::ir2_write_results(const std::string& output_dir) { timer.getMs()); } -std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { +std::string ObjectFileDB::ir2_to_file(ObjectFileData& data, const Config& config) { std::string result; const char* segment_names[] = {"main segment", "debug segment", "top-level segment"}; @@ -627,6 +633,12 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { } } + // print if it exists, even if it's not okay. + if (config.print_cfgs && func.cfg) { + result += fmt::format("Control Flow Graph:\n{}\n\n", + pretty_print::to_string(func.cfg->to_form())); + } + if (false && func.ir2.print_debug_forms) { result += '\n'; result += ";; DEBUG OUTPUT BELOW THIS LINE:\n"; diff --git a/decompiler/analysis/variable_naming.cpp b/decompiler/analysis/variable_naming.cpp index 225c157c6..128d0234a 100644 --- a/decompiler/analysis/variable_naming.cpp +++ b/decompiler/analysis/variable_naming.cpp @@ -419,7 +419,9 @@ SSA make_rc_ssa(const Function& function, const RegUsageInfo& rui, const Functio if (succ != -1) { for (auto reg : end_op_info.live) { // only update phis for variables that are actually live at the next block. - ssa.add_source_to_phi(succ, reg, current_regs.at(reg)); + if (reg.get_kind() != Reg::VF) { + ssa.add_source_to_phi(succ, reg, current_regs.at(reg)); + } } } } diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 38b475039..07d72b5af 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -49,6 +49,7 @@ Config read_config_file(const std::string& path_to_config_file) { config.hexdump_code = cfg.at("hexdump_code").get(); config.hexdump_data = cfg.at("hexdump_data").get(); config.dump_objs = cfg.at("dump_objs").get(); + config.print_cfgs = cfg.at("print_cfgs").get(); auto allowed = cfg.at("allowed_objects").get>(); for (const auto& x : allowed) { @@ -142,6 +143,14 @@ Config read_config_file(const std::string& path_to_config_file) { hacks_json.at("no_type_analysis_functions_by_name").get>(); config.hacks.types_with_bad_inspect_methods = hacks_json.at("types_with_bad_inspect_methods").get>(); + + for (auto& entry : hacks_json.at("cond_with_else_max_lengths")) { + auto func_name = entry.at(0).get(); + auto cond_name = entry.at(1).get(); + auto max_len = entry.at(2).get(); + config.hacks.cond_with_else_len_by_func_name[func_name].max_length_by_start_block[cond_name] = + max_len; + } return config; } diff --git a/decompiler/config.h b/decompiler/config.h index ccdf8cacb..04b20c6a6 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -42,12 +42,17 @@ struct StackVariableHint { int stack_offset = 0; // where it's located on the stack (relative to sp after prologue) }; +struct CondWithElseLengthHack { + std::unordered_map max_length_by_start_block; +}; + struct DecompileHacks { std::unordered_set types_with_bad_inspect_methods; std::unordered_set no_type_analysis_functions_by_name; std::unordered_set hint_inline_assembly_functions; std::unordered_set asm_functions_by_name; std::unordered_set pair_functions_by_name; + std::unordered_map cond_with_else_len_by_func_name; }; struct Config { @@ -71,6 +76,7 @@ struct Config { bool hexdump_code = false; bool hexdump_data = false; bool dump_objs = false; + bool print_cfgs = false; std::unordered_set allowed_objects; std::unordered_map>> diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 553eddd26..2566964e4 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -52,6 +52,8 @@ "hexdump_data": false, // dump raw obj files "dump_objs": false, + // print control flow graph + "print_cfgs": false, //////////////////////////// // CONFIG FILES diff --git a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc index 36055ba62..586af0873 100644 --- a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc @@ -12,6 +12,13 @@ "no_type_analysis_functions_by_name": [], + // this limits the number of cases in a cond. The first argument is the name of the function. + // the second argument is the name of the first condition in the cond. Use print_cfg to find it out. + // The third argument is the number of cases. If you set it too small it may fail to build the CFG. + "cond_with_else_max_lengths":[ + ["(method 20 res-lump)", "b0", 2] + ], + // this provides a hint to the decompiler that these functions will have a lot of inline assembly. "hint_inline_assembly_functions": ["matrix-transpose!"], diff --git a/docs/markdown/progress-notes/changelog.md b/docs/markdown/progress-notes/changelog.md index 4f07f25cd..a51872a09 100644 --- a/docs/markdown/progress-notes/changelog.md +++ b/docs/markdown/progress-notes/changelog.md @@ -40,12 +40,10 @@ - The `&+` form now works on `inline-array` and `structure`. - In the case where the type system would use a result type of `lca(none, x)`, the result type is now `none` instead of compiler abort. - The "none value" is now `(none)` instead of `none` - - Creating a field of 128-bit value type no longer causes a compiler crash - 128-bit fields are inspected as `` - Static fields can now contain floating point values - Fixed a bug where loading a float from an object and immediately using it math would cause a compiler crash - - Arrays of value types can be created on the stack with `new`. ## V0.2 @@ -158,4 +156,6 @@ - Support dynamic construction of 128-bit bitfield values ## V0.8 New Calling Convention for 128-bit -- 128-bit values may now be used in function arguments and return values. \ No newline at end of file +- 128-bit values may now be used in function arguments and return values. +- Fixed a bug where reader errors in `goal-lib.gc` or any error in `goos-lib.gs` would cause a crash +- Fixed a bug where `''a` or similar repeated reader macros would generate a reader error. \ No newline at end of file diff --git a/goalc/main.cpp b/goalc/main.cpp index 60432a6c2..3b0cee6e5 100644 --- a/goalc/main.cpp +++ b/goalc/main.cpp @@ -46,19 +46,23 @@ int main(int argc, char** argv) { lg::info("OpenGOAL Compiler {}.{}", versions::GOAL_VERSION_MAJOR, versions::GOAL_VERSION_MINOR); // Init REPL - std::unique_ptr compiler = std::make_unique(); - - if (argument.empty()) { - ReplStatus status = ReplStatus::WANT_RELOAD; - while (status == ReplStatus::WANT_RELOAD) { - compiler = std::make_unique(std::make_unique()); - status = compiler->execute_repl(auto_listen); - if (status == ReplStatus::WANT_RELOAD) { - fmt::print("Reloading compiler...\n"); + // the compiler may throw an exception if it fails to load its standard library. + try { + std::unique_ptr compiler = std::make_unique(); + if (argument.empty()) { + ReplStatus status = ReplStatus::WANT_RELOAD; + while (status == ReplStatus::WANT_RELOAD) { + compiler = std::make_unique(std::make_unique()); + status = compiler->execute_repl(auto_listen); + if (status == ReplStatus::WANT_RELOAD) { + fmt::print("Reloading compiler...\n"); + } } + } else { + compiler->run_front_end_on_string(argument); } - } else { - compiler->run_front_end_on_string(argument); + } catch (std::exception& e) { + fmt::print("Compiler Fatal Error: {}\n", e.what()); } return 0; diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index 3b135f6cd..1875914af 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -149,7 +149,7 @@ std::unique_ptr FormRegressionTest::make_function( // analyze function prologue/epilogue test->func.analyze_prologue(test->file); // build control flow graph - test->func.cfg = build_cfg(test->file, 0, test->func); + test->func.cfg = build_cfg(test->file, 0, test->func, {}); EXPECT_TRUE(test->func.cfg->is_fully_resolved()); if (!test->func.cfg->is_fully_resolved()) { fmt::print("CFG:\n{}\n", test->func.cfg->to_dot()); diff --git a/test/test_goos.cpp b/test/test_goos.cpp index 029ae0e96..4bbd2d547 100644 --- a/test/test_goos.cpp +++ b/test/test_goos.cpp @@ -1255,6 +1255,14 @@ TEST(GoosSpecialForms, Quote) { } } +TEST(GoosSpecialForms, DoubleQuote) { + Interpreter i; + e(i, "(define x ''y)"); + EXPECT_EQ(e(i, "x"), "(quote y)"); + e(i, "(define x `'`y)"); + EXPECT_EQ(e(i, "x"), "(quote (quasiquote y))"); +} + TEST(GoosSpecialForms, QuasiQuote) { Interpreter i; e(i, "(define x 'y)");