From 4cccaf26453fc79ba1a8ed5b29d518b7a28928d2 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Tue, 9 Jan 2024 16:50:05 -0500 Subject: [PATCH] d/j3: all-types: guess at associated process for non-virtual states and fix some issues along the way (#3300) This will make a best-effort attempt at guessing which process a non-virtual state belongs to: ![Screenshot 2024-01-08 195309](https://github.com/open-goal/jak-project/assets/13153231/69132f10-823d-4df5-b2d6-662d4dd754a0) I also noticed some issues while working on this, mainly around how virtual states were being output. They were being duplicated, for example: ![Screenshot 2024-01-08 184733](https://github.com/open-goal/jak-project/assets/13153231/1edb0f1a-3ac7-46cb-96cd-cf93d42fb01f) or ![Screenshot 2024-01-08 193730](https://github.com/open-goal/jak-project/assets/13153231/45673653-4000-45bb-af00-9baa6e2a70ae) I think I've fixed that, but @Hat-Kid I defer to you to see if I've done something terrible. ![Screenshot 2024-01-08 194513](https://github.com/open-goal/jak-project/assets/13153231/75543d2e-69da-4bbd-b143-2f824b9d8dde) --- .vs/launch.vs.json | 13 ++ decompiler/ObjectFile/ObjectFileDB.h | 3 +- decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 67 ++++++++-- .../analysis/analyze_inspect_method.cpp | 117 +++++++++--------- decompiler/config/jak3/all-types.gc | 89 ++++++------- decompiler/config/jak3/jak3_config.jsonc | 2 +- 6 files changed, 176 insertions(+), 115 deletions(-) diff --git a/.vs/launch.vs.json b/.vs/launch.vs.json index 4c3237e88..9c074ac9a 100644 --- a/.vs/launch.vs.json +++ b/.vs/launch.vs.json @@ -218,6 +218,19 @@ "--config-override \"{\\\"decompile_code\\\": false, \\\"levels_extract\\\": true, \\\"allowed_objects\\\": []}\"" ] }, + { + "type": "default", + "project": "CMakeLists.txt", + "projectTarget": "decompiler.exe (bin\\decompiler.exe)", + "name": "Decompiler - Jak 3", + "args": [ + "${workspaceRoot}/decompiler/config/jak3/jak3_config.jsonc", + "${workspaceRoot}/iso_data", + 
"${workspaceRoot}/decompiler_out", + "--version", + "ntsc_v1" + ] + }, { "type": "default", "project": "CMakeLists.txt", diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 624263b84..9b02b65db 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -222,13 +222,14 @@ class ObjectFileDB { void ir2_setup_labels(const Config& config, ObjectFileData& data); void ir2_run_mips2c(const Config& config, ObjectFileData& data); struct PerObjectAllTypeInfo { - std::string object_name; std::unordered_set already_seen_symbols; // type-name : { method id : state name } std::unordered_map> state_methods; // symbol-name : type-name std::unordered_map symbol_types; + // state-name : type-name + std::unordered_map non_virtual_state_guesses; struct TypeInfo { bool from_inspect_method = false; // does this come from an inspect method? diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index f5447c718..0637bc41c 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -10,6 +10,7 @@ #include "common/log/log.h" #include "common/util/FileUtil.h" #include "common/util/Timer.h" +#include "common/util/string_util.h" #include "decompiler/IR2/Form.h" #include "decompiler/analysis/analyze_inspect_method.h" @@ -319,7 +320,7 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) { void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file, const std::optional& previous_game_types, const std::unordered_set& bad_types) { - std::vector per_object; + std::unordered_map per_object; DecompilerTypeSystem previous_game_ts(GameVersion::Jak2); // version here doesn't matter. 
if (previous_game_types) { @@ -328,16 +329,66 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file, TypeInspectorCache ti_cache; + // Do a first pass to initialize all types and symbols for_each_obj([&](ObjectFileData& data) { if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) { - auto& object_result = per_object.emplace_back(); - object_result.object_name = data.to_unique_name(); - + per_object[data.to_unique_name()] = PerObjectAllTypeInfo(); // Go through the top-level segment first to identify the type names associated with each // symbol def for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) { - inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, object_result); + inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, + per_object.at(data.to_unique_name())); }); + } + }); + + // Guess at non-virtual state type's: + // + // Collect all type names, since the DTS doesn't know the actual type tree (all-types is empty!) + // we can't filter by what is actually a process type (with existing code). 
+ std::unordered_map> all_type_names; + for (auto& [obj_name, obj_info] : per_object) { + for (const auto& type_name : obj_info.type_names_in_order) { + if (all_type_names.find(obj_name) == all_type_names.end()) { + all_type_names[obj_name] = {}; + } + all_type_names[obj_name].push_back(type_name); + } + } + + std::unordered_map state_to_type_map; + for (auto& [obj_name, obj_info] : per_object) { + for (const auto& [sym_name, sym_type] : obj_info.symbol_types) { + if (sym_type == "state") { + int longest_match_length = 0; + std::string longest_match = ""; + std::string longest_match_object_name = ""; + // Make a best effort guess by finding the longest prefix match + for (const auto& [obj_name, type_names] : all_type_names) { + for (const auto& type_name : type_names) { + if (str_util::starts_with(sym_name, type_name) && + type_name.length() > longest_match_length) { + longest_match_length = type_name.length(); + longest_match = type_name; + longest_match_object_name = obj_name; + } + } + } + if (longest_match != "") { + if (per_object.find(longest_match_object_name) != per_object.end()) { + per_object.at(longest_match_object_name).non_virtual_state_guesses[sym_name] = + longest_match; + obj_info.already_seen_symbols.insert(sym_name); + } + } + } + } + } + + // Then another to actually setup the definitions + for_each_obj([&](ObjectFileData& data) { + if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) { + auto& object_result = per_object.at(data.to_unique_name()); // Handle the top level last, which is fine as all symbol_defs are always written after // typedefs @@ -368,12 +419,14 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file, } }); + // Output result + std::string result; result += ";; All Types\n\n"; - for (auto& obj : per_object) { + for (auto& [obj_name, obj] : per_object) { result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"); - result += fmt::format(";; {:30s} ;;\n", 
obj.object_name); + result += fmt::format(";; {:30s} ;;\n", obj_name); result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n"); for (const auto& type_name : obj.type_names_in_order) { auto& info = obj.type_info.at(type_name); diff --git a/decompiler/analysis/analyze_inspect_method.cpp b/decompiler/analysis/analyze_inspect_method.cpp index cee0cb2c2..1081c7730 100644 --- a/decompiler/analysis/analyze_inspect_method.cpp +++ b/decompiler/analysis/analyze_inspect_method.cpp @@ -1427,10 +1427,15 @@ std::string inspect_inspect_method(Function& inspect_method, object_file_meta); } -std::string old_method_string(const MethodInfo& info) { +std::string old_method_string(const MethodInfo& info, const bool omit_comment = false) { if (info.type.arg_count() > 0) { if (info.type.base_type() == "function" || info.type.base_type() == "state") { - std::string result = fmt::format(" ;; ({} (", info.name); + std::string result; + if (omit_comment) { + result = fmt::format(" ({} (", info.name); + } else { + result = fmt::format(" ;; ({} (", info.name); + } bool add = false; for (int i = 0; i < (int)info.type.arg_count() - 1; i++) { result += info.type.get_arg(i).print(); @@ -1450,6 +1455,9 @@ std::string old_method_string(const MethodInfo& info) { } } + if (omit_comment) { + return fmt::format(" ({} {}) weird method", info.name, info.type.print()); + } return fmt::format(" ;; ({} {}) weird method", info.name, info.type.print()); } @@ -1637,81 +1645,75 @@ std::string TypeInspectorResult::print_as_deftype( result.append("\n "); } + std::string state_methods_list; std::unordered_map method_states = {}; if (object_file_meta.state_methods.count(type_name) != 0) { method_states = object_file_meta.state_methods.at(type_name); + for (const auto& [method_id, state_name] : method_states) { + MethodInfo info; + state_methods_list += fmt::format(" {} ;; {}", state_name, method_id); + if (old_game_type && old_game_type->get_my_method(method_id, &info)) { + state_methods_list += ", old:" + 
old_method_string(info, true); + } + state_methods_list += "\n"; + } } + std::string methods_list; if (type_method_count > 9) { - std::string methods_list; - std::string state_methods_list; - MethodInfo old_new_method; if (old_game_type && old_game_type->get_my_new_method(&old_new_method)) { methods_list.append(" "); methods_list.append(old_method_string(old_new_method)); methods_list.push_back('\n'); } - bool done_with_state_methods = false; for (int i = parent_method_count; i < type_method_count; i++) { - bool print_as_state_method = false; - if (method_states.count(i) != 0) { - if (!done_with_state_methods) { - print_as_state_method = true; - state_methods_list.append(fmt::format(" {}", method_states.at(i))); - } else { - methods_list.append( - fmt::format(" ({} () _type_ :state) ;; {}", method_states.at(i), i)); - } - } else { - done_with_state_methods = true; - methods_list.append(fmt::format(" ({}-method-{} () none) ;; {}", type_name, i, i)); + // If it's a state-method (virtual state) skip it + if (method_states.find(i) != method_states.end()) { + continue; } + + methods_list.append(fmt::format(" ({}-method-{} () none) ;; {}", type_name, i, i)); if (old_game_type) { MethodInfo info; if (old_game_type->get_my_method(i, &info)) { - if (print_as_state_method) { - state_methods_list += old_method_string(info); - } else { - methods_list += old_method_string(info); - } + methods_list += old_method_string(info); } } - if (print_as_state_method) { - state_methods_list.push_back('\n'); - } else { - methods_list.push_back('\n'); - } - } - if (!state_methods_list.empty()) { - result.append("(:state-methods\n"); - result.append(state_methods_list); - result.append(" )\n "); - } - if (!methods_list.empty()) { - result.append("(:methods"); - result.append(methods_list); - result.append(" )\n "); + methods_list.push_back('\n'); } } - // Print out (normal) states if we have em - // - Could probably assume the process name comes first and associate it with the right type - 
// but that may or may not be risky so, edit the types yourself... - if (method_states.size() > 0) { - result.append("(:states\n "); - for (const auto& [id, name] : method_states) { - result.append(name); - // Append old symbol def if we have it - auto it = previous_game_ts.symbol_types.find(name); + // non-virtual states + std::string non_virtual_states_list; + for (const auto& [state_name, guessed_type_name] : object_file_meta.non_virtual_state_guesses) { + if (type_name == guessed_type_name) { + std::string line; + line += fmt::format(" {}", state_name); + auto it = previous_game_ts.symbol_types.find(state_name); if (it != previous_game_ts.symbol_types.end()) { - result.append(fmt::format(" ;; {}", it->second.print())); + line += fmt::format(" ;; associated process guessed by decompiler, old: {}", + it->second.print()); } - // Add symbol name to `already_seen_symbols` - object_file_meta.already_seen_symbols.insert(name); - result.append("\n "); + non_virtual_states_list.append(line + "\n"); } - result.append(")\n "); + } + // methods and virtual states + if (!methods_list.empty()) { + result.append("(:methods\n"); + result.append(methods_list); + result.append(" )\n "); + } + if (!state_methods_list.empty()) { + result.append("(:state-methods\n"); + result.append(state_methods_list); + result.append(" )\n "); + } + // non-virtual states + if (!non_virtual_states_list.empty()) { + result.append("(:states\n"); + result.append(non_virtual_states_list); + result.append(" )\n "); } result.append(")\n"); @@ -1800,7 +1802,8 @@ void inspect_top_level_for_metadata(Function& top_level, const auto& aop = top_level.ir2.atomic_ops->ops.at(i); const std::string as_str = aop.get()->to_string(top_level.ir2.env); - // Keep track of the last seen label so we can easily reference it if a later operation uses it + // Keep track of the last seen label so we can easily reference it if a later operation uses + // it auto label_match = get_regex_match(as_str, 
std::regex("\\(set!\\s[^\\s]*\\s(L.*)\\)")); if (!label_match.empty()) { last_seen_label = label_match; @@ -1858,7 +1861,8 @@ void inspect_top_level_for_metadata(Function& top_level, } // lwu t9, 16(v1) ;; [ 21] (set! t9-0 (l.wu (+ v1-10 16))) - // ;; [v1: ] -> [t9: (function symbol type int type) + // ;; [v1: ] -> [t9: (function symbol type int + // type) const auto& aop_1 = top_level.ir2.atomic_ops->ops.at(i + 1); if (!is_set_reg_to_load(aop_1.get(), Register(Reg::GPR, Reg::T9), 16)) { continue; @@ -1935,8 +1939,9 @@ std::string inspect_top_level_symbol_defines(Function& top_level, if (as_store && as_store->addr().kind() == SimpleExpression::Kind::IDENTITY && as_store->addr().get_arg(0).is_sym_val()) { auto& sym_name = as_store->addr().get_arg(0).get_str(); - if (object_file_meta.already_seen_symbols.find(sym_name) == - object_file_meta.already_seen_symbols.end()) { + const auto sym_already_seen = object_file_meta.already_seen_symbols.find(sym_name) != + object_file_meta.already_seen_symbols.end(); + if (!sym_already_seen) { object_file_meta.already_seen_symbols.insert(sym_name); if (dts.ts.partially_defined_type_exists(sym_name)) { continue; diff --git a/decompiler/config/jak3/all-types.gc b/decompiler/config/jak3/all-types.gc index 900ff930d..88e560113 100644 --- a/decompiler/config/jak3/all-types.gc +++ b/decompiler/config/jak3/all-types.gc @@ -169,7 +169,7 @@ (_data uint8 :score -50 :dynamic :offset 16) ) (:methods - (new (symbol type int) _type_ 0)) + (new (symbol type int) _type_)) :method-count-assert 9 :size-assert #x10 :flag-assert #x900000010 @@ -296,9 +296,9 @@ (self process-tree :offset-assert 32) ) (:methods - (new (symbol type string) _type_ 0) - (activate (_type_ process-tree basic pointer) process-tree 9) - (deactivate (_type_) none 10) + (new (symbol type string) _type_) + (activate (_type_ process-tree basic pointer) process-tree) + (deactivate (_type_) none) (init-from-entity! 
"Typically the method that does the initial setup on the process, potentially using the [[entity-actor]] provided as part of that. This commonly includes things such as: @@ -306,9 +306,9 @@ - collision information - loading the skeleton group / bones - sounds" - (_type_ entity-actor) none 11) - (run-logic? (_type_) symbol 12) - (process-tree-method-13 () none 13) + (_type_ entity-actor) none) + (run-logic? (_type_) symbol) + (process-tree-method-13 () none) ) :size-assert #x24 :method-count-assert 14 @@ -361,13 +361,13 @@ :size-assert #x5c :flag-assert #xf0000005c (:methods - (new (symbol type int) _type_ 0) - (update-rates! (_type_ float) float 9) - (advance-by! (_type_ float) clock 10) - (tick! (_type_) clock 11) - (save! (_type_ (pointer uint64)) int 12) - (load! (_type_ (pointer uint64)) int 13) - (reset! (_type_) none 14) + (new (symbol type int) _type_) + (update-rates! (_type_ float) float) + (advance-by! (_type_ float) clock) + (tick! (_type_) clock) + (save! (_type_ (pointer uint64)) int) + (load! (_type_ (pointer uint64)) int) + (reset! (_type_) none) ) ) @@ -387,9 +387,9 @@ :size-assert #x28 :flag-assert #xc00000028 (:methods - (stack-size-set! (_type_ int) none 9) - (thread-suspend (_type_) none 10) - (thread-resume (_type_) none 11) + (stack-size-set! (_type_ int) none) + (thread-suspend (_type_) none) + (thread-resume (_type_) none) ) ) @@ -408,7 +408,7 @@ :size-assert #x80 :flag-assert #xc00000080 (:methods - (new (symbol type process symbol int pointer) _type_ 0) + (new (symbol type process symbol int pointer) _type_) ) ) @@ -421,9 +421,9 @@ :flag-assert #x1000000024 ;; Failed to read fields. (:methods - (new (symbol type int int string) _type_ 0) - (get-process (_type_ type int) process 14) - (return-process (_type_ process) none 15) + (new (symbol type int int string) _type_) + (get-process (_type_ type int) process) + (return-process (_type_ process) none) ) ) @@ -459,19 +459,19 @@ :flag-assert #x1c00000068 ;; Failed to read fields. 
(:methods - (new (symbol type string int int) _type_ 0) - (init (_type_ symbol int) none 16) - (compact (dead-pool-heap int) none 17) - (shrink-heap (dead-pool-heap process) dead-pool-heap 18) - (churn (dead-pool-heap int) none 19) - (memory-used (_type_) int 20) - (memory-total (_type_) int 21) - (memory-free (dead-pool-heap) int 22) - (compact-time (dead-pool-heap) uint 23) - (gap-size (dead-pool-heap dead-pool-heap-rec) int 24) - (gap-location (dead-pool-heap dead-pool-heap-rec) pointer 25) - (find-gap (dead-pool-heap dead-pool-heap-rec) dead-pool-heap-rec 26) - (find-gap-by-size (dead-pool-heap int) dead-pool-heap-rec 27) + (new (symbol type string int int) _type_) + (init (_type_ symbol int) none) + (compact (dead-pool-heap int) none) + (shrink-heap (dead-pool-heap process) dead-pool-heap) + (churn (dead-pool-heap int) none) + (memory-used (_type_) int) + (memory-total (_type_) int) + (memory-free (dead-pool-heap) int) + (compact-time (dead-pool-heap) uint) + (gap-size (dead-pool-heap dead-pool-heap-rec) int) + (gap-location (dead-pool-heap dead-pool-heap-rec) pointer) + (find-gap (dead-pool-heap dead-pool-heap-rec) dead-pool-heap-rec) + (find-gap-by-size (dead-pool-heap int) dead-pool-heap-rec) ) ) @@ -499,7 +499,7 @@ :size-assert #xb0 :flag-assert #x9000000b0 (:methods - (new (symbol type symbol function (pointer uint64)) object 0) + (new (symbol type symbol function (pointer uint64)) object) ) ) @@ -535,7 +535,7 @@ (function none) function (function none) - (function process int symbol event-message-block object)) _type_ 0) + (function process int symbol event-message-block object)) _type_) ) :method-count-assert 9 :size-assert #x24 @@ -566,7 +566,7 @@ :size-assert #x10 :flag-assert #xa00000010 (:methods - (send-all! (_type_) none 9) + (send-all! 
(_type_) none) ) ) @@ -594,7 +594,7 @@ (stack uint8 :dynamic :offset-assert 128 :score -1) ) (:methods - (new (symbol type string int) _type_ 0) + (new (symbol type string int) _type_) ) (:states dead-state @@ -604,17 +604,6 @@ :no-runtime-type ;; already defined by kscheme. Don't do it again. ) -(deftype sql-result (basic) - ((len int32 :offset-assert 4) - (allocated-length uint32 :offset-assert 8) - (error symbol :offset-assert 12) - (data string :dynamic :offset-assert 16) ;; are these actually symbols, or are they strings (in the GOAL code they are treated as strings atleast) - ) - (:methods (new (symbol type uint) _type_ 0)) - :method-count-assert 9 - :size-assert #x10 - ) - (define-extern *sql-result* sql-result) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -693,4 +682,4 @@ (define-extern *mid-pool* process-tree) (define-extern *pusher-pool* process-tree) (define-extern *bg-pool* process-tree) -(define-extern *default-pool* process-tree) ;; process-tree \ No newline at end of file +(define-extern *default-pool* process-tree) ;; process-tree diff --git a/decompiler/config/jak3/jak3_config.jsonc b/decompiler/config/jak3/jak3_config.jsonc index 3b9a05e2d..dd417762c 100644 --- a/decompiler/config/jak3/jak3_config.jsonc +++ b/decompiler/config/jak3/jak3_config.jsonc @@ -80,7 +80,7 @@ //////////////////////////// // CONFIG FILES //////////////////////////// - + "type_casts_file": "decompiler/config/jak3/ntsc_v1/type_casts.jsonc", "anonymous_function_types_file": "decompiler/config/jak3/ntsc_v1/anonymous_function_types.jsonc", "var_names_file": "decompiler/config/jak3/ntsc_v1/var_names.jsonc",