d/j3: all-types: guess at associated process for non-virtual states and fix some issues along the way (#3300)

This makes a best-effort attempt at guessing which process a
non-virtual state belongs to:
![Screenshot 2024-01-08
195309](https://github.com/open-goal/jak-project/assets/13153231/69132f10-823d-4df5-b2d6-662d4dd754a0)

I also noticed some issues while working on this, mainly around how
virtual states were being output. They were being duplicated, for
example:
![Screenshot 2024-01-08
184733](https://github.com/open-goal/jak-project/assets/13153231/1edb0f1a-3ac7-46cb-96cd-cf93d42fb01f)
or
![Screenshot 2024-01-08
193730](https://github.com/open-goal/jak-project/assets/13153231/45673653-4000-45bb-af00-9baa6e2a70ae)

I think I've fixed that, but @Hat-Kid I defer to you to see if I've done
something terrible.
![Screenshot 2024-01-08
194513](https://github.com/open-goal/jak-project/assets/13153231/75543d2e-69da-4bbd-b143-2f824b9d8dde)
This commit is contained in:
Tyler Wilding 2024-01-09 16:50:05 -05:00 committed by GitHub
parent 9d680a0aba
commit 4cccaf2645
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 176 additions and 115 deletions

View file

@ -218,6 +218,19 @@
"--config-override \"{\\\"decompile_code\\\": false, \\\"levels_extract\\\": true, \\\"allowed_objects\\\": []}\""
]
},
{
"type": "default",
"project": "CMakeLists.txt",
"projectTarget": "decompiler.exe (bin\\decompiler.exe)",
"name": "Decompiler - Jak 3",
"args": [
"${workspaceRoot}/decompiler/config/jak3/jak3_config.jsonc",
"${workspaceRoot}/iso_data",
"${workspaceRoot}/decompiler_out",
"--version",
"ntsc_v1"
]
},
{
"type": "default",
"project": "CMakeLists.txt",

View file

@ -222,13 +222,14 @@ class ObjectFileDB {
void ir2_setup_labels(const Config& config, ObjectFileData& data);
void ir2_run_mips2c(const Config& config, ObjectFileData& data);
struct PerObjectAllTypeInfo {
std::string object_name;
std::unordered_set<std::string> already_seen_symbols;
// type-name : { method id : state name }
std::unordered_map<std::string, std::unordered_map<int, std::string>> state_methods;
// symbol-name : type-name
std::unordered_map<std::string, std::string> symbol_types;
// state-name : type-name
std::unordered_map<std::string, std::string> non_virtual_state_guesses;
struct TypeInfo {
bool from_inspect_method = false; // does this come from an inspect method?

View file

@ -10,6 +10,7 @@
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/Timer.h"
#include "common/util/string_util.h"
#include "decompiler/IR2/Form.h"
#include "decompiler/analysis/analyze_inspect_method.h"
@ -319,7 +320,7 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) {
void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
const std::optional<std::string>& previous_game_types,
const std::unordered_set<std::string>& bad_types) {
std::vector<PerObjectAllTypeInfo> per_object;
std::unordered_map<std::string, PerObjectAllTypeInfo> per_object;
DecompilerTypeSystem previous_game_ts(GameVersion::Jak2); // version here doesn't matter.
if (previous_game_types) {
@ -328,16 +329,66 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
TypeInspectorCache ti_cache;
// Do a first pass to initialize all types and symbols
for_each_obj([&](ObjectFileData& data) {
if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) {
auto& object_result = per_object.emplace_back();
object_result.object_name = data.to_unique_name();
per_object[data.to_unique_name()] = PerObjectAllTypeInfo();
// Go through the top-level segment first to identify the type names associated with each
// symbol def
for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) {
inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, object_result);
inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts,
per_object.at(data.to_unique_name()));
});
}
});
// Guess at the types of non-virtual states:
//
// Collect all type names. Since the DTS doesn't know the actual type tree (all-types is empty!),
// we can't filter by what is actually a process type (with existing code).
std::unordered_map<std::string, std::vector<std::string>> all_type_names;
for (auto& [obj_name, obj_info] : per_object) {
for (const auto& type_name : obj_info.type_names_in_order) {
if (all_type_names.find(obj_name) == all_type_names.end()) {
all_type_names[obj_name] = {};
}
all_type_names[obj_name].push_back(type_name);
}
}
std::unordered_map<std::string, std::string> state_to_type_map;
for (auto& [obj_name, obj_info] : per_object) {
for (const auto& [sym_name, sym_type] : obj_info.symbol_types) {
if (sym_type == "state") {
int longest_match_length = 0;
std::string longest_match = "";
std::string longest_match_object_name = "";
// Make a best effort guess by finding the longest prefix match
for (const auto& [obj_name, type_names] : all_type_names) {
for (const auto& type_name : type_names) {
if (str_util::starts_with(sym_name, type_name) &&
type_name.length() > longest_match_length) {
longest_match_length = type_name.length();
longest_match = type_name;
longest_match_object_name = obj_name;
}
}
}
if (longest_match != "") {
if (per_object.find(longest_match_object_name) != per_object.end()) {
per_object.at(longest_match_object_name).non_virtual_state_guesses[sym_name] =
longest_match;
obj_info.already_seen_symbols.insert(sym_name);
}
}
}
}
}
// Then another to actually setup the definitions
for_each_obj([&](ObjectFileData& data) {
if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) {
auto& object_result = per_object.at(data.to_unique_name());
// Handle the top level last, which is fine as all symbol_defs are always written after
// typedefs
@ -368,12 +419,14 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
}
});
// Output result
std::string result;
result += ";; All Types\n\n";
for (auto& obj : per_object) {
for (auto& [obj_name, obj] : per_object) {
result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
result += fmt::format(";; {:30s} ;;\n", obj.object_name);
result += fmt::format(";; {:30s} ;;\n", obj_name);
result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n");
for (const auto& type_name : obj.type_names_in_order) {
auto& info = obj.type_info.at(type_name);

View file

@ -1427,10 +1427,15 @@ std::string inspect_inspect_method(Function& inspect_method,
object_file_meta);
}
std::string old_method_string(const MethodInfo& info) {
std::string old_method_string(const MethodInfo& info, const bool omit_comment = false) {
if (info.type.arg_count() > 0) {
if (info.type.base_type() == "function" || info.type.base_type() == "state") {
std::string result = fmt::format(" ;; ({} (", info.name);
std::string result;
if (omit_comment) {
result = fmt::format(" ({} (", info.name);
} else {
result = fmt::format(" ;; ({} (", info.name);
}
bool add = false;
for (int i = 0; i < (int)info.type.arg_count() - 1; i++) {
result += info.type.get_arg(i).print();
@ -1450,6 +1455,9 @@ std::string old_method_string(const MethodInfo& info) {
}
}
if (omit_comment) {
return fmt::format(" ({} {}) weird method", info.name, info.type.print());
}
return fmt::format(" ;; ({} {}) weird method", info.name, info.type.print());
}
@ -1637,82 +1645,76 @@ std::string TypeInspectorResult::print_as_deftype(
result.append("\n ");
}
std::string state_methods_list;
std::unordered_map<int, std::string> method_states = {};
if (object_file_meta.state_methods.count(type_name) != 0) {
method_states = object_file_meta.state_methods.at(type_name);
for (const auto& [method_id, state_name] : method_states) {
MethodInfo info;
state_methods_list += fmt::format(" {} ;; {}", state_name, method_id);
if (old_game_type && old_game_type->get_my_method(method_id, &info)) {
state_methods_list += ", old:" + old_method_string(info, true);
}
state_methods_list += "\n";
}
}
if (type_method_count > 9) {
std::string methods_list;
std::string state_methods_list;
if (type_method_count > 9) {
MethodInfo old_new_method;
if (old_game_type && old_game_type->get_my_new_method(&old_new_method)) {
methods_list.append(" ");
methods_list.append(old_method_string(old_new_method));
methods_list.push_back('\n');
}
bool done_with_state_methods = false;
for (int i = parent_method_count; i < type_method_count; i++) {
bool print_as_state_method = false;
if (method_states.count(i) != 0) {
if (!done_with_state_methods) {
print_as_state_method = true;
state_methods_list.append(fmt::format(" {}", method_states.at(i)));
} else {
methods_list.append(
fmt::format(" ({} () _type_ :state) ;; {}", method_states.at(i), i));
// If it's a state-method (virtual state) skip it
if (method_states.find(i) != method_states.end()) {
continue;
}
} else {
done_with_state_methods = true;
methods_list.append(fmt::format(" ({}-method-{} () none) ;; {}", type_name, i, i));
}
if (old_game_type) {
MethodInfo info;
if (old_game_type->get_my_method(i, &info)) {
if (print_as_state_method) {
state_methods_list += old_method_string(info);
} else {
methods_list += old_method_string(info);
}
}
}
if (print_as_state_method) {
state_methods_list.push_back('\n');
} else {
methods_list.push_back('\n');
}
}
// non-virtual states
std::string non_virtual_states_list;
for (const auto& [state_name, guessed_type_name] : object_file_meta.non_virtual_state_guesses) {
if (type_name == guessed_type_name) {
std::string line;
line += fmt::format(" {}", state_name);
auto it = previous_game_ts.symbol_types.find(state_name);
if (it != previous_game_ts.symbol_types.end()) {
line += fmt::format(" ;; associated process guessed by decompiler, old: {}",
it->second.print());
}
non_virtual_states_list.append(line + "\n");
}
}
// methods and virtual states
if (!methods_list.empty()) {
result.append("(:methods\n");
result.append(methods_list);
result.append(" )\n ");
}
if (!state_methods_list.empty()) {
result.append("(:state-methods\n");
result.append(state_methods_list);
result.append(" )\n ");
}
if (!methods_list.empty()) {
result.append("(:methods");
result.append(methods_list);
// non-virtual states
if (!non_virtual_states_list.empty()) {
result.append("(:states\n");
result.append(non_virtual_states_list);
result.append(" )\n ");
}
}
// Print out (normal) states if we have em
// - Could probably assume the process name comes first and associate it with the right type
// but that may or may not be risky so, edit the types yourself...
if (method_states.size() > 0) {
result.append("(:states\n ");
for (const auto& [id, name] : method_states) {
result.append(name);
// Append old symbol def if we have it
auto it = previous_game_ts.symbol_types.find(name);
if (it != previous_game_ts.symbol_types.end()) {
result.append(fmt::format(" ;; {}", it->second.print()));
}
// Add symbol name to `already_seen_symbols`
object_file_meta.already_seen_symbols.insert(name);
result.append("\n ");
}
result.append(")\n ");
}
result.append(")\n");
result += "|#\n";
@ -1800,7 +1802,8 @@ void inspect_top_level_for_metadata(Function& top_level,
const auto& aop = top_level.ir2.atomic_ops->ops.at(i);
const std::string as_str = aop.get()->to_string(top_level.ir2.env);
// Keep track of the last seen label so we can easily reference it if a later operation uses it
// Keep track of the last seen label so we can easily reference it if a later operation uses
// it
auto label_match = get_regex_match(as_str, std::regex("\\(set!\\s[^\\s]*\\s(L.*)\\)"));
if (!label_match.empty()) {
last_seen_label = label_match;
@ -1858,7 +1861,8 @@ void inspect_top_level_for_metadata(Function& top_level,
}
// lwu t9, 16(v1) ;; [ 21] (set! t9-0 (l.wu (+ v1-10 16)))
// ;; [v1: <the etype type> ] -> [t9: (function symbol type int type)
// ;; [v1: <the etype type> ] -> [t9: (function symbol type int
// type)
const auto& aop_1 = top_level.ir2.atomic_ops->ops.at(i + 1);
if (!is_set_reg_to_load(aop_1.get(), Register(Reg::GPR, Reg::T9), 16)) {
continue;
@ -1935,8 +1939,9 @@ std::string inspect_top_level_symbol_defines(Function& top_level,
if (as_store && as_store->addr().kind() == SimpleExpression::Kind::IDENTITY &&
as_store->addr().get_arg(0).is_sym_val()) {
auto& sym_name = as_store->addr().get_arg(0).get_str();
if (object_file_meta.already_seen_symbols.find(sym_name) ==
object_file_meta.already_seen_symbols.end()) {
const auto sym_already_seen = object_file_meta.already_seen_symbols.find(sym_name) !=
object_file_meta.already_seen_symbols.end();
if (!sym_already_seen) {
object_file_meta.already_seen_symbols.insert(sym_name);
if (dts.ts.partially_defined_type_exists(sym_name)) {
continue;

View file

@ -169,7 +169,7 @@
(_data uint8 :score -50 :dynamic :offset 16)
)
(:methods
(new (symbol type int) _type_ 0))
(new (symbol type int) _type_))
:method-count-assert 9
:size-assert #x10
:flag-assert #x900000010
@ -296,9 +296,9 @@
(self process-tree :offset-assert 32)
)
(:methods
(new (symbol type string) _type_ 0)
(activate (_type_ process-tree basic pointer) process-tree 9)
(deactivate (_type_) none 10)
(new (symbol type string) _type_)
(activate (_type_ process-tree basic pointer) process-tree)
(deactivate (_type_) none)
(init-from-entity!
"Typically the method that does the initial setup on the process, potentially using the [[entity-actor]] provided as part of that.
This commonly includes things such as:
@ -306,9 +306,9 @@
- collision information
- loading the skeleton group / bones
- sounds"
(_type_ entity-actor) none 11)
(run-logic? (_type_) symbol 12)
(process-tree-method-13 () none 13)
(_type_ entity-actor) none)
(run-logic? (_type_) symbol)
(process-tree-method-13 () none)
)
:size-assert #x24
:method-count-assert 14
@ -361,13 +361,13 @@
:size-assert #x5c
:flag-assert #xf0000005c
(:methods
(new (symbol type int) _type_ 0)
(update-rates! (_type_ float) float 9)
(advance-by! (_type_ float) clock 10)
(tick! (_type_) clock 11)
(save! (_type_ (pointer uint64)) int 12)
(load! (_type_ (pointer uint64)) int 13)
(reset! (_type_) none 14)
(new (symbol type int) _type_)
(update-rates! (_type_ float) float)
(advance-by! (_type_ float) clock)
(tick! (_type_) clock)
(save! (_type_ (pointer uint64)) int)
(load! (_type_ (pointer uint64)) int)
(reset! (_type_) none)
)
)
@ -387,9 +387,9 @@
:size-assert #x28
:flag-assert #xc00000028
(:methods
(stack-size-set! (_type_ int) none 9)
(thread-suspend (_type_) none 10)
(thread-resume (_type_) none 11)
(stack-size-set! (_type_ int) none)
(thread-suspend (_type_) none)
(thread-resume (_type_) none)
)
)
@ -408,7 +408,7 @@
:size-assert #x80
:flag-assert #xc00000080
(:methods
(new (symbol type process symbol int pointer) _type_ 0)
(new (symbol type process symbol int pointer) _type_)
)
)
@ -421,9 +421,9 @@
:flag-assert #x1000000024
;; Failed to read fields.
(:methods
(new (symbol type int int string) _type_ 0)
(get-process (_type_ type int) process 14)
(return-process (_type_ process) none 15)
(new (symbol type int int string) _type_)
(get-process (_type_ type int) process)
(return-process (_type_ process) none)
)
)
@ -459,19 +459,19 @@
:flag-assert #x1c00000068
;; Failed to read fields.
(:methods
(new (symbol type string int int) _type_ 0)
(init (_type_ symbol int) none 16)
(compact (dead-pool-heap int) none 17)
(shrink-heap (dead-pool-heap process) dead-pool-heap 18)
(churn (dead-pool-heap int) none 19)
(memory-used (_type_) int 20)
(memory-total (_type_) int 21)
(memory-free (dead-pool-heap) int 22)
(compact-time (dead-pool-heap) uint 23)
(gap-size (dead-pool-heap dead-pool-heap-rec) int 24)
(gap-location (dead-pool-heap dead-pool-heap-rec) pointer 25)
(find-gap (dead-pool-heap dead-pool-heap-rec) dead-pool-heap-rec 26)
(find-gap-by-size (dead-pool-heap int) dead-pool-heap-rec 27)
(new (symbol type string int int) _type_)
(init (_type_ symbol int) none)
(compact (dead-pool-heap int) none)
(shrink-heap (dead-pool-heap process) dead-pool-heap)
(churn (dead-pool-heap int) none)
(memory-used (_type_) int)
(memory-total (_type_) int)
(memory-free (dead-pool-heap) int)
(compact-time (dead-pool-heap) uint)
(gap-size (dead-pool-heap dead-pool-heap-rec) int)
(gap-location (dead-pool-heap dead-pool-heap-rec) pointer)
(find-gap (dead-pool-heap dead-pool-heap-rec) dead-pool-heap-rec)
(find-gap-by-size (dead-pool-heap int) dead-pool-heap-rec)
)
)
@ -499,7 +499,7 @@
:size-assert #xb0
:flag-assert #x9000000b0
(:methods
(new (symbol type symbol function (pointer uint64)) object 0)
(new (symbol type symbol function (pointer uint64)) object)
)
)
@ -535,7 +535,7 @@
(function none)
function
(function none)
(function process int symbol event-message-block object)) _type_ 0)
(function process int symbol event-message-block object)) _type_)
)
:method-count-assert 9
:size-assert #x24
@ -566,7 +566,7 @@
:size-assert #x10
:flag-assert #xa00000010
(:methods
(send-all! (_type_) none 9)
(send-all! (_type_) none)
)
)
@ -594,7 +594,7 @@
(stack uint8 :dynamic :offset-assert 128 :score -1)
)
(:methods
(new (symbol type string int) _type_ 0)
(new (symbol type string int) _type_)
)
(:states
dead-state
@ -604,17 +604,6 @@
:no-runtime-type ;; already defined by kscheme. Don't do it again.
)
(deftype sql-result (basic)
((len int32 :offset-assert 4)
(allocated-length uint32 :offset-assert 8)
(error symbol :offset-assert 12)
(data string :dynamic :offset-assert 16) ;; are these actually symbols, or are they strings (in the GOAL code they are treated as strings atleast)
)
(:methods (new (symbol type uint) _type_ 0))
:method-count-assert 9
:size-assert #x10
)
(define-extern *sql-result* sql-result)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;