// jak-project/decompiler/util/DecompilerTypeSystem.cpp

#include "DecompilerTypeSystem.h"
#include "TP_Type.h"
#include "common/goos/Printer.h"
#include "common/goos/Reader.h"
#include "common/log/log.h"
#include "common/type_system/defenum.h"
#include "common/type_system/deftype.h"
#include "common/util/string_util.h"
#include "decompiler/Disasm/Register.h"
namespace decompiler {
/*!
 * Create a decompiler type system for a specific game version.
 * The version is remembered in m_version and the wrapped type system is asked to add its
 * builtin types for that version.
 */
DecompilerTypeSystem::DecompilerTypeSystem(GameVersion version) : m_version(version) {
  // m_version was initialized from `version` before the body runs, so this is the same value.
  ts.add_builtin_types(m_version);
}
namespace {
// Helpers for walking the lists read from the type definition file.

/*!
 * Get the first element of a pair.
 * Throws std::runtime_error (with the printed object in the message) if the argument is not a
 * pair.
 */
goos::Object& car(const goos::Object& pair) {
  if (!pair.is_pair()) {
    throw std::runtime_error("car called on something that was not a pair: " + pair.print());
  }
  return pair.as_pair()->car;
}

/*!
 * Get the rest of a pair (everything after the first element).
 * Throws std::runtime_error if the argument is not a pair.
 */
goos::Object& cdr(const goos::Object& pair) {
  if (!pair.is_pair()) {
    throw std::runtime_error("cdr called on something that was not a pair");
  }
  return pair.as_pair()->cdr;
}

/*!
 * Call f on each element of a list, in order.
 * After the last pair, the list must end in the empty list; anything else is reported as
 * a std::runtime_error("malformed list"). Elements visited before that point have already
 * been passed to f.
 */
template <typename T>
void for_each_in_list(goos::Object& list, T f) {
  goos::Object* node = &list;
  for (; node->is_pair(); node = &cdr(*node)) {
    f(car(*node));
  }

  const bool properly_terminated = node->is_empty_list();
  if (!properly_terminated) {
    throw std::runtime_error("malformed list");
  }
}
}  // namespace
void DecompilerTypeSystem::parse_type_defs(const std::vector<std::string>& file_path) {
auto read = m_reader.read_from_file(file_path);
auto& data = cdr(read);
for_each_in_list(data, [&](goos::Object& o) {
try {
if (car(o).as_symbol() == "define-extern") {
auto symbol_metadata = DefinitionMetadata();
auto* rest = &cdr(o);
auto sym_name = car(*rest);
rest = &cdr(*rest);
// check for docstring
if (rest->is_pair() && car(*rest).is_string()) {
symbol_metadata.docstring = str_util::trim_newline_indents(car(*rest).as_string()->data);
rest = &cdr(*rest);
}
auto sym_type = car(*rest);
if (!cdr(*rest).is_empty_list()) {
throw std::runtime_error("malformed define-extern");
}
symbol_metadata.definition_info = m_reader.db.get_short_info_for(o);
add_symbol(sym_name.as_symbol().name_ptr, parse_typespec(&ts, sym_type), symbol_metadata);
} else if (car(o).as_symbol() == "def-event-handler") {
auto symbol_metadata = DefinitionMetadata();
auto* rest = &cdr(o);
auto sym_name = car(*rest);
rest = &cdr(*rest);
// check for docstring
if (rest->is_pair() && car(*rest).is_string()) {
symbol_metadata.docstring = str_util::trim_newline_indents(car(*rest).as_string()->data);
rest = &cdr(*rest);
}
if (!cdr(*rest).is_empty_list()) {
throw std::runtime_error("malformed def-event-handler");
}
auto behavior_tag = std::string(car(*rest).as_symbol().name_ptr);
std::vector<std::string> signature = {
"function", "process", "int", "symbol", "event-message-block",
"object", ":behavior", behavior_tag};
auto sym_type = pretty_print::build_list(signature);
symbol_metadata.definition_info = m_reader.db.get_short_info_for(o);
add_symbol(sym_name.as_symbol().name_ptr, parse_typespec(&ts, sym_type), symbol_metadata);
} else if (car(o).as_symbol() == "deftype") {
auto dtr = parse_deftype(cdr(o), &ts);
dtr.type_info->m_metadata.definition_info = m_reader.db.get_short_info_for(o);
if (dtr.create_runtime_type) {
add_symbol(dtr.type.base_type(), "type", dtr.type_info->m_metadata);
}
// declare the type's states globally
for (auto& state : dtr.type_info->get_states_declared_for_type()) {
// TODO - get definition info for the state definitions specifically
add_symbol(state.first, state.second, dtr.type_info->m_metadata);
}
docs: add support for `:override-doc` in method declarations as well as documenting state handlers (#2139) Adding support for better child-type method docstrings. This is a problem unique to methods. Usually, a child-type will have the same signature and a common name will apply, but the implementation is different. This means, you probably want a different docstring to describe what is happening. Currently this is possible to do via `:replace`. The problem with replace is two fold: - a replaced method ends up in the generated `deftype`...because you usually change the signature! - we don't put docstrings in the `deftype` in normal GOAL, this is just something we do for the `all-types` file (they go in the `defmethod` instead) - more importantly, this means anytime you now want to change the parent's name/args/return type -- you have to apply that change everywhere. So this is a better design you can now just declare the method like so: ```clj (:override-doc "my new docstring" <method_id>) ``` And internally a pseudo-replaced method will be added, but it will inherit everything from the parent (except the docstring of course) Unrelated - I also made all the keyword args for declaring methods not depend on ordering This also adds support for documenting virtual and non-virtual state handlers. For example: ```clj (:states (part-tester-idle (:event "test") symbol)) ``` or ```clj (idle () _type_ :state (:event "test") 20) ``` I will probably add the ability to give some sort of over-view docstring at a later date. Co-authored-by: water <awaterford111445@gmail.com>
2023-01-21 20:45:45 -05:00
// add state documentation to the DTS
virtual_state_metadata.emplace(dtr.type.base_type(),
dtr.type_info->m_virtual_state_definition_meta);
for (const auto& [state_name, meta] : dtr.type_info->m_state_definition_meta) {
state_metadata.emplace(state_name, meta);
}
} else if (car(o).as_symbol() == "declare-type") {
auto* rest = &cdr(o);
auto type_name = car(*rest);
rest = &cdr(*rest);
auto type_kind = car(*rest);
if (!cdr(*rest).is_empty_list()) {
throw std::runtime_error("malformed declare-type");
}
ts.forward_declare_type_as(type_name.as_symbol().name_ptr, type_kind.as_symbol().name_ptr);
} else if (car(o).as_symbol() == "defenum") {
auto symbol_metadata = DefinitionMetadata();
parse_defenum(cdr(o), &ts, &symbol_metadata);
symbol_metadata.definition_info = m_reader.db.get_short_info_for(o);
auto* rest = &cdr(o);
const auto& enum_name = car(*rest).as_symbol();
symbol_metadata_map[enum_name.name_ptr] = symbol_metadata;
// so far, enums are never runtime types so there's no symbol for them.
2020-10-25 12:07:10 -04:00
} else {
throw std::runtime_error("Decompiler cannot parse " + car(o).print());
2020-10-25 12:07:10 -04:00
}
} catch (std::exception& e) {
auto info = m_reader.db.get_info_for(o);
lg::error("{} when parsing decompiler type file:{}", e.what(), info);
throw;
}
});
}
void DecompilerTypeSystem::parse_enum_defs(const std::vector<std::string>& file_path) {
auto read = m_reader.read_from_file(file_path);
auto& data = cdr(read);
for_each_in_list(data, [&](goos::Object& o) {
try {
if (car(o).as_symbol() == "defenum") {
auto symbol_metadata = DefinitionMetadata();
parse_defenum(cdr(o), &ts, &symbol_metadata);
symbol_metadata.definition_info = m_reader.db.get_short_info_for(o);
auto* rest = &cdr(o);
const auto& enum_name = car(*rest).as_symbol();
symbol_metadata_map[enum_name.name_ptr] = symbol_metadata;
// so far, enums are never runtime types so there's no symbol for them.
}
} catch (std::exception& e) {
auto info = m_reader.db.get_info_for(o);
lg::error("{} when parsing decompiler type file:{}", e.what(), info);
`deftype` and `defmethod` syntax major changes (#3094) Major change to how `deftype` shows up in our code: - the decompiler will no longer emit the `offset-assert`, `method-count-assert`, `size-assert` and `flag-assert` parameters. There are extremely few cases where having this in the decompiled code is helpful, as the types there come from `all-types` which already has those parameters. This also doesn't break type consistency because: - the asserts aren't compared. - the first step of the test uses `all-types`, which has the asserts, which will throw an error if they're bad. - the decompiler won't emit the `heap-base` parameter unless necessary now. - the decompiler will try its hardest to turn a fixed-offset field into an `overlay-at` field. It falls back to the old offset if all else fails. - `overlay-at` now supports field "dereferencing" to specify the offset that's within a field that's a structure, e.g.: ```lisp (deftype foobar (structure) ((vec vector :inline) (flags int32 :overlay-at (-> vec w)) ) ) ``` in this structure, the offset of `flags` will be 12 because that is the final offset of `vec`'s `w` field within this structure. - **removed ID from all method declarations.** IDs are only ever automatically assigned now. Fixes #3068. - added an `:overlay` parameter to method declarations, in order to declare a new method that goes on top of a previously-defined method. Syntax is `:overlay <method-name>`. Please do not ever use this. - added `state-methods` list parameter. This lets you quickly specify a list of states to be put in the method table. Same syntax as the `states` list parameter. The decompiler will try to put as many states in this as it can without messing with the method ID order. Also changes `defmethod` to make the first type definition (before the arguments) optional. The type can now be inferred from the first argument. Fixes #3093. --------- Co-authored-by: Hat Kid <6624576+Hat-Kid@users.noreply.github.com>
2023-10-29 23:20:02 -04:00
throw;
}
});
}
/*!
 * Parse a TypeSpec from its textual form.
 * The string is run through the reader; the first element of the reader's result is skipped
 * and the element after it is handed to parse_typespec.
 */
TypeSpec DecompilerTypeSystem::parse_type_spec(const std::string& str) const {
  auto read = m_reader.read_from_string(str);
  // `read` stays alive for the whole return expression, so the references remain valid.
  return parse_typespec(&ts, car(cdr(read)));
}
/*!
 * Produce one define-extern line per known symbol, in the order the symbols were added.
 * Symbols without a recorded type are emitted as a commented-out line typed as object.
 */
std::string DecompilerTypeSystem::dump_symbol_types() {
  ASSERT(symbol_add_order.size() == symbols.size());

  std::string dump;
  for (const auto& name : symbol_add_order) {
    const auto known = symbol_types.find(name);
    if (known != symbol_types.end()) {
      dump += fmt::format("(define-extern {} {})\n", name, known->second.print());
      continue;
    }
    dump += fmt::format(";;(define-extern {} object) ;; unknown type\n", name);
  }
  return dump;
}
/*!
 * Record the flags value for a type name.
 * If different flags were already recorded for the name, two warnings are logged; the new
 * value always replaces the old one.
 */
void DecompilerTypeSystem::add_type_flags(const std::string& name, u64 flags) {
  const auto previous = type_flags.find(name);
  const bool conflicts = previous != type_flags.end() && previous->second != flags;
  if (conflicts) {
    lg::warn("duplicated type flags for {}, was 0x{:x}, now 0x{:x}", name.c_str(),
             previous->second, flags);
    lg::warn("duplicated type flags that are inconsistent!");
  }
  type_flags[name] = flags;
}
/*!
 * Record the parent type name of a child type.
 * Recording the same parent again is fine. Recording a different parent for a child that
 * already has one logs a warning and throws std::runtime_error without changing the entry.
 */
void DecompilerTypeSystem::add_type_parent(const std::string& child, const std::string& parent) {
  const auto previous = type_parents.find(child);
  const bool conflicts = previous != type_parents.end() && previous->second != parent;
  if (conflicts) {
    lg::warn("duplicated type parents for {} was {} now {}", child.c_str(),
             previous->second.c_str(), parent.c_str());
    throw std::runtime_error("duplicated type parents that are inconsistent!");
  }
  type_parents[child] = parent;
}
/*!
 * Look up the recorded parent of a type.
 * process-tree and process have hard-coded answers that take priority over recorded parents.
 * Returns "UNKNOWN" when nothing is recorded for the child.
 */
std::string DecompilerTypeSystem::lookup_parent_from_inspects(const std::string& child) const {
  // hard-coded parents, checked before the recorded ones.
  if (child == "process-tree") {
    return "basic";
  }
  if (child == "process") {
    return "process-tree";
  }

  const auto recorded = type_parents.find(child);
  if (recorded == type_parents.end()) {
    return "UNKNOWN";
  }
  return recorded->second;
}
bool DecompilerTypeSystem::lookup_flags(const std::string& type, u64* dest) const {
if (type == "process-tree") {
*dest = ((u64)0xe << 32) + (0 << 16) + 0x24;
return true;
}
if (type == "process") {
*dest = ((u64)0xe << 32) + (0 << 16) + 0x80;
return true;
}
auto kv = type_flags.find(type);
if (kv != type_flags.end()) {
*dest = kv->second;
return true;
}
return false;
}
/*!
 * Add a symbol together with its type and definition metadata.
 *
 * The symbol is first registered by name through the single-argument add_symbol overload.
 * Then:
 *  - if the symbol has no type yet, or the same type is given again, the type is stored and,
 *    when the metadata carries definition info, the metadata is stored too;
 *  - if a different type is already stored and ts.tc(type_spec, existing) accepts the pair,
 *    the existing entry is left untouched;
 *  - otherwise a warning is logged and std::runtime_error("Type redefinition") is thrown.
 */
void DecompilerTypeSystem::add_symbol(const std::string& name,
                                      const TypeSpec& type_spec,
                                      const DefinitionMetadata& symbol_metadata) {
  add_symbol(name);
  auto skv = symbol_types.find(name);
  if (skv == symbol_types.end() || skv->second == type_spec) {
    symbol_types[name] = type_spec;
    // TODO - could get rid of this if there is a way to go from TypeSpec -> full Type
    if (symbol_metadata.definition_info) {
      symbol_metadata_map[name] = symbol_metadata;
    }
  } else if (!ts.tc(type_spec, skv->second)) {
    // the two types do not pass the type check: refuse to change the type.
    lg::warn("Attempting to redefine type of symbol {} from {} to {}", name, skv->second.print(),
             type_spec.print());
    throw std::runtime_error("Type redefinition");
  }
  // remaining case: the types differ but pass the check, keep what is already stored.
}
/*!
 * Compute the least common ancestor of two TP Types.
 *
 * @param existing the type currently known.
 * @param add      the type being merged in.
 * @param changed  always written: true if the result is considered different from `existing`.
 *                 Several same-kind cases set it to true unconditionally because the two inputs
 *                 are already known to be different at that point.
 * @return the merged type.
 *
 * The checks are ordered from most vague to most specific:
 *  1. identical inputs are returned unchanged;
 *  2. an UNINITIALIZED or FALSE_AS_NULL `add` never modifies `existing`, while an `existing` of
 *     those kinds is replaced by `add`;
 *  3. inputs of the same kind are merged with a kind-specific rule;
 *  4. inputs of different kinds are merged as format strings or as type objects when both
 *     qualify, and otherwise through a TypeSpec lowest common ancestor.
 */
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing,
                                     const TP_Type& add,
                                     bool* changed) const {
  // starting from most vague to most specific

  // simplest case, no difference.
  if (existing == add) {
    *changed = false;
    return existing;
  }

  // being sometimes uninitialized should not modify types.
  if (add.kind == TP_Type::Kind::UNINITIALIZED) {
    *changed = false;
    return existing;
  }

  // replace anything that's uninitialized sometimes.
  if (existing.kind == TP_Type::Kind::UNINITIALIZED) {
    *changed = true;  // add is not uninitialized because of the previous check.
    return add;
  }

  // similar to before, false as null shouldn't modify types.
  if (add.kind == TP_Type::Kind::FALSE_AS_NULL) {
    *changed = false;
    return existing;
  }

  // replace any false as nulls.
  if (existing.kind == TP_Type::Kind::FALSE_AS_NULL) {
    *changed = true;  // add is not false-as-null because of the previous check.
    return add;
  }

  // different values, but the same kind.
  if (existing.kind == add.kind) {
    switch (existing.kind) {
      case TP_Type::Kind::TYPESPEC: {
        auto new_result = TP_Type::make_from_ts(coerce_to_reg_type(ts.lowest_common_ancestor(
            existing.get_objects_typespec(), add.get_objects_typespec())));
        *changed = (new_result != existing);
        return new_result;
      }
      case TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD: {
        auto new_result = TP_Type::make_type_allow_virtual_object(ts.lowest_common_ancestor(
            existing.get_type_objects_typespec(), add.get_type_objects_typespec()));
        *changed = (new_result != existing);
        return new_result;
      }
      case TP_Type::Kind::TYPE_OF_TYPE_NO_VIRTUAL: {
        auto new_result = TP_Type::make_type_no_virtual_object(ts.lowest_common_ancestor(
            existing.get_type_objects_typespec(), add.get_type_objects_typespec()));
        *changed = (new_result != existing);
        return new_result;
      }
      case TP_Type::Kind::PRODUCT_WITH_CONSTANT:
        // we know they are different.
        *changed = true;
        return TP_Type::make_from_ts(TypeSpec("int"));
      case TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
        *changed = true;
        // todo - there might be cases where we need to LCA the base types??
        return TP_Type::make_from_ts(TypeSpec("object"));
      case TP_Type::Kind::OBJECT_NEW_METHOD:
        *changed = true;
        // this case should never happen I think.
        return TP_Type::make_from_ts(TypeSpec("function"));
      case TP_Type::Kind::STRING_CONSTANT: {
        // two different constant strings: keep the argument count if they agree on it.
        auto existing_count = get_format_arg_count(existing.get_string());
        auto added_count = get_format_arg_count(add.get_string());
        *changed = true;
        if (added_count == existing_count) {
          return TP_Type::make_from_format_string(existing_count);
        } else {
          return TP_Type::make_from_ts(TypeSpec("string"));
        }
      }
      case TP_Type::Kind::INTEGER_CONSTANT:
        *changed = true;
        return TP_Type::make_from_ts(TypeSpec("int"));
      case TP_Type::Kind::FORMAT_STRING:
        if (existing.get_format_string_arg_count() == add.get_format_string_arg_count()) {
          *changed = false;
          return existing;
        } else {
          *changed = true;
          return TP_Type::make_from_ts(TypeSpec("string"));
        }
      case TP_Type::Kind::INTEGER_CONSTANT_PLUS_VAR:
        if (existing.get_integer_constant() == add.get_integer_constant()) {
          auto new_t = coerce_to_reg_type(ts.lowest_common_ancestor(existing.get_objects_typespec(),
                                                                    add.get_objects_typespec()));
          auto new_child = TP_Type::make_from_integer_constant_plus_var(
              existing.get_integer_constant(), new_t, new_t);
          *changed = (new_child != existing);
          return new_child;
        } else {
          *changed = true;
          return TP_Type::make_from_ts("int");
        }
      case TP_Type::Kind::INTEGER_CONSTANT_PLUS_VAR_MULT:
        // a bit lazy here, but I don't think you can ever merge these.
        *changed = true;
        return TP_Type::make_from_ts("int");
      case TP_Type::Kind::VIRTUAL_METHOD:
      case TP_Type::Kind::NON_VIRTUAL_METHOD:
        // never allow this to remain method
        *changed = true;
        return TP_Type::make_from_ts(
            ts.lowest_common_ancestor(existing.typespec(), add.typespec()));
      case TP_Type::Kind::LABEL_ADDR:
        *changed = false;
        return existing;
      case TP_Type::Kind::SYMBOL:
        *changed = true;
        return TP_Type::make_from_ts("symbol");
      case TP_Type::Kind::FALSE_AS_NULL:
      case TP_Type::Kind::UNINITIALIZED:
      case TP_Type::Kind::DYNAMIC_METHOD_ACCESS:
      case TP_Type::Kind::INVALID:
      default:
        // the first two kinds were handled by the early returns above; none of these are
        // expected here.
        ASSERT(false);
        return {};
    }
  } else {
    // trying to combine two of different types.
    if (existing.can_be_format_string() && add.can_be_format_string()) {
      int existing_count = get_format_arg_count(existing);
      int add_count = get_format_arg_count(add);
      TP_Type result_type;
      if (existing_count == add_count) {
        result_type = TP_Type::make_from_format_string(existing_count);
      } else {
        result_type = TP_Type::make_from_ts(TypeSpec("string"));
      }

      *changed = (result_type != existing);
      return result_type;
    }

    // one side is a no-virtual type object and the other allows virtual (in either order):
    // the result is always the no-virtual flavor.
    const bool mixes_type_object_kinds =
        (existing.kind == TP_Type::Kind::TYPE_OF_TYPE_NO_VIRTUAL &&
         add.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD) ||
        (existing.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD &&
         add.kind == TP_Type::Kind::TYPE_OF_TYPE_NO_VIRTUAL);
    if (mixes_type_object_kinds) {
      auto result_type = TP_Type::make_type_no_virtual_object(ts.lowest_common_ancestor(
          existing.get_type_objects_typespec(), add.get_type_objects_typespec()));
      *changed = (result_type != existing);
      return result_type;
    }

    // otherwise, as an absolute fallback, convert both to TypeSpecs and do TypeSpec LCA
    auto new_result = TP_Type::make_from_ts(
        coerce_to_reg_type(ts.lowest_common_ancestor(existing.typespec(), add.typespec())));
    *changed = (new_result != existing);
    return new_result;
  }
}
/*!
* Find the least common ancestor of an entire typestate.
*/
2020-11-27 16:38:36 -05:00
bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) {
bool result = false;
for (int i = 0; i < 32; i++) {
bool diff = false;
auto new_type = tp_lca(combined->gpr_types[i], add.gpr_types[i], &diff);
if (diff) {
result = true;
combined->gpr_types[i] = new_type;
}
}
for (int i = 0; i < 32; i++) {
bool diff = false;
auto new_type = tp_lca(combined->fpr_types[i], add.fpr_types[i], &diff);
if (diff) {
result = true;
combined->fpr_types[i] = new_type;
}
}
for (auto& x : add.spill_slots) {
// auto existing = combined->spill_slots.find(x.first);
// if (existing == combined->spill_slots.end()) {
// result = true;
// combined->spill_slots.insert({existing->first, existing->second});
// }
bool diff = false;
auto new_type = tp_lca(combined->spill_slots[x.first], x.second, &diff);
if (diff) {
result = true;
combined->spill_slots[x.first] = new_type;
}
}
bool diff = false;
auto new_type = tp_lca(combined->next_state_type, add.next_state_type, &diff);
if (diff) {
result = true;
combined->next_state_type = new_type;
}
2020-11-27 16:38:36 -05:00
return result;
}
int DecompilerTypeSystem::get_format_arg_count(const std::string& str) const {
auto bad_it = bad_format_strings.find(str);
if (bad_it != bad_format_strings.end()) {
return bad_it->second;
}
static const std::vector<std::string> code_ignore_list = {
"%", "T", "0L", "1L", "3L", "1k", "1K", "2j", "0k",
"0K", "30L", "1T", "2T", "100h", "200h", "350h", "t"};
int arg_count = 0;
for (size_t i = 0; i < str.length(); i++) {
if (str.at(i) == '~') {
i++; // also eat the next character.
bool code_takes_no_arg = false;
for (auto& ignored_code : code_ignore_list) {
[decomp] Decompile some time-of-day stuff, support new style Jak 2 time of day (#1943) - Add "tfrag-water" tfrag tree support (may just be the same as Jak 1's 'dirt' for the settings) - Add "tfrag-trans" tfrag tree support, reusing "trans-tfrag" from jak 1. - Add a hack to `LinkedObjectFileCreation` to handle `oracle`, which is accidentally multiply defined as a type leftover from jak 1 (an entity in village1), and level info for jak 2. - Add `VI1.DGO` - add `time-of-day.gc`, and a few other stub functions so it works - Set up some time of day stuff in GOAL for jak 2/PC renderers - Clean up time of day in c++ renderers, support the more complicated weight system used by jak 2 (backward compatible with jak 1, thankfully) The mood functions now run, so this could cause problems if they rely on stuff we don't have yet. But it seems fine for ctysluma and prison for now. ![image](https://user-images.githubusercontent.com/48171810/194719441-d185f59c-19dc-4cd3-a5c4-00b0cfe1d6c3.png) ![image](https://user-images.githubusercontent.com/48171810/194719449-6e051bf3-0750-42e5-a654-901313dbe479.png) ![image](https://user-images.githubusercontent.com/48171810/194719455-3ca6793e-873a-449a-8e85-9c20ffeb4da3.png) ![image](https://user-images.githubusercontent.com/48171810/194719461-8f27af17-4434-4492-96cd-8c5eec6eafdf.png) ![image](https://user-images.githubusercontent.com/48171810/194719468-720715b9-985a-4acf-928c-eab948cfcb03.png) ![image](https://user-images.githubusercontent.com/48171810/194719486-bfb91e83-f6ca-4585-80ad-3b2c0cbbd5af.png) ![image](https://user-images.githubusercontent.com/48171810/194719492-df065d2f-cb5a-47e3-a248-f5317c42082f.png) ![image](https://user-images.githubusercontent.com/48171810/194719507-91e1f477-ecfe-4d6c-b744-5f24646255ca.png)
2022-10-08 13:33:03 -04:00
size_t j = i;
bool match = true;
for (const char c : ignored_code) {
if (j > str.length()) {
match = false;
break;
}
if (str.at(j) != c) {
match = false;
break;
}
j++;
}
if (match) {
code_takes_no_arg = true;
break;
}
}
if (!code_takes_no_arg) {
arg_count++;
}
}
}
return arg_count;
}
/*!
 * Count the format arguments of a TP_Type.
 * Constant strings are scanned with the string overload; any other type reports the argument
 * count it already carries.
 */
int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) const {
  return type.is_constant_string() ? get_format_arg_count(type.get_string())
                                   : type.get_format_string_arg_count();
}
/*!
 * Look up the argument count of a format call whose format string is not a constant.
 *
 * The counts are stored per function name as a list of entries; element 0 of an entry is the
 * op index and element 1 is the argument count.
 *
 * @param func_name name of the function containing the format op.
 * @param op_idx    index of the format op inside that function.
 * @return the recorded argument count.
 * @throws std::runtime_error if nothing is recorded for the function or for the op index.
 */
int DecompilerTypeSystem::get_dynamic_format_arg_count(const std::string& func_name,
                                                       int op_idx) const {
  const auto kv = format_ops_with_dynamic_string_by_func_name.find(func_name);
  if (kv == format_ops_with_dynamic_string_by_func_name.end()) {
    throw std::runtime_error("Unknown dynamic format string.");
  }

  const auto& formats = kv->second;
  // take each entry by const reference: by value would copy a vector for every element tested.
  const auto the_format =
      std::find_if(formats.begin(), formats.end(),
                   [op_idx](const std::vector<int>& entry) { return entry.at(0) == op_idx; });
  if (the_format == formats.end()) {
    throw std::runtime_error("Unknown dynamic format string.");
  }
  return the_format->at(1);
}
/*!
 * Get the recorded type of a symbol.
 * Throws std::runtime_error if no type is recorded for the name.
 */
TypeSpec DecompilerTypeSystem::lookup_symbol_type(const std::string& name) const {
  const auto found = symbol_types.find(name);
  if (found != symbol_types.end()) {
    return found->second;
  }
  throw std::runtime_error(
      fmt::format("Decompiler type system did not know the type of symbol {}. Add it!", name));
}
/*!
 * Decide whether simplifying a cast from `actual` to `expected` should be attempted.
 * Three fixed pairs always qualify: meters from float, seconds from int64 and degrees from
 * float. For every other pair the answer is true exactly when ts.tc(expected, actual) fails.
 */
bool DecompilerTypeSystem::should_attempt_cast_simplify(const TypeSpec& expected,
                                                        const TypeSpec& actual) const {
  // short-circuit evaluation keeps the comparisons in the same order as separate checks would.
  const bool is_always_simplified_pair =
      (expected == TypeSpec("meters") && actual == TypeSpec("float")) ||
      (expected == TypeSpec("seconds") && actual == TypeSpec("int64")) ||
      (expected == TypeSpec("degrees") && actual == TypeSpec("float"));
  if (is_always_simplified_pair) {
    return true;
  }

  const bool passes_type_check = ts.tc(expected, actual);
  return !passes_type_check;
}
} // namespace decompiler