mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 00:57:44 -04:00
Organize all types (#523)
* get a full mapping of all `symbol:file-name` for jak1 * cleanup before any additional work is done * more pre-organize changes * scripts: script written to cleanup all-types...but what will cleanup the script... * resolve clashing enum symbol names * some manual changes to symbol order * very close! don't completely lose types that are fully commented out * scripts: New symbol mapping and further script polishing * changes needed to make the script function properly * brand new all-types! * remove now irrelevant symbol definitions * formatting cpp * decompiler: Remove symbol dumping feature, not accurate anyway
This commit is contained in:
parent
784119188c
commit
2b3c4d957a
|
@ -55,6 +55,13 @@
|
|||
"name" : "Run - Decompiler - Jak 1",
|
||||
"args" : [ "${workspaceRoot}/decompiler/config/jak1_ntsc_black_label.jsonc", "${workspaceRoot}/iso_data/jak1", "${workspaceRoot}/decompiler_out/jak1"]
|
||||
},
|
||||
{
|
||||
"type" : "default",
|
||||
"project" : "CMakeLists.txt",
|
||||
"projectTarget" : "decompiler.exe (bin\\decompiler.exe)",
|
||||
"name" : "Run - Disassembler - Jak 1",
|
||||
"args" : [ "${workspaceRoot}/decompiler/config/jak1_ntsc_black_label.jsonc", "${workspaceRoot}/iso_data/jak1", "${workspaceRoot}/decompiler_out/jak1_ir1"]
|
||||
},
|
||||
{
|
||||
"type" : "default",
|
||||
"project" : "CMakeLists.txt",
|
||||
|
|
10
.vscode/launch.json
vendored
10
.vscode/launch.json
vendored
|
@ -5,12 +5,12 @@
|
|||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"name": "Cleanup All Types",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"program": "${workspaceFolder}/scripts/cleanup-all-types.py",
|
||||
"console": "integratedTerminal",
|
||||
"cwd": "${workspaceFolder}/scripts"
|
||||
}
|
||||
"cwd": "${workspaceFolder}"
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ tasks:
|
|||
decomp-watch:
|
||||
cmds:
|
||||
- watchmedo shell-command --drop --patterns="*.gc;*.jsonc" --recursive --command='task decomp-file FILE="{{.FILE}}"' ./decompiler/config/
|
||||
cleanup-all-types:
|
||||
clean-all-types:
|
||||
cmds:
|
||||
- python ./scripts/cleanup-all-types.py
|
||||
analyze-ee-memory:
|
||||
|
|
|
@ -822,4 +822,4 @@ LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
|
|||
|
||||
return result;
|
||||
}
|
||||
} // namespace decompiler
|
||||
} // namespace decompiler
|
||||
|
|
|
@ -767,4 +767,4 @@ void ObjectFileDB::dump_raw_objects(const std::string& output_dir) {
|
|||
file_util::write_binary_file(dest, data.data.data(), data.data.size());
|
||||
});
|
||||
}
|
||||
} // namespace decompiler
|
||||
} // namespace decompiler
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -85,7 +85,7 @@ int main(int argc, char** argv) {
|
|||
db.analyze_functions_ir2(out_folder, config);
|
||||
}
|
||||
|
||||
// write out all symbols TODO - organize by file
|
||||
// write out all symbols
|
||||
file_util::write_text_file(file_util::combine_path(out_folder, "all-syms.gc"),
|
||||
db.dts.dump_symbol_types());
|
||||
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
# all-types.gc can easily drift
|
||||
# initially it's populated with many types that are unknown (these are commented out)
|
||||
# but when we manually replace them, it's easy to forget to delete the duplicate commented out variant.
|
||||
#
|
||||
# additionally, ideally we'd be organizing them as well but this can come later (dont want to manually move all the type defs!)
|
||||
|
||||
# (deftype) or (define-extern)
|
||||
# if we find a deftype or define-extern that has a define-extern with ;; unknown type on the same line, delete it!
|
||||
# This does a (currently) 3 pass cleanup on all-types
|
||||
# 1. Cleanup any symbol definitions that are redundant
|
||||
# 2. Reorder symbol definitions based on file build order
|
||||
# 3. Check for any necessary forward declarations
|
||||
|
||||
import os
|
||||
|
||||
# First pass!
|
||||
print("First Pass - Cleaning up File")
|
||||
new_file = []
|
||||
with open("./decompiler/config/all-types.gc") as f:
|
||||
symbols_found = []
|
||||
|
@ -25,3 +23,318 @@ with open("./decompiler/config/all-types.gc") as f:
|
|||
os.remove("./decompiler/config/all-types.gc")
|
||||
with open("./decompiler/config/all-types.gc", "w") as f:
|
||||
f.writelines(new_file)
|
||||
|
||||
# Second Pass!
|
||||
# I try to preserve comments as best I can:
|
||||
# - comments that are file names are discarded, they are now redundant!
|
||||
# - comments prior to a symbol definition are considered part of that symbol definition, comments after are NOT
|
||||
# Build up a mapping of symbol-name -> symbol-definitions and then we can easily drop them into place
|
||||
# Symbols are defined by reading line-by-line until we see the next:
|
||||
# - deftype
|
||||
# - define-extern
|
||||
# - defenum
|
||||
# declare-types can be discarded, they are handled in the third pass
|
||||
# If something is defined more than once, take the longer definition
|
||||
|
||||
from jak1_file_list import file_list
|
||||
import json
|
||||
|
||||
def is_filename_comment(line):
|
||||
if not line.startswith(";"):
|
||||
return False
|
||||
if "define" in line or "deftype" in line:
|
||||
return False
|
||||
cleaned_line = line.replace(";", "").strip()
|
||||
for item in file_list:
|
||||
file_name = item[0]
|
||||
if file_name == cleaned_line:
|
||||
return True
|
||||
return False
|
||||
|
||||
with open('./scripts/jak1-symbol-mapping.json') as f:
|
||||
data = json.load(f)
|
||||
all_symbols = []
|
||||
for object_file, symbols in data.items():
|
||||
for symbol in symbols:
|
||||
all_symbols.append(symbol)
|
||||
|
||||
def no_runtime_type(definition):
|
||||
for line in definition:
|
||||
if "no-runtime-type" in line:
|
||||
return True
|
||||
return False
|
||||
|
||||
def strip_trailing_new_lines(definition):
|
||||
new_definition = []
|
||||
found_content = False
|
||||
for line in reversed(definition):
|
||||
if found_content:
|
||||
new_definition.insert(0, line)
|
||||
else:
|
||||
cleaned_line = line.strip()
|
||||
if len(line) != 0:
|
||||
found_content = True
|
||||
new_definition.insert(0, line)
|
||||
return new_definition
|
||||
|
||||
symbol_definitions = {}
|
||||
# Anything that is custom / not part of the game, I'll place at the top of the file because I have no idea where it should go
|
||||
unknown_symbol_definitions = []
|
||||
|
||||
print("Second Pass - Re-Organizing File")
|
||||
with open("./decompiler/config/all-types.gc") as f:
|
||||
lines = f.readlines()
|
||||
current_symbol = ""
|
||||
current_symbol_definition = []
|
||||
comment_buffer = []
|
||||
commented_type = False
|
||||
for i, line in enumerate(lines):
|
||||
# Ignore the following lines:
|
||||
# - declare-types
|
||||
# - empty lines
|
||||
# - file name comments
|
||||
# - comments i generate
|
||||
if is_filename_comment(line) or (commented_type is False and ("declare-type" in line or (line == "\n" and i != len(lines) - 1) or line.startswith(";; File -") or line.startswith(";; Source Path -") or line.startswith(";; Containing DGOs -") or line.startswith(";; Version -") or line.startswith(";; Types") or line.startswith(";; Functions") or line.startswith(";; Unknowns") or line.startswith(";; NO FILE"))):
|
||||
continue
|
||||
|
||||
# Handle the first line of the file properly
|
||||
if len(current_symbol_definition) == 0: # The only time this variable should be empty, is at the beginning, after that it should always be in use
|
||||
current_symbol_definition.append(line)
|
||||
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
||||
current_symbol = line.split(" ")[1].rstrip("\n")
|
||||
continue
|
||||
|
||||
# To support comments being associated with the following symbol def, we have to keep track of them
|
||||
# then either associate them with the new symbol OR realize they are part of the current one!
|
||||
if not commented_type and line.startswith(";") and not (line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum")):
|
||||
current_symbol_definition.append(line)
|
||||
comment_buffer.append(line)
|
||||
continue
|
||||
|
||||
# Check if we've reached a new symbol or reached the end of the file
|
||||
if i == len(lines) - 1 or line.startswith("(deftype") or line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
||||
# Remove any comments from the previous symbol def
|
||||
if not commented_type and len(comment_buffer) > 0:
|
||||
current_symbol_definition = current_symbol_definition[:-len(comment_buffer)]
|
||||
# Check if the symbol we found is valid or invalid
|
||||
if current_symbol in all_symbols:
|
||||
if current_symbol in symbol_definitions:
|
||||
# print("Symbol re-defintion found for '{}', choosing the bigger one!".format(current_symbol))
|
||||
if len(current_symbol_definition) > len(symbol_definitions[current_symbol]):
|
||||
if no_runtime_type(current_symbol_definition):
|
||||
current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
|
||||
symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
|
||||
else:
|
||||
if no_runtime_type(current_symbol_definition):
|
||||
current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
|
||||
symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
|
||||
else:
|
||||
print("Found a symbol '{}' that is not part of Jak 1!".format(current_symbol))
|
||||
unknown_symbol_definitions.append(current_symbol_definition.copy())
|
||||
if i != len(lines) - 1:
|
||||
if line.startswith("; (deftype"):
|
||||
current_symbol = line.split(" ")[2].rstrip("\n")
|
||||
commented_type = True
|
||||
else:
|
||||
current_symbol = line.split(" ")[1].rstrip("\n")
|
||||
commented_type = False
|
||||
current_symbol_definition.clear()
|
||||
current_symbol_definition += comment_buffer
|
||||
comment_buffer.clear()
|
||||
current_symbol_definition.append(line)
|
||||
else:
|
||||
current_symbol_definition.append(line)
|
||||
if len(comment_buffer) > 0:
|
||||
comment_buffer.clear()
|
||||
|
||||
# Now armed with our complete list of symbols, print them out in a nice organized manner.
|
||||
# Precedence:
|
||||
# - unknown symbols (after kernel definitions - `gstate` is the last)
|
||||
# - file build order (include info about src file path / DGOs it's contained in)
|
||||
# - types
|
||||
# - functions
|
||||
# - unknown types/symbols/functions
|
||||
|
||||
with open('./scripts/jak1-symbol-mapping.json') as f:
|
||||
symbol_mapping = json.load(f)
|
||||
|
||||
def first_relevant_line(definition):
|
||||
if len(definition) == 1:
|
||||
return definition[0]
|
||||
for line in definition:
|
||||
if line.startswith("; (deftype") or not line.startswith(";"):
|
||||
return line
|
||||
|
||||
def was_previous_definition_multi_line(definition):
|
||||
line_count = 0
|
||||
for line in definition:
|
||||
if not line.strip().startswith(";"):
|
||||
line_count = line_count + 1
|
||||
return line_count > 1
|
||||
|
||||
def print_definition_block(file_lines, header, prev_block_exists, def_list):
|
||||
if len(def_list) == 0:
|
||||
return False
|
||||
if prev_block_exists:
|
||||
file_lines.append("\n")
|
||||
file_lines.append(";; - {}\n\n".format(header))
|
||||
prev_definition = None
|
||||
for definition in def_list:
|
||||
if prev_definition is not None and was_previous_definition_multi_line(prev_definition):
|
||||
file_lines.append("\n")
|
||||
file_lines.append("".join(definition))
|
||||
prev_definition = definition
|
||||
return True
|
||||
|
||||
def print_definition_blocks(file_lines, types, functions, symbols, unknowns):
|
||||
if not types and not functions and not symbols and not unknowns:
|
||||
file_lines.append(";; - Nothing Defined in This File!\n")
|
||||
else:
|
||||
prev_block_exists = False
|
||||
prev_block_exists |= print_definition_block(file_lines, "Types", prev_block_exists, types)
|
||||
prev_block_exists |= print_definition_block(file_lines, "Functions", prev_block_exists, functions)
|
||||
prev_block_exists |= print_definition_block(file_lines, "Symbols", prev_block_exists, symbols)
|
||||
prev_block_exists |= print_definition_block(file_lines, "Unknowns", prev_block_exists, unknowns)
|
||||
file_lines.append("\n")
|
||||
|
||||
new_file = []
|
||||
for item in file_list:
|
||||
file_name = item[0]
|
||||
extension = "gc"
|
||||
if item[2] == 4:
|
||||
extension = "gd"
|
||||
src_path = "{}/{}.{}".format(item[4], file_name, extension)
|
||||
new_file.append("\n;; ----------------------\n;; File - {}\n;; Source Path - {}\n;; Containing DGOs - {}\n;; Version - {}\n\n".format(file_name, src_path, item[3], item[2]))
|
||||
types = []
|
||||
functions = []
|
||||
symbols = []
|
||||
unknowns = []
|
||||
if file_name in symbol_mapping:
|
||||
for symbol in symbol_mapping[file_name]:
|
||||
if symbol not in symbol_definitions:
|
||||
print("Could not find definition for '{}'".format(symbol))
|
||||
else:
|
||||
symbol_definition = symbol_definitions[symbol]
|
||||
if ";;(define-extern" in first_relevant_line(symbol_definition):
|
||||
unknowns.append(symbol_definition)
|
||||
elif "(function" in first_relevant_line(symbol_definition):
|
||||
functions.append(symbol_definition)
|
||||
elif "deftype" in first_relevant_line(symbol_definition):
|
||||
types.append(symbol_definition)
|
||||
elif "define-extern" in first_relevant_line(symbol_definition):
|
||||
symbols.append(symbol_definition)
|
||||
else:
|
||||
print("Could not find associated symbol def for '{}'".format(symbol))
|
||||
|
||||
print_definition_blocks(new_file, types, functions, symbols, unknowns)
|
||||
|
||||
if file_name == "gcommon":
|
||||
new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns / Built-Ins / Non-Original Types\n\n")
|
||||
for definition in unknown_symbol_definitions:
|
||||
if not definition[0].startswith(";;(define-extern"):
|
||||
new_file.append("".join(definition) + "\n")
|
||||
|
||||
new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns with No Definition\n\n")
|
||||
for definition in unknown_symbol_definitions:
|
||||
if definition[0].startswith(";;(define-extern"):
|
||||
new_file.append("".join(definition).rstrip() + "\n")
|
||||
|
||||
os.remove("./decompiler/config/all-types-test.gc")
|
||||
with open("./decompiler/config/all-types-test.gc", "w") as f:
|
||||
f.writelines(new_file)
|
||||
|
||||
|
||||
# Third pass! Add any necessary forward declarations
|
||||
# - First, let's identify the line numbers where types are defined, and used
|
||||
# - Then, repeat the process, adding forward declarations when appropriate
|
||||
|
||||
type_usages = {}
|
||||
|
||||
def get_root_parent_type(t):
|
||||
if t["parent_type"] in ["basic", "structure", "symbol", "object", "integer", "pair", "number", "binteger", "function", "array", "type", "string", "uint8", "int8", "uint16", "int16", "uint32", "int32", "uint64", "int64", "uint128", "int128", "float", "kheap"]:
|
||||
parent_type = t["parent_type"]
|
||||
if parent_type not in ["basic", "structure"]:
|
||||
return "type" # NOTE - this does not work currently!!! but is VERY RARE
|
||||
return t["parent_type"]
|
||||
return get_root_parent_type(type_usages[t["parent_type"]])
|
||||
|
||||
def symbol_usage(line, sym):
|
||||
if line.strip().startswith(";"):
|
||||
return False
|
||||
tokens = line.strip().split(" ")
|
||||
for token in tokens[1:]:
|
||||
sanitized_token = token.replace("(", "").replace(")", "").strip()
|
||||
if sanitized_token == sym:
|
||||
return True
|
||||
return False
|
||||
|
||||
new_file = []
|
||||
print("Third Pass - Adding Forward Type Declarations")
|
||||
with open("./decompiler/config/all-types-test.gc") as f:
|
||||
lines = f.readlines()
|
||||
# Get the types
|
||||
for i, line in enumerate(lines):
|
||||
clean_line = line.replace(";", "").strip()
|
||||
if clean_line.startswith("(deftype"):
|
||||
symbol = clean_line.split(" ")[1]
|
||||
parent_type = clean_line.split(" ")[2].rstrip("\n").replace("(", "").replace(")", "")
|
||||
if parent_type == "UNKNOWN":
|
||||
continue
|
||||
type_usages[symbol] = {
|
||||
"type_name": symbol,
|
||||
"parent_type": parent_type,
|
||||
"declared_on_line": i,
|
||||
"first_symbol_usage": "",
|
||||
"used_on_lines": [],
|
||||
"commented_out_type": line.startswith(";")
|
||||
}
|
||||
# Identify Usages - heavy loop
|
||||
for symbol, usage_info in type_usages.items():
|
||||
symbol_index = list(type_usages.keys()).index(symbol)
|
||||
if symbol_index % 100 == 0:
|
||||
print("[{}/{}]: Finding Type Usages".format(symbol_index, len(type_usages)))
|
||||
current_symbol = ""
|
||||
for i, line in enumerate(lines):
|
||||
if i > usage_info["declared_on_line"]:
|
||||
break # For speed reasons, we don't care about usages after the declaration
|
||||
if "; deftype provided by C Kernel" in line:
|
||||
continue
|
||||
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
||||
current_symbol = line.split(" ")[1]
|
||||
if i != usage_info["declared_on_line"] and symbol_usage(line, usage_info["type_name"]):
|
||||
if len(usage_info["used_on_lines"]) == 0:
|
||||
usage_info["first_symbol_usage"] = current_symbol
|
||||
usage_info["used_on_lines"].append(i)
|
||||
|
||||
# Identify Necessary Forward Declarations
|
||||
forward_declarations = {}
|
||||
for symbol, usage_info in type_usages.items():
|
||||
declaration_line = usage_info["declared_on_line"]
|
||||
if len(usage_info["used_on_lines"]) == 0:
|
||||
continue
|
||||
earliest_usage = usage_info["used_on_lines"][0]
|
||||
if declaration_line > earliest_usage or usage_info["commented_out_type"]:
|
||||
if usage_info["first_symbol_usage"] not in forward_declarations:
|
||||
forward_declarations[usage_info["first_symbol_usage"]] = ["(declare-type {} {})\n".format(symbol, get_root_parent_type(usage_info))]
|
||||
else:
|
||||
forward_declarations[usage_info["first_symbol_usage"]].append("(declare-type {} {})\n".format(symbol, get_root_parent_type(usage_info)))
|
||||
|
||||
# FINALLY - add the forward declarations\
|
||||
skip_next = False
|
||||
for i, line in enumerate(lines):
|
||||
if skip_next:
|
||||
skip_next = False
|
||||
new_file.append(line)
|
||||
continue
|
||||
if "; deftype provided by C Kernel" in line:
|
||||
skip_next = True
|
||||
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern"):
|
||||
current_symbol = line.split(" ")[1]
|
||||
if current_symbol in forward_declarations:
|
||||
new_file.append("".join(forward_declarations[current_symbol]))
|
||||
new_file.append(line)
|
||||
|
||||
os.remove("./decompiler/config/all-types-test.gc")
|
||||
with open("./decompiler/config/all-types-test.gc", "w") as f:
|
||||
f.writelines(new_file)
|
||||
|
||||
|
|
7144
scripts/jak1-symbol-mapping.json
Normal file
7144
scripts/jak1-symbol-mapping.json
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,5 @@
|
|||
file_list = [["gcommon", "gcommon", 3, ["KERNEL"], "kernel"],
|
||||
file_list = [\
|
||||
["gcommon", "gcommon", 3, ["KERNEL"], "kernel"],
|
||||
["gstring-h", "gstring-h", 3, ["KERNEL"], "kernel"],
|
||||
["gkernel-h", "gkernel-h", 3, ["KERNEL"], "kernel"],
|
||||
["gkernel", "gkernel", 3, ["KERNEL"], "kernel"],
|
||||
|
@ -1084,4 +1085,4 @@ file_list = [["gcommon", "gcommon",
|
|||
["vil3-bridge-36-ag", "vil3-bridge-36", 4, ["VI3"], "levels/village3"],
|
||||
["water-anim-village3-ag", "water-anim-village3", 4, ["VI3"], "levels/village3"],
|
||||
["village3-vis", "village3-vis", 4, ["VI3"], "levels/village3"],
|
||||
["lava", "lava", 3, ["WATER-AN"], "old/lava"]]
|
||||
["lava", "lava", 3, ["WATER-AN"], "old/lava"]]
|
||||
|
|
Loading…
Reference in a new issue