mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
b209c9e1ba
* scripts: improve decomp-next script * script: Make it possible to consistently cleanup `all-types` * now use the actual parent type! * adjust the symbol order for collectables * re-organize * manually fix this edge-case for now
391 lines
16 KiB
Python
391 lines
16 KiB
Python
# This does a (currently) 3 pass cleanup on all-types
|
|
# 1. Cleanup any symbol definitions that are redundant
|
|
# 2. Reorder symbol definitions based on file build order
|
|
# 3. Check for any necessary forward declarations
|
|
|
|
import os
|
|
|
|
# First pass!
|
|
print("First Pass - Cleaning up File")
|
|
new_file = []
|
|
with open("./decompiler/config/all-types.gc") as f:
|
|
symbols_found = []
|
|
lines = f.readlines()
|
|
for line in lines:
|
|
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern"):
|
|
symbol = line.split(" ")[1]
|
|
if symbol in symbols_found and "unknown type" in line:
|
|
continue
|
|
else:
|
|
symbols_found.append(symbol)
|
|
new_file.append(line)
|
|
|
|
os.remove("./decompiler/config/all-types.gc")
|
|
with open("./decompiler/config/all-types.gc", "w") as f:
|
|
f.writelines(new_file)
|
|
|
|
# Second Pass!
|
|
# I try to preserve comments as best I can:
|
|
# - comments that are file names are discarded, they are now redundant!
|
|
# - comments prior to a symbol definition are considered part of that symbol definition, comments after are NOT
|
|
# Build up a mapping of symbol-name -> symbol-definitions and then we can easily drop them into place
|
|
# Symbols are defined by reading line-by-line until we see the next:
|
|
# - deftype
|
|
# - define-extern
|
|
# - defenum
|
|
# declare-types can be discarded, they are handled in the third pass
|
|
# If something is defined more than once, take the longer definition
|
|
|
|
from jak1_file_list import file_list
|
|
import json
|
|
|
|
script_comments = [
|
|
";; ----------------------",
|
|
";; File -",
|
|
";; Source Path -",
|
|
";; Containing DGOs -",
|
|
";; Version -",
|
|
";; - Types",
|
|
";; - Functions",
|
|
";; - Symbols",
|
|
";; - Unknowns",
|
|
";; NO FILE",
|
|
";; Unknowns / Built-Ins / Non-Original Types",
|
|
";; - Nothing Defined in This File!",
|
|
";; Unknowns with No Definition"
|
|
]
|
|
|
|
def is_filename_comment(line):
|
|
if not line.startswith(";"):
|
|
return False
|
|
if "define" in line or "deftype" in line:
|
|
return False
|
|
cleaned_line = line.replace(";", "").strip()
|
|
for item in file_list:
|
|
file_name = item[0]
|
|
if file_name == cleaned_line:
|
|
return True
|
|
return False
|
|
|
|
with open('./scripts/jak1-symbol-mapping.json') as f:
|
|
data = json.load(f)
|
|
all_symbols = []
|
|
for object_file, symbols in data.items():
|
|
for symbol in symbols:
|
|
all_symbols.append(symbol)
|
|
|
|
def no_runtime_type(definition):
|
|
for line in definition:
|
|
if "no-runtime-type" in line:
|
|
return True
|
|
return False
|
|
|
|
def strip_trailing_new_lines(definition):
|
|
new_definition = []
|
|
found_content = False
|
|
for line in reversed(definition):
|
|
if found_content:
|
|
new_definition.insert(0, line)
|
|
else:
|
|
cleaned_line = line.strip()
|
|
if len(cleaned_line) != 0:
|
|
found_content = True
|
|
new_definition.insert(0, line)
|
|
# Check if the last line has a new-line or not, if it doesn't add one
|
|
if not new_definition[len(new_definition)-1].endswith("\n"):
|
|
new_definition[len(new_definition)-1].append("\n")
|
|
return new_definition
|
|
|
|
symbol_definitions = {}
|
|
# Anything that is custom / not part of the game, I'll place at the top of the file because I have no idea where it should go
|
|
unknown_symbol_definitions = []
|
|
|
|
def is_script_comment(line):
|
|
for comment in script_comments:
|
|
if line.startswith(comment):
|
|
return True
|
|
return False
|
|
|
|
print("Second Pass - Re-Organizing File")
|
|
with open("./decompiler/config/all-types.gc") as f:
|
|
lines = f.readlines()
|
|
current_symbol = ""
|
|
current_symbol_definition = []
|
|
comment_buffer = []
|
|
commented_type = False
|
|
for i, line in enumerate(lines):
|
|
# Ignore the following lines:
|
|
# - declare-types
|
|
# - empty lines
|
|
# - file name comments
|
|
# - comments i generate
|
|
if is_filename_comment(line) or is_script_comment(line) or (commented_type is False and ("declare-type" in line or (line == "\n" and i != len(lines) - 1))):
|
|
continue
|
|
|
|
# Handle the first line of the file properly
|
|
if len(current_symbol_definition) == 0: # The only time this variable should be empty, is at the beginning, after that it should always be in use
|
|
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
|
current_symbol_definition.append(line)
|
|
current_symbol = line.split(" ")[1].rstrip("\n")
|
|
continue
|
|
|
|
# To support comments being associated with the following symbol def, we have to keep track of them
|
|
# then either associate them with the new symbol OR realize they are part of the current one!
|
|
if not commented_type and line.startswith(";") and not (line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum")):
|
|
current_symbol_definition.append(line)
|
|
comment_buffer.append(line)
|
|
continue
|
|
|
|
# Check if we've reached a new symbol or reached the end of the file
|
|
if i == len(lines) - 1 or line.startswith("(deftype") or line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
|
# Remove any comments from the previous symbol def
|
|
if not commented_type and len(comment_buffer) > 0:
|
|
current_symbol_definition = current_symbol_definition[:-len(comment_buffer)]
|
|
# Check if the symbol we found is valid or invalid
|
|
if current_symbol in all_symbols:
|
|
if current_symbol in symbol_definitions:
|
|
# print("Symbol re-defintion found for '{}', choosing the bigger one!".format(current_symbol))
|
|
if len(current_symbol_definition) > len(symbol_definitions[current_symbol]):
|
|
if no_runtime_type(current_symbol_definition):
|
|
current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
|
|
symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
|
|
else:
|
|
if no_runtime_type(current_symbol_definition):
|
|
current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
|
|
symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
|
|
else:
|
|
print("Found a symbol '{}' that is not part of Jak 1!".format(current_symbol))
|
|
unknown_symbol_definitions.append(current_symbol_definition.copy())
|
|
if i != len(lines) - 1:
|
|
if line.startswith("; (deftype"):
|
|
current_symbol = line.split(" ")[2].rstrip("\n")
|
|
commented_type = True
|
|
else:
|
|
current_symbol = line.split(" ")[1].rstrip("\n")
|
|
commented_type = False
|
|
current_symbol_definition.clear()
|
|
current_symbol_definition += comment_buffer
|
|
comment_buffer.clear()
|
|
current_symbol_definition.append(line)
|
|
elif line.startswith("(deftype") or line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
|
if line.startswith("; (deftype"):
|
|
current_symbol = line.split(" ")[2].rstrip("\n")
|
|
else:
|
|
current_symbol = line.split(" ")[1].rstrip("\n")
|
|
current_symbol_definition.clear()
|
|
current_symbol_definition += comment_buffer
|
|
comment_buffer.clear()
|
|
current_symbol_definition.append(line)
|
|
symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
|
|
else:
|
|
current_symbol_definition.append(line)
|
|
if len(comment_buffer) > 0:
|
|
comment_buffer.clear()
|
|
|
|
# Now armed with our complete list of symbols, print them out in a nice organized manner.
|
|
# Precedence:
|
|
# - unknown symbols (after kernel definitions - `gstate` is the last)
|
|
# - file build order (include info about src file path / DGOs it's contained in)
|
|
# - types
|
|
# - functions
|
|
# - unknown types/symbols/functions
|
|
|
|
with open('./scripts/jak1-symbol-mapping.json') as f:
|
|
symbol_mapping = json.load(f)
|
|
|
|
def first_relevant_line(definition):
|
|
if len(definition) == 1:
|
|
return definition[0]
|
|
for line in definition:
|
|
if line.startswith("; (deftype") or not line.startswith(";"):
|
|
return line
|
|
|
|
def was_previous_definition_multi_line(definition):
|
|
line_count = 0
|
|
for line in definition:
|
|
if "; (deftype" in line:
|
|
return True
|
|
if not line.strip().startswith(";"):
|
|
line_count = line_count + 1
|
|
return line_count > 1
|
|
|
|
def print_definition_block(file_lines, header, prev_block_exists, def_list):
|
|
if len(def_list) == 0:
|
|
return False
|
|
if prev_block_exists:
|
|
file_lines.append("\n")
|
|
file_lines.append(";; - {}\n\n".format(header))
|
|
prev_definition = None
|
|
for definition in def_list:
|
|
if prev_definition is not None and was_previous_definition_multi_line(prev_definition):
|
|
file_lines.append("\n")
|
|
file_lines.append("".join(definition))
|
|
prev_definition = definition
|
|
return True
|
|
|
|
def print_definition_blocks(file_lines, types, functions, symbols, unknowns):
|
|
if not types and not functions and not symbols and not unknowns:
|
|
file_lines.append(";; - Nothing Defined in This File!\n")
|
|
else:
|
|
prev_block_exists = False
|
|
prev_block_exists |= print_definition_block(file_lines, "Types", prev_block_exists, types)
|
|
prev_block_exists |= print_definition_block(file_lines, "Functions", prev_block_exists, functions)
|
|
prev_block_exists |= print_definition_block(file_lines, "Symbols", prev_block_exists, symbols)
|
|
prev_block_exists |= print_definition_block(file_lines, "Unknowns", prev_block_exists, unknowns)
|
|
file_lines.append("\n")
|
|
|
|
new_file = []
|
|
for item in file_list:
|
|
file_name = item[0]
|
|
extension = "gc"
|
|
if item[2] == 4:
|
|
extension = "gd"
|
|
src_path = "{}/{}.{}".format(item[4], file_name, extension)
|
|
new_file.append("\n;; ----------------------\n;; File - {}\n;; Source Path - {}\n;; Containing DGOs - {}\n;; Version - {}\n\n".format(file_name, src_path, item[3], item[2]))
|
|
types = []
|
|
functions = []
|
|
symbols = []
|
|
unknowns = []
|
|
if file_name in symbol_mapping:
|
|
for symbol in symbol_mapping[file_name]:
|
|
if symbol not in symbol_definitions:
|
|
print("Could not find definition for '{}'".format(symbol))
|
|
else:
|
|
symbol_definition = symbol_definitions[symbol]
|
|
if ";;(define-extern" in first_relevant_line(symbol_definition):
|
|
unknowns.append(symbol_definition)
|
|
elif "(function" in first_relevant_line(symbol_definition) or "function)" in first_relevant_line(symbol_definition):
|
|
functions.append(symbol_definition)
|
|
elif "deftype" in first_relevant_line(symbol_definition):
|
|
types.append(symbol_definition)
|
|
elif "define-extern" in first_relevant_line(symbol_definition):
|
|
symbols.append(symbol_definition)
|
|
else:
|
|
print("Could not find associated symbol def for '{}'".format(symbol))
|
|
|
|
print_definition_blocks(new_file, types, functions, symbols, unknowns)
|
|
|
|
cleaned_unknown_symbol_defs = []
|
|
if file_name == "gcommon":
|
|
new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns / Built-Ins / Non-Original Types\n\n")
|
|
for definition in unknown_symbol_definitions:
|
|
if not definition[0].startswith(";;(define-extern"):
|
|
new_file.append("".join(definition) + "\n")
|
|
else:
|
|
cleaned_unknown_symbol_defs.append(definition)
|
|
|
|
if len(cleaned_unknown_symbol_defs) > 0:
|
|
new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns with No Definition\n\n")
|
|
for definition in cleaned_unknown_symbol_defs:
|
|
if definition[0].startswith(";;(define-extern"):
|
|
new_file.append("".join(definition).rstrip() + "\n")
|
|
|
|
os.remove("./decompiler/config/all-types.gc")
|
|
with open("./decompiler/config/all-types.gc", "w") as f:
|
|
f.writelines(new_file)
|
|
|
|
|
|
# Third pass! Add any necessary forward declarations
|
|
# - First, let's identify the line numbers where types are defined, and used
|
|
# - Then, repeat the process, adding forward declarations when appropriate
|
|
|
|
type_usages = {}
|
|
|
|
def get_root_parent_type(t):
|
|
if t["parent_type"] in ["basic", "structure", "symbol", "object", "integer", "pair", "number", "binteger", "function", "array", "type", "string", "uint8", "int8", "uint16", "int16", "uint32", "int32", "uint64", "int64", "uint128", "int128", "float", "kheap"]:
|
|
parent_type = t["parent_type"]
|
|
if parent_type not in ["basic", "structure"]:
|
|
return "type" # NOTE - this does not work currently!!! but is VERY RARE
|
|
return t["parent_type"]
|
|
return get_root_parent_type(type_usages[t["parent_type"]])
|
|
|
|
def get_safe_parent_type(current_type, all_types, earliest_usage_line):
|
|
parent_type_name = current_type["parent_type"]
|
|
if parent_type_name in ["basic", "structure", "type"]:
|
|
return parent_type_name
|
|
parent_type = all_types[parent_type_name]
|
|
if parent_type["declared_on_line"] < earliest_usage_line:
|
|
return parent_type["type_name"]
|
|
return get_root_parent_type(current_type)
|
|
|
|
def symbol_usage(line, sym):
|
|
if line.strip().startswith(";"):
|
|
return False
|
|
tokens = line.strip().split(" ")
|
|
for token in tokens[1:]:
|
|
sanitized_token = token.replace("(", "").replace(")", "").strip()
|
|
if sanitized_token == sym:
|
|
return True
|
|
return False
|
|
|
|
new_file = []
|
|
print("Third Pass - Adding Forward Type Declarations")
|
|
with open("./decompiler/config/all-types.gc") as f:
|
|
lines = f.readlines()
|
|
# Get the types
|
|
for i, line in enumerate(lines):
|
|
clean_line = line.replace(";", "").strip()
|
|
if clean_line.startswith("(deftype"):
|
|
symbol = clean_line.split(" ")[1]
|
|
parent_type = clean_line.split(" ")[2].rstrip("\n").replace("(", "").replace(")", "")
|
|
if parent_type == "UNKNOWN":
|
|
continue
|
|
type_usages[symbol] = {
|
|
"type_name": symbol,
|
|
"parent_type": parent_type,
|
|
"declared_on_line": i,
|
|
"first_symbol_usage": "",
|
|
"used_on_lines": [],
|
|
"commented_out_type": line.startswith(";")
|
|
}
|
|
# Identify Usages - heavy loop
|
|
for symbol, usage_info in type_usages.items():
|
|
symbol_index = list(type_usages.keys()).index(symbol)
|
|
if symbol_index % 100 == 0:
|
|
print("[{}/{}]: Finding Type Usages".format(symbol_index, len(type_usages)))
|
|
current_symbol = ""
|
|
for i, line in enumerate(lines):
|
|
if i > usage_info["declared_on_line"]:
|
|
break # For speed reasons, we don't care about usages after the declaration
|
|
if "; deftype provided by C Kernel" in line:
|
|
continue
|
|
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
|
|
current_symbol = line.split(" ")[1]
|
|
if i != usage_info["declared_on_line"] and symbol_usage(line, usage_info["type_name"]):
|
|
if len(usage_info["used_on_lines"]) == 0:
|
|
usage_info["first_symbol_usage"] = current_symbol
|
|
usage_info["used_on_lines"].append(i)
|
|
|
|
# Identify Necessary Forward Declarations
|
|
forward_declarations = {}
|
|
for symbol, usage_info in type_usages.items():
|
|
declaration_line = usage_info["declared_on_line"]
|
|
if len(usage_info["used_on_lines"]) == 0:
|
|
continue
|
|
earliest_usage = usage_info["used_on_lines"][0]
|
|
if declaration_line > earliest_usage or usage_info["commented_out_type"]:
|
|
if usage_info["first_symbol_usage"] not in forward_declarations:
|
|
forward_declarations[usage_info["first_symbol_usage"]] = ["(declare-type {} {})\n".format(symbol, get_safe_parent_type(usage_info, type_usages, earliest_usage))]
|
|
else:
|
|
forward_declarations[usage_info["first_symbol_usage"]].append("(declare-type {} {})\n".format(symbol, get_safe_parent_type(usage_info, type_usages, earliest_usage)))
|
|
|
|
# FINALLY - add the forward declarations\
|
|
skip_next = False
|
|
for i, line in enumerate(lines):
|
|
if skip_next:
|
|
skip_next = False
|
|
new_file.append(line)
|
|
continue
|
|
if "; deftype provided by C Kernel" in line:
|
|
skip_next = True
|
|
if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern"):
|
|
current_symbol = line.split(" ")[1]
|
|
if current_symbol in forward_declarations:
|
|
new_file.append("".join(forward_declarations[current_symbol]))
|
|
new_file.append(line)
|
|
|
|
os.remove("./decompiler/config/all-types.gc")
|
|
with open("./decompiler/config/all-types.gc", "w") as f:
|
|
f.writelines(new_file)
|
|
|