jak-project/scripts/cleanup-all-types.py
Tyler Wilding b209c9e1ba
Fixing organization error in all-types (#553)
* scripts: improve decomp-next script

* script: Make it possible to consistently cleanup `all-types`

* now use the actual parent type!

* adjust the symbol order for collectables

* re-organize

* manually fix this edge-case for now
2021-06-16 20:29:15 -04:00

391 lines
16 KiB
Python

# This does a (currently) 3 pass cleanup on all-types
# 1. Cleanup any symbol definitions that are redundant
# 2. Reorder symbol definitions based on file build order
# 3. Check for any necessary forward declarations
import os
# First pass!
# Drops redundant placeholder definitions: once a symbol name has been seen on a
# definition line, any later definition line for the same symbol that contains
# "unknown type" is removed. Every other line is kept unchanged.
print("First Pass - Cleaning up File")
new_file = []
with open("./decompiler/config/all-types.gc") as f:
    # Only membership is ever tested, so a set avoids rescanning a growing list
    # for every definition line.
    symbols_found = set()
    lines = f.readlines()
    for line in lines:
        if line.startswith(("(deftype", "(define-extern", ";;(define-extern")):
            # The symbol name is the second space-separated token on the line
            symbol = line.split(" ")[1]
            if symbol in symbols_found and "unknown type" in line:
                continue
            symbols_found.add(symbol)
        new_file.append(line)
# Replace the file on disk with the filtered lines
os.remove("./decompiler/config/all-types.gc")
with open("./decompiler/config/all-types.gc", "w") as f:
    f.writelines(new_file)
# Second Pass!
# I try to preserve comments as best I can:
# - comments that are file names are discarded, they are now redundant!
# - comments prior to a symbol definition are considered part of that symbol definition, comments after are NOT
# Build up a mapping of symbol-name -> symbol-definitions and then we can easily drop them into place
# Symbols are defined by reading line-by-line until we see the next:
# - deftype
# - define-extern
# - defenum
# declare-types can be discarded, they are handled in the third pass
# If something is defined more than once, take the longer definition
from jak1_file_list import file_list
import json
# Prefixes of the comment lines that this script itself writes into all-types.gc
# (file headers, section headers and the "NO FILE" banners). The second pass skips
# any input line that starts with one of these prefixes (see is_script_comment).
script_comments = [
    ";; ----------------------",
    ";; File -",
    ";; Source Path -",
    ";; Containing DGOs -",
    ";; Version -",
    ";; - Types",
    ";; - Functions",
    ";; - Symbols",
    ";; - Unknowns",
    ";; NO FILE",
    ";; Unknowns / Built-Ins / Non-Original Types",
    ";; - Nothing Defined in This File!",
    ";; Unknowns with No Definition"
]
def is_filename_comment(line):
    """Return True when `line` is a comment whose whole text is a known file name.

    A line qualifies only if it starts with ";", does not contain "define" or
    "deftype", and - once every ";" is removed and the remainder is stripped -
    equals the first element of some entry in `file_list`.
    """
    is_comment = line.startswith(";")
    mentions_definition = "define" in line or "deftype" in line
    if not is_comment or mentions_definition:
        return False
    candidate = line.replace(";", "").strip()
    return any(entry[0] == candidate for entry in file_list)
# Load the object-file -> symbols mapping and flatten it into a single list
# holding every symbol name that appears in the mapping.
with open('./scripts/jak1-symbol-mapping.json') as f:
    data = json.load(f)
all_symbols = []
for file_symbols in data.values():
    all_symbols.extend(file_symbols)
def no_runtime_type(definition):
    """Return True if any line of `definition` contains the text "no-runtime-type"."""
    return any("no-runtime-type" in entry for entry in definition)
def strip_trailing_new_lines(definition):
    """Return a copy of `definition` without its trailing blank lines.

    Lines that are empty or whitespace-only are removed from the end of the list
    only; blank lines between content lines are kept. The last remaining line is
    guaranteed to end with a newline character.

    Args:
        definition: list of text lines making up one symbol definition.

    Returns:
        A new list; the input list is not modified. An empty list is returned
        when `definition` is empty or holds nothing but blank lines.
    """
    end = len(definition)
    # Walk backwards past every whitespace-only line at the tail
    while end > 0 and not definition[end - 1].strip():
        end -= 1
    new_definition = list(definition[:end])
    # Strings are immutable, so the terminator is added by rebinding the last
    # element rather than by calling a list-style append on it.
    if new_definition and not new_definition[-1].endswith("\n"):
        new_definition[-1] += "\n"
    return new_definition
# Symbol name -> list of lines that make up that symbol's definition
symbol_definitions = dict()
# Definitions that are custom / not part of the game; they are placed near the top
# of the file because there is no way to know where they should go
unknown_symbol_definitions = list()
def is_script_comment(line):
    """Return True if `line` begins with one of the prefixes in `script_comments`."""
    return any(line.startswith(prefix) for prefix in script_comments)
# Walk the cleaned file line by line and slice it into per-symbol definitions.
# A definition is stored in `symbol_definitions` when its symbol is listed in
# `all_symbols`, otherwise it is appended to `unknown_symbol_definitions`.
print("Second Pass - Re-Organizing File")
with open("./decompiler/config/all-types.gc") as f:
    lines = f.readlines()
    # Name and accumulated lines of the definition currently being read
    current_symbol = ""
    current_symbol_definition = []
    # Comment lines seen since the last non-comment line; they are moved onto the
    # NEXT definition if a new symbol starts right after them
    comment_buffer = []
    # True while reading a commented-out `; (deftype ...` definition; in that mode
    # comment lines are ordinary content instead of being buffered
    commented_type = False
    for i, line in enumerate(lines):
        # Ignore the following lines:
        # - declare-types
        # - empty lines
        # - file name comments
        # - comments i generate
        # (declare-types and empty lines are only skipped outside a commented-out type,
        #  and an empty line is never skipped when it is the last line of the file)
        if is_filename_comment(line) or is_script_comment(line) or (commented_type is False and ("declare-type" in line or (line == "\n" and i != len(lines) - 1))):
            continue
        # Handle the first line of the file properly
        # NOTE(review): as written, every line before the first definition line is
        # skipped - confirm that dropping such leading content is intended
        if len(current_symbol_definition) == 0: # The only time this variable should be empty, is at the beginning, after that it should always be in use
            if line.startswith("(deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
                current_symbol_definition.append(line)
                current_symbol = line.split(" ")[1].rstrip("\n")
            continue
        # To support comments being associated with the following symbol def, we have to keep track of them
        # then either associate them with the new symbol OR realize they are part of the current one!
        if not commented_type and line.startswith(";") and not (line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum")):
            # Tentatively part of the current definition, and remembered in the buffer
            current_symbol_definition.append(line)
            comment_buffer.append(line)
            continue
        # Check if we've reached a new symbol or reached the end of the file
        if i == len(lines) - 1 or line.startswith("(deftype") or line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
            # Remove any comments from the previous symbol def
            # (the buffered comments sit at its tail and belong to the symbol that starts here)
            if not commented_type and len(comment_buffer) > 0:
                current_symbol_definition = current_symbol_definition[:-len(comment_buffer)]
            # Check if the symbol we found is valid or invalid
            if current_symbol in all_symbols:
                if current_symbol in symbol_definitions:
                    # print("Symbol re-defintion found for '{}', choosing the bigger one!".format(current_symbol))
                    # Keep whichever definition has more lines
                    if len(current_symbol_definition) > len(symbol_definitions[current_symbol]):
                        if no_runtime_type(current_symbol_definition):
                            current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
                        symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
                else:
                    # First time this symbol is seen
                    if no_runtime_type(current_symbol_definition):
                        current_symbol_definition.insert(0, "(define-extern {} type) ; deftype provided by C Kernel\n".format(current_symbol))
                    symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
            else:
                print("Found a symbol '{}' that is not part of Jak 1!".format(current_symbol))
                unknown_symbol_definitions.append(current_symbol_definition.copy())
            if i != len(lines) - 1:
                # Start accumulating the new symbol; in a commented-out deftype the name
                # is the third space-separated token, otherwise the second
                if line.startswith("; (deftype"):
                    current_symbol = line.split(" ")[2].rstrip("\n")
                    commented_type = True
                else:
                    current_symbol = line.split(" ")[1].rstrip("\n")
                    commented_type = False
                current_symbol_definition.clear()
                # Comments that directly preceded this symbol now lead its definition
                current_symbol_definition += comment_buffer
                comment_buffer.clear()
                current_symbol_definition.append(line)
            elif line.startswith("(deftype") or line.startswith("; (deftype") or line.startswith("(define-extern") or line.startswith(";;(define-extern") or line.startswith("(defenum"):
                # The very last line of the file is itself a definition: store it directly
                # (this path does not check the symbol against all_symbols)
                if line.startswith("; (deftype"):
                    current_symbol = line.split(" ")[2].rstrip("\n")
                else:
                    current_symbol = line.split(" ")[1].rstrip("\n")
                current_symbol_definition.clear()
                current_symbol_definition += comment_buffer
                comment_buffer.clear()
                current_symbol_definition.append(line)
                symbol_definitions[current_symbol] = strip_trailing_new_lines(current_symbol_definition.copy())
        else:
            # Ordinary content line of the current definition; any buffered comments
            # turned out to be interior to it, so forget them
            current_symbol_definition.append(line)
            if len(comment_buffer) > 0:
                comment_buffer.clear()
# Now armed with our complete list of symbols, print them out in a nice organized manner.
# Precedence:
# - unknown symbols (after kernel definitions - `gstate` is the last)
# - file build order (include info about src file path / DGOs it's contained in)
# - types
# - functions
# - unknown types/symbols/functions
# Object-file name -> symbols it defines, read from the JSON mapping
with open('./scripts/jak1-symbol-mapping.json') as f:
    symbol_mapping = json.loads(f.read())
def first_relevant_line(definition):
    """Return the line of `definition` that identifies what kind of definition it is.

    A single-line definition is returned as-is. Otherwise the first line that is
    either not a comment, or is a commented-out definition marker
    ("; (deftype" or ";;(define-extern"), is returned, so that plain comments
    preceding the definition are skipped.

    Args:
        definition: list of text lines making up one symbol definition.

    Returns:
        The identifying line, or an empty string when `definition` is empty or
        consists only of plain comments. Returning a string in every case keeps
        substring checks on the result from failing.
    """
    if len(definition) == 1:
        return definition[0]
    for line in definition:
        # Commented-out definitions start with ";" too, so they are matched
        # explicitly before the generic "not a comment" test
        if line.startswith(("; (deftype", ";;(define-extern")) or not line.startswith(";"):
            return line
    return ""
def was_previous_definition_multi_line(definition):
    """Report whether `definition` counts as a multi-line definition.

    True when any line contains "; (deftype", or when more than one line does
    not start with ";" after stripping surrounding whitespace.
    """
    if any("; (deftype" in entry for entry in definition):
        return True
    code_line_total = sum(1 for entry in definition if not entry.strip().startswith(";"))
    return code_line_total > 1
def print_definition_block(file_lines, header, prev_block_exists, def_list):
    """Append one titled section of definitions to `file_lines`.

    Nothing is written for an empty `def_list`. Otherwise a blank line is added
    first when `prev_block_exists` is true, followed by a ";; - <header>" line,
    followed by each definition joined into a single string. A blank line is put
    between two definitions when the earlier one is multi-line.

    Returns:
        True if the section was written, False if `def_list` was empty.
    """
    if not def_list:
        return False
    if prev_block_exists:
        file_lines.append("\n")
    file_lines.append(";; - {}\n\n".format(header))
    for position, definition in enumerate(def_list):
        # Only look back once there is an earlier definition to look at
        if position > 0 and was_previous_definition_multi_line(def_list[position - 1]):
            file_lines.append("\n")
        file_lines.append("".join(definition))
    return True
def print_definition_blocks(file_lines, types, functions, symbols, unknowns):
    """Append the Types / Functions / Symbols / Unknowns sections for one file.

    When all four lists are empty a single "Nothing Defined" marker line is
    written instead. A blank line is appended at the end in both cases.
    """
    sections = (
        ("Types", types),
        ("Functions", functions),
        ("Symbols", symbols),
        ("Unknowns", unknowns),
    )
    if any(defs for _, defs in sections):
        wrote_block = False
        for title, defs in sections:
            # Each section is told whether an earlier one was actually written
            wrote_block |= print_definition_block(file_lines, title, wrote_block, defs)
    else:
        file_lines.append(";; - Nothing Defined in This File!\n")
    file_lines.append("\n")
# Rebuild the file: one header + definition sections per entry of file_list, in order
new_file = []
for item in file_list:
    file_name = item[0]
    extension = "gd" if item[2] == 4 else "gc"
    src_path = "{}/{}.{}".format(item[4], file_name, extension)
    new_file.append("\n;; ----------------------\n;; File - {}\n;; Source Path - {}\n;; Containing DGOs - {}\n;; Version - {}\n\n".format(file_name, src_path, item[3], item[2]))
    types = []
    functions = []
    symbols = []
    unknowns = []
    if file_name in symbol_mapping:
        for symbol in symbol_mapping[file_name]:
            if symbol not in symbol_definitions:
                print("Could not find definition for '{}'".format(symbol))
                continue
            symbol_definition = symbol_definitions[symbol]
            # Classify the definition by the line that identifies it
            relevant_line = first_relevant_line(symbol_definition)
            if ";;(define-extern" in relevant_line:
                unknowns.append(symbol_definition)
            elif "(function" in relevant_line or "function)" in relevant_line:
                functions.append(symbol_definition)
            elif "deftype" in relevant_line:
                types.append(symbol_definition)
            elif "define-extern" in relevant_line:
                symbols.append(symbol_definition)
            else:
                print("Could not find associated symbol def for '{}'".format(symbol))
    print_definition_blocks(new_file, types, functions, symbols, unknowns)
    cleaned_unknown_symbol_defs = []
    if file_name == "gcommon":
        # Definitions not tied to any file are emitted right after this file
        new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns / Built-Ins / Non-Original Types\n\n")
        for definition in unknown_symbol_definitions:
            if definition[0].startswith(";;(define-extern"):
                # Commented-out externs are held back for their own section
                cleaned_unknown_symbol_defs.append(definition)
            else:
                new_file.append("".join(definition) + "\n")
    if cleaned_unknown_symbol_defs:
        new_file.append("\n;; ----------------------\n;; NO FILE\n;; Unknowns with No Definition\n\n")
        for definition in cleaned_unknown_symbol_defs:
            if definition[0].startswith(";;(define-extern"):
                new_file.append("".join(definition).rstrip() + "\n")
# Replace the file on disk with the re-organized content
os.remove("./decompiler/config/all-types.gc")
with open("./decompiler/config/all-types.gc", "w") as f:
    f.writelines(new_file)
# Third pass! Add any necessary forward declarations
# - First, let's identify the line numbers where types are defined, and used
# - Then, repeat the process, adding forward declarations when appropriate
# Type name -> info dict describing where the type is declared and used
type_usages = {}


def get_root_parent_type(t):
    """Follow `t`'s parent chain through `type_usages` down to a built-in type.

    Returns "basic" or "structure" when the chain ends at one of those; for any
    other built-in at the end of the chain, "type" is returned.
    """
    builtin_types = ["basic", "structure", "symbol", "object", "integer", "pair", "number", "binteger", "function", "array", "type", "string", "uint8", "int8", "uint16", "int16", "uint32", "int32", "uint64", "int64", "uint128", "int128", "float", "kheap"]
    immediate_parent = t["parent_type"]
    if immediate_parent not in builtin_types:
        # Not a built-in yet: keep climbing from the parent's own entry
        return get_root_parent_type(type_usages[immediate_parent])
    if immediate_parent in ["basic", "structure"]:
        return immediate_parent
    return "type" # NOTE - this does not work currently!!! but is VERY RARE
def get_safe_parent_type(current_type, all_types, earliest_usage_line):
    """Pick a parent type name that can be used in a forward declaration.

    "basic", "structure" and "type" are returned unchanged. Otherwise the direct
    parent's name is returned if that parent is declared on a line before
    `earliest_usage_line`; if it is not, the result of get_root_parent_type for
    `current_type` is returned instead.
    """
    parent_name = current_type["parent_type"]
    if parent_name in ("basic", "structure", "type"):
        return parent_name
    parent_info = all_types[parent_name]
    declared_early_enough = parent_info["declared_on_line"] < earliest_usage_line
    if declared_early_enough:
        return parent_info["type_name"]
    return get_root_parent_type(current_type)
def symbol_usage(line, sym):
    """Return True if `sym` appears as a token of `line` after the first token.

    Lines whose stripped text starts with ";" never match. The stripped line is
    split on single spaces, the first token is ignored, and each remaining token
    is compared to `sym` after removing every "(" and ")" and stripping it.
    """
    stripped = line.strip()
    if stripped.startswith(";"):
        return False
    return any(
        word.replace("(", "").replace(")", "").strip() == sym
        for word in stripped.split(" ")[1:]
    )
# Third pass body: record where every type is declared, find usages that come
# before the declaration, and write a (declare-type ...) line ahead of the first
# definition that uses such a type.
new_file = []
print("Third Pass - Adding Forward Type Declarations")
with open("./decompiler/config/all-types.gc") as f:
    lines = f.readlines()
    # Get the types
    for i, line in enumerate(lines):
        # Removing ";" lets commented-out deftypes be picked up as well
        clean_line = line.replace(";", "").strip()
        if clean_line.startswith("(deftype"):
            symbol = clean_line.split(" ")[1]
            parent_type = clean_line.split(" ")[2].rstrip("\n").replace("(", "").replace(")", "")
            if parent_type == "UNKNOWN":
                continue
            type_usages[symbol] = {
                "type_name": symbol,
                "parent_type": parent_type,
                "declared_on_line": i,
                "first_symbol_usage": "",
                "used_on_lines": [],
                "commented_out_type": line.startswith(";")
            }
    # Identify Usages - heavy loop
    # enumerate supplies the progress index directly instead of searching the key
    # list for the current symbol on every iteration
    for symbol_index, (symbol, usage_info) in enumerate(type_usages.items()):
        if symbol_index % 100 == 0:
            print("[{}/{}]: Finding Type Usages".format(symbol_index, len(type_usages)))
        current_symbol = ""
        for i, line in enumerate(lines):
            if i > usage_info["declared_on_line"]:
                break # For speed reasons, we don't care about usages after the declaration
            if "; deftype provided by C Kernel" in line:
                continue
            # Track which definition the current line belongs to
            if line.startswith(("(deftype", "(define-extern", ";;(define-extern", "(defenum")):
                current_symbol = line.split(" ")[1]
            if i != usage_info["declared_on_line"] and symbol_usage(line, usage_info["type_name"]):
                if len(usage_info["used_on_lines"]) == 0:
                    usage_info["first_symbol_usage"] = current_symbol
                usage_info["used_on_lines"].append(i)
    # Identify Necessary Forward Declarations
    # Keyed by the symbol whose definition first uses the type
    forward_declarations = {}
    for symbol, usage_info in type_usages.items():
        declaration_line = usage_info["declared_on_line"]
        if len(usage_info["used_on_lines"]) == 0:
            continue
        earliest_usage = usage_info["used_on_lines"][0]
        if declaration_line > earliest_usage or usage_info["commented_out_type"]:
            declaration = "(declare-type {} {})\n".format(symbol, get_safe_parent_type(usage_info, type_usages, earliest_usage))
            forward_declarations.setdefault(usage_info["first_symbol_usage"], []).append(declaration)
    # FINALLY - add the forward declarations
    skip_next = False
    for i, line in enumerate(lines):
        # The line after a "provided by C Kernel" extern is copied through untouched
        if skip_next:
            skip_next = False
            new_file.append(line)
            continue
        if "; deftype provided by C Kernel" in line:
            skip_next = True
        if line.startswith(("(deftype", "(define-extern", ";;(define-extern")):
            current_symbol = line.split(" ")[1]
            # Forward declarations go immediately before the definition that needs them
            if current_symbol in forward_declarations:
                new_file.append("".join(forward_declarations[current_symbol]))
        new_file.append(line)
# Replace the file on disk with the annotated content
os.remove("./decompiler/config/all-types.gc")
with open("./decompiler/config/all-types.gc", "w") as f:
    f.writelines(new_file)