mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 21:27:52 -04:00
b3c58d2247
* scripts: cleanup scripts directory some more
* stash, getting there
* stash again
* closer!
* it works! decently....cleanup time and add some extra features
* minor cleanup
70 lines
2.3 KiB
Python
# Compare jak2 decompiled object files that share a base name but live in
# different DGOs, and report which copies actually differ.
import glob
import hashlib
import json
from pathlib import Path
def get_md5_from_file(filename):
    with open(filename, "rb") as f:
        file_bytes = f.read()
    # The file is 16-byte aligned, so there may be padding after the real
    # data. If that padding is the only difference in the entire file, the
    # files are very likely identical, but the padding bytes are potentially
    # garbage, so truncate to the data length stored (little-endian) at
    # bytes 12-15 before hashing.
    length_bytes = bytes([file_bytes[12], file_bytes[13], file_bytes[14], file_bytes[15]])
    offset_to_truncate = int.from_bytes(length_bytes, "little")
    bytes_chopped = len(file_bytes) - offset_to_truncate
    file_bytes = file_bytes[:offset_to_truncate]
    return [hashlib.md5(file_bytes).hexdigest(), bytes_chopped]
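
# A quick sanity check of the truncation logic on a synthetic buffer (the
# values here are illustrative, not a real .go file): 12 header bytes, a
# 4-byte little-endian length of 20 at offset 12, then padding out to 32
# bytes. fake[12:16] decodes to 20, so only the first 20 bytes would be
# hashed and bytes_chopped would be 32 - 20 = 12.
#
#   fake = bytes(12) + (20).to_bytes(4, "little") + bytes(16)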

# print(get_md5_from_file("../../decompiler_out/jak2/raw_obj/air-train-ag-CAP.go"))

with open("../../goal_src/jak2/build/all_objs.json", "r") as f:
    jak2_files = json.load(f)
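
# Each entry in all_objs.json is a per-object record; the only field this
# script relies on is file[3], the list of DGOs the object appears in (the
# meaning of the other fields is not assumed here).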
dgos = set()

for file in jak2_files:
    for dgo in file[3]:
        dgos.add(dgo.lower())

obj_files = glob.glob("../../decompiler_out/jak2/raw_obj/*")

print(len(obj_files))
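
# Worked example of the grouping below (file names illustrative): the stem
# "air-train-ag-CAP" ends with the DGO suffix "-cap", so every "-<dgo>" token
# is stripped, giving the base name "air-train-ag". Any other copy such as
# "air-train-ag-<OTHERDGO>" collapses to the same base name, and the copies
# become candidates for an identical-content check.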
# Find all the files that have the same name, but a different DGO suffix.
potentially_identical_files = {}

for file in obj_files:
    name = Path(file).stem
    base_name = None
    duplicate = False
    for dgo in dgos:
        if name.lower().endswith("-{}".format(dgo)):
            # Some files have multiple DGOs in the name, so strip every token
            # that is a DGO name rather than just the final suffix.
            base_name_parts = []
            for token in name.split("-"):
                if token.lower() not in dgos:
                    base_name_parts.append(token)
            base_name = "-".join(base_name_parts)
            duplicate = True
    if duplicate:
        # Hash the (padding-stripped) file contents.
        file_hash = get_md5_from_file(file)
        entry = {"path": file, "hash": file_hash[0], "bytes_dropped": file_hash[1]}
        if base_name not in potentially_identical_files:
            potentially_identical_files[base_name] = [entry]
        else:
            potentially_identical_files[base_name].append(entry)
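
# At this point potentially_identical_files maps each base name to all of its
# copies, e.g. (illustrative):
#   "air-train-ag": [
#       {"path": "../../decompiler_out/jak2/raw_obj/air-train-ag-CAP.go",
#        "hash": "<md5 hex digest>", "bytes_dropped": 4},
#       ...,
#   ]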
# Log any groups whose copies actually differ.
for base_file in potentially_identical_files:
    common_hash = potentially_identical_files[base_file][0]["hash"]
    for file in potentially_identical_files[base_file]:
        if common_hash != file["hash"]:
            print("{} is different!".format(base_file))
            break

with open("./potentially-identical-files.json", "w") as json_file:
    json.dump(potentially_identical_files, json_file)
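
# Note: all paths above are relative ("../../..."), so this assumes the script
# is run from its own directory inside the repo (likely under scripts/, per the
# commit message); running it from elsewhere will make the opens/globs miss.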