mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 00:57:44 -04:00
c87db7e670
The main thing that was done here was to slightly modify the new subtitle-v2 JSON schema to be more similar to the existing one so that it can properly be used in Crowdin. Draft while I double-check the diff myself Along the way the following was also done (among other things): - got rid of as much duplication as was feasible in the serialization and editor code - separated the text serialization code from the subtitle code for better organization - simplified "base language" in the editor. The new subtitle format has built-in support for defining a base language so the editor doesn't have to be used as a crutch. Also, cutscenes only defined in the base come first in the list now as that is generally the order you'd work from (what you havn't done first) - got rid of the GOAL subtitle format code completely - switched jak 2 text translations to the JSON format as well - found a few mistakes in the jak 1 subtitle metadata files - added a couple minor features to the editor - consolidate and removed complexity, ie. recently all jak 1 hints were forced to the `named` type, so I got rid of the two types as there isn't a need anymore. - removed subtitle editor groups for jak 1, the only reason they existed was so when the GOAL file was manually written out they were somewhat organized, the editor has a decent filter control, there's no need for them. - removed the GOAL -> JSON python script helper, it's been a month or so and no one has come forward with existing translations that they need help with migrating. If they do need it, the script will be in the git history. I did some reasonably through testing in Jak1/Jak 2 and everything seemed to work. But more testing is always a good idea. --------- Co-authored-by: ManDude <7569514+ManDude@users.noreply.github.com>
168 lines
7 KiB
Python
168 lines
7 KiB
Python
import glob
|
||
import json
|
||
import re
|
||
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser()
|
||
parser.add_argument("--fix", action="store_true")
|
||
parser.set_defaults(fix=False)
|
||
args = parser.parse_args()
|
||
|
||
# TODO - trim strings
|
||
# TODO - jak 2 files
|
||
|
||
# fmt: off
|
||
JAK1_ALLOWED_CHARACTERS = [
|
||
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
|
||
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
|
||
"'", "!", "(", ")", "+", "-", ",", ".", "/", ":", "=", "<", ">", "*", "%", "?", "\"",
|
||
"`", "ˇ", "¨", "º", "¡", "¿", "Æ", "Ç", "ß", "™", "、", " ", "Å", "Ø",
|
||
"Ñ", "Ã", "Õ", "Á", "É", "Í", "Ó", "Ú", "Ő", "Ű", "Â", "Ê", "Î", "Ô", "Û", "À", "È", "Ì", "Ò", "Ù", "Ä", "Ë", "Ï", "Ö", "ö", "Ü",
|
||
"海", "界", "学", "ワ", "ヲ", "ン", "岩", "旧", "空", "ヮ", "撃", "賢", "湖", "口", "行", "合", "士", "寺", "山", "者", "所", "書", "小", "沼", "上", "城", "場", "出", "闇", "遺", "黄", "屋", "下", "家", "火", "花", "レ", "Œ", "ロ", "青", "・", "゛", "゜", "ー", "『", "』", "宝", "石", "赤", "跡", "川", "戦", "村", "隊", "台", "長", "鳥", "艇", "洞", "道", "発", "飛", "噴", "池", "中", "塔", "島", "部", "砲", "産", "眷", "力", "緑", "岸", "像", "谷", "心", "森", "水", "船", "世",
|
||
"ぁ", "あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "っ", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "ゃ", "や", "ゅ", "ゆ", "ょ", "よ", "ら", "り", "る", "れ", "ろ", "ゎ", "わ", "を", "ん",
|
||
"が", "ぎ", "ぐ", "げ", "ご", "ざ", "じ", "ず", "ぜ", "ぞ", "だ", "ぢ", "づ", "で", "ど", "ば", "び", "ぶ", "べ", "ぼ",
|
||
"ぱ", "ぴ", "ぷ", "ぺ", "ぽ",
|
||
"ァ", "ア", "ィ", "イ", "ゥ", "ウ", "ェ", "エ", "ォ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ッ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ャ", "ヤ", "ュ", "ユ", "ョ", "ヨ", "ラ", "リ", "ル",
|
||
"ヴ", "ガ", "ギ", "グ", "ゲ", "ゴ", "ザ", "ジ", "ズ", "ゼ", "ゾ", "ダ", "ヂ", "ヅ", "デ", "ド", "バ", "ビ", "ブ", "ベ", "ボ",
|
||
"パ", "ピ", "プ", "ペ", "ポ",
|
||
"~"
|
||
]
|
||
|
||
JAK1_ALLOWED_CODES = [
|
||
"<TIL>",
|
||
"<PAD_X>", "<PAD_TRIANGLE>", "<PAD_CIRCLE>", "<PAD_SQUARE>"
|
||
]
|
||
|
||
JAK1_AUTO_REPLACEMENTS = {
|
||
"ª": "º",
|
||
"\n": "",
|
||
"’": "'",
|
||
}
|
||
# fmt: on
|
||
|
||
invalid_characters_found = False
|
||
|
||
|
||
def is_allowed_code(pos, text):
|
||
# Find any occurences of allowed codes in the string
|
||
# if the position overlaps with these occurrences, it's allowed
|
||
for code in JAK1_ALLOWED_CODES:
|
||
for match in re.finditer(code, text):
|
||
if pos >= match.start() and pos <= match.end():
|
||
return match.end()
|
||
return -1
|
||
|
||
|
||
def char_allowed(char):
|
||
return char in JAK1_ALLOWED_CHARACTERS
|
||
|
||
|
||
def fix_character(char):
|
||
# First let's try upper-casing it, if that's allowed, let's use that instead
|
||
upper_case = char.upper()
|
||
if char_allowed(upper_case):
|
||
return upper_case
|
||
if char in JAK1_AUTO_REPLACEMENTS:
|
||
return JAK1_AUTO_REPLACEMENTS[char]
|
||
return char
|
||
|
||
|
||
def replace_character(string, position, new_character):
|
||
string_list = list(string)
|
||
string_list[position] = new_character
|
||
new_string = "".join(string_list)
|
||
return new_string
|
||
|
||
|
||
def lint_jak1_characters(text):
|
||
invalid_characters_found = False
|
||
pos = 0
|
||
while pos < len(text):
|
||
character = text[pos]
|
||
if not char_allowed(character):
|
||
# Check to see if it's an allowed code
|
||
code_end_pos = is_allowed_code(pos, text)
|
||
if code_end_pos == -1:
|
||
# If we are fixing instances, attempt to do so
|
||
char_fixed = False
|
||
if args.fix:
|
||
new_char = fix_character(character)
|
||
if new_char != character:
|
||
text = replace_character(text, pos, new_char)
|
||
char_fixed = True
|
||
if not char_fixed:
|
||
print(
|
||
"Character '{}' not allowed - Found in {}".format(
|
||
character, text
|
||
)
|
||
)
|
||
invalid_characters_found = True
|
||
pos = pos + 1
|
||
else:
|
||
# advance to the end of the code and continue checking
|
||
pos = code_end_pos
|
||
else:
|
||
pos = pos + 1
|
||
return invalid_characters_found, text
|
||
|
||
|
||
# Iterate through the translations making sure there are no characters that are not allowed
|
||
text_files = glob.glob("./game/assets/jak1/text/*.json")
|
||
|
||
for text_file in text_files:
|
||
print("Checking {}".format(text_file))
|
||
with open(text_file, encoding="utf-8") as f:
|
||
file_data = json.load(f)
|
||
for id, text in file_data.items():
|
||
invalid_chars_exist, new_text = lint_jak1_characters(text)
|
||
if args.fix:
|
||
file_data[id] = new_text
|
||
if invalid_chars_exist:
|
||
invalid_characters_found = True
|
||
if args.fix:
|
||
# save the modified file back out
|
||
with open(text_file, "w", encoding="utf-8") as f:
|
||
json.dump(file_data, f, indent=2, ensure_ascii=False)
|
||
f.write("\n")
|
||
|
||
subtitle_files = glob.glob("./game/assets/jak1/subtitle/*lines*.json")
|
||
|
||
for subtitle_file in subtitle_files:
|
||
print("Checking {}...".format(subtitle_file))
|
||
with open(subtitle_file, encoding="utf-8") as f:
|
||
file_data = json.load(f)
|
||
# Check Speakers
|
||
for id, text in file_data["speakers"].items():
|
||
invalid_chars_exist, new_text = lint_jak1_characters(text)
|
||
if args.fix and new_text != text:
|
||
file_data["speakers"][id] = new_text
|
||
if invalid_chars_exist:
|
||
invalid_characters_found = True
|
||
# Check Lines
|
||
for id, lines in file_data["cutscenes"].items():
|
||
for i, line in enumerate(lines):
|
||
invalid_chars_exist, new_text = lint_jak1_characters(line)
|
||
if args.fix and new_text != line:
|
||
lines[i] = new_text
|
||
if invalid_chars_exist:
|
||
invalid_characters_found = True
|
||
for id, lines in file_data["hints"].items():
|
||
for i, line in enumerate(lines):
|
||
invalid_chars_exist, new_text = lint_jak1_characters(line)
|
||
if args.fix and new_text != line:
|
||
lines[i] = new_text
|
||
if invalid_chars_exist:
|
||
invalid_characters_found = True
|
||
if args.fix:
|
||
# save the modified file back out
|
||
with open(subtitle_file, "w", encoding="utf-8") as f:
|
||
json.dump(file_data, f, indent=2, ensure_ascii=False)
|
||
f.write("\n")
|
||
|
||
if invalid_characters_found:
|
||
print("Invalid characters were found, see above")
|
||
exit(1)
|
||
else:
|
||
print("No invalid characters found!")
|