mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 00:57:44 -04:00
[decompiler] Support v5 data file link data (#3076)
Fix the implementation of `link_v5` so it works on "data" files for jak 3.
This commit is contained in:
parent
5a6aab4fab
commit
cfce5e5916
|
@ -43,7 +43,7 @@ void write_wave_file(const std::vector<s16>& left_samples,
|
||||||
writer.add(sample);
|
writer.add(sample);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < left_samples.size(); i++) {
|
for (size_t i = 0; i < left_samples.size(); i++) {
|
||||||
writer.add(left_samples.at(i));
|
writer.add(left_samples.at(i));
|
||||||
if (i < right_samples.size()) {
|
if (i < right_samples.size()) {
|
||||||
writer.add(right_samples.at(i));
|
writer.add(right_samples.at(i));
|
||||||
|
@ -86,8 +86,9 @@ std::pair<std::vector<s16>, std::vector<s16>> decode_adpcm(BinaryReader& reader,
|
||||||
u8 shift = shift_filter & 0b1111;
|
u8 shift = shift_filter & 0b1111;
|
||||||
u8 filter = shift_filter >> 4;
|
u8 filter = shift_filter >> 4;
|
||||||
u8 flags = reader.read<u8>();
|
u8 flags = reader.read<u8>();
|
||||||
|
(void)flags;
|
||||||
|
|
||||||
// removed assertions here
|
// removed assertions here (and that's probably why the audio doesn't sound right)
|
||||||
|
|
||||||
u8 input_buffer[14];
|
u8 input_buffer[14];
|
||||||
|
|
||||||
|
|
|
@ -521,7 +521,7 @@ void Interpreter::vararg_check(
|
||||||
/*!
|
/*!
|
||||||
* Evaluate a list and return the result of the last evaluation.
|
* Evaluate a list and return the result of the last evaluation.
|
||||||
*/
|
*/
|
||||||
Object Interpreter::eval_list_return_last(const Object& form,
|
Object Interpreter::eval_list_return_last(const Object& /*form*/,
|
||||||
Object rest,
|
Object rest,
|
||||||
const std::shared_ptr<EnvironmentObject>& env) {
|
const std::shared_ptr<EnvironmentObject>& env) {
|
||||||
if (rest.is_empty_list()) {
|
if (rest.is_empty_list()) {
|
||||||
|
|
|
@ -66,7 +66,6 @@ SymbolTable::~SymbolTable() {
|
||||||
}
|
}
|
||||||
|
|
||||||
InternedSymbolPtr SymbolTable::intern(const char* str) {
|
InternedSymbolPtr SymbolTable::intern(const char* str) {
|
||||||
InternedSymbolPtr result;
|
|
||||||
size_t string_len = strlen(str);
|
size_t string_len = strlen(str);
|
||||||
u32 hash = crc32((const u8*)str, string_len);
|
u32 hash = crc32((const u8*)str, string_len);
|
||||||
|
|
||||||
|
|
|
@ -113,7 +113,7 @@ Function* LinkedObjectFile::try_get_function_at_label(int label_id) {
|
||||||
|
|
||||||
Function* LinkedObjectFile::try_get_function_at_label(const DecompilerLabel& label) {
|
Function* LinkedObjectFile::try_get_function_at_label(const DecompilerLabel& label) {
|
||||||
for (auto& func : functions_by_seg.at(label.target_segment)) {
|
for (auto& func : functions_by_seg.at(label.target_segment)) {
|
||||||
// + 4 to skip past type tag to the first word, which is were the label points.
|
// + 4 to skip past type tag to the first word, which is where the label points.
|
||||||
if (func.start_word * 4 + 4 == label.offset) {
|
if (func.start_word * 4 + 4 == label.offset) {
|
||||||
return &func;
|
return &func;
|
||||||
}
|
}
|
||||||
|
@ -128,7 +128,7 @@ const Function* LinkedObjectFile::try_get_function_at_label(int label_id) const
|
||||||
|
|
||||||
const Function* LinkedObjectFile::try_get_function_at_label(const DecompilerLabel& label) const {
|
const Function* LinkedObjectFile::try_get_function_at_label(const DecompilerLabel& label) const {
|
||||||
for (auto& func : functions_by_seg.at(label.target_segment)) {
|
for (auto& func : functions_by_seg.at(label.target_segment)) {
|
||||||
// + 4 to skip past type tag to the first word, which is were the label points.
|
// + 4 to skip past type tag to the first word, which is where the label points.
|
||||||
if (func.start_word * 4 + 4 == label.offset) {
|
if (func.start_word * 4 + 4 == label.offset) {
|
||||||
return &func;
|
return &func;
|
||||||
}
|
}
|
||||||
|
@ -156,7 +156,8 @@ bool LinkedObjectFile::pointer_link_word(int source_segment,
|
||||||
ASSERT(word.kind() == LinkedWord::PLAIN_DATA);
|
ASSERT(word.kind() == LinkedWord::PLAIN_DATA);
|
||||||
|
|
||||||
if (dest_offset / 4 > (int)words_by_seg.at(dest_segment).size()) {
|
if (dest_offset / 4 > (int)words_by_seg.at(dest_segment).size()) {
|
||||||
// printf("HACK bad link ignored!\n");
|
// printf("HACK bad link ignored src %d, %d vs %d!\n", source_offset, dest_offset / 4,
|
||||||
|
// int(words_by_seg.at(dest_segment).size()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ASSERT(dest_offset / 4 <= (int)words_by_seg.at(dest_segment).size());
|
ASSERT(dest_offset / 4 <= (int)words_by_seg.at(dest_segment).size());
|
||||||
|
|
|
@ -55,11 +55,10 @@ struct LinkHeaderV5 {
|
||||||
uint32_t length_to_get_to_code; // 4 length.. of link data?
|
uint32_t length_to_get_to_code; // 4 length.. of link data?
|
||||||
uint16_t version; // 8
|
uint16_t version; // 8
|
||||||
uint16_t unknown; // 10
|
uint16_t unknown; // 10
|
||||||
uint32_t pad; // 12
|
uint32_t length_to_get_to_link; // 12
|
||||||
uint32_t link_length; // 16
|
uint32_t link_length; // 16
|
||||||
uint8_t n_segments; // 20
|
uint8_t n_segments; // 20
|
||||||
char name[59]; // 21 (really??)
|
char name[59]; // 21 (really??)
|
||||||
SegmentInfo segment_info[3];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// The types of symbol links
|
// The types of symbol links
|
||||||
|
@ -421,72 +420,83 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
const std::string& name,
|
const std::string& name,
|
||||||
DecompilerTypeSystem& dts) {
|
DecompilerTypeSystem& dts) {
|
||||||
auto header = (const LinkHeaderV5*)(&data.at(0));
|
auto header = (const LinkHeaderV5*)(&data.at(0));
|
||||||
if (header->n_segments == 1) {
|
|
||||||
printf("abandon %s!\n", name.c_str());
|
// for jak 3, both code and data use a "v5" format for linking.
|
||||||
return;
|
// code has 3 segments (top-level, main, debug), and data has just 1.
|
||||||
}
|
// they appear to be generated by different programs, so there's some hard-coded checks for
|
||||||
|
// each.
|
||||||
|
|
||||||
|
// the "v5" format allows for multiple segments (like v3), "split-pointer" linking to support
|
||||||
|
// splitting a pointer link between a lui/ori instruction (needed for code), but uses "v2"
|
||||||
|
// symbol linking. For a reason that I don't understand, "v3" symlinks use a less space-efficient
|
||||||
|
// encoding of large integers.
|
||||||
|
|
||||||
|
static_assert(0x50 == sizeof(LinkHeaderV5));
|
||||||
|
|
||||||
|
if (header->n_segments == 3) {
|
||||||
ASSERT(header->type_tag == 0);
|
ASSERT(header->type_tag == 0);
|
||||||
ASSERT(name == header->name);
|
ASSERT(name == header->name);
|
||||||
ASSERT(header->n_segments == 3);
|
// the linker for code placed the link data at the beginning.
|
||||||
ASSERT(header->pad == 0x50);
|
// but we expect the link data to start just after the object file header
|
||||||
ASSERT(header->length_to_get_to_code - header->link_length == 0x50);
|
ASSERT(header->length_to_get_to_link == sizeof(LinkHeaderV5));
|
||||||
|
// and then the code should come after that
|
||||||
f.set_segment_count(3);
|
ASSERT(header->length_to_get_to_code == sizeof(LinkHeaderV5) + header->link_length);
|
||||||
|
} else if (header->n_segments == 1) {
|
||||||
// link v3's data size is data.size() - link_length
|
ASSERT(header->type_tag == UINT32_MAX);
|
||||||
// link v5's data size is data.size() - new_link_length - 0x50.
|
// name is inconsistent, so don't check it
|
||||||
|
// data files have the data first, which is good, as the last object in a DGO gets loaded
|
||||||
// lbp + 4 points to version?
|
// directly to the heap, and putting the data first means that we can "free" the link data just
|
||||||
// lbp points to 4 past start of header.
|
// by bumping the heap pointer back, rather than memcpy the code back to cover the hole if link
|
||||||
|
// data came first.
|
||||||
// lbp[1] = version + unknown 16 bit thing.
|
// the offset is always 0x80, which is bigger than the header, but is needed to make data
|
||||||
// lbp[3] = link block length (minus 0x50)
|
// aligned with the PS2's cache line size (64 bytes), which makes sense.
|
||||||
|
ASSERT(header->length_to_get_to_code == 0x80);
|
||||||
|
} else {
|
||||||
|
lg::die("bad segment count {}", header->n_segments);
|
||||||
|
}
|
||||||
|
f.set_segment_count(header->n_segments);
|
||||||
|
|
||||||
// todo - check this against the code size we actually got.
|
// todo - check this against the code size we actually got.
|
||||||
// size_t expected_code_size = data.size() - (header->link_length + 0x50);
|
// size_t expected_code_size = data.size() - (header->link_length + 0x50);
|
||||||
|
|
||||||
uint32_t data_ptr_offset = header->length_to_get_to_code;
|
const int n_segs = header->n_segments;
|
||||||
|
|
||||||
|
// the first thing in the link data is the segment info array, which we need to find stuff.
|
||||||
|
const SegmentInfo* seg_info_array =
|
||||||
|
(const SegmentInfo*)(data.data() + header->length_to_get_to_link);
|
||||||
|
|
||||||
|
// for convenience, we'll find the data/link offsets for each segment.
|
||||||
uint32_t segment_data_offsets[3];
|
uint32_t segment_data_offsets[3];
|
||||||
uint32_t segment_link_offsets[3];
|
uint32_t segment_link_offsets[3];
|
||||||
uint32_t segment_link_ends[3];
|
uint32_t segment_link_ends[3]; // set in linking, once we get to the end.
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < n_segs; i++) {
|
||||||
segment_data_offsets[i] = data_ptr_offset + header->segment_info[i].data;
|
segment_data_offsets[i] = header->length_to_get_to_code + seg_info_array[i].data;
|
||||||
segment_link_offsets[i] = header->segment_info[i].relocs + 0x50;
|
segment_link_offsets[i] = header->length_to_get_to_link + seg_info_array[i].relocs;
|
||||||
ASSERT(header->segment_info[i].magic == 1);
|
ASSERT(seg_info_array[i].magic == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check that the data region is filled
|
// check that the data region is filled
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < n_segs - 1; i++) {
|
||||||
ASSERT(align16(segment_data_offsets[i] + header->segment_info[i].size) ==
|
ASSERT(align16(segment_data_offsets[i] + seg_info_array[i].size) ==
|
||||||
segment_data_offsets[i + 1]);
|
segment_data_offsets[i + 1]);
|
||||||
}
|
}
|
||||||
ASSERT(align16(segment_data_offsets[2] + header->segment_info[2].size) == data.size());
|
if (n_segs == 3) {
|
||||||
|
ASSERT(align16(segment_data_offsets[2] + seg_info_array[2].size) == data.size());
|
||||||
|
}
|
||||||
|
|
||||||
// loop over segments (reverse order for now)
|
// loop over segments
|
||||||
for (int seg_id = 3; seg_id-- > 0;) {
|
for (int seg_id = n_segs; seg_id-- > 0;) {
|
||||||
// ?? is this right?
|
int segment_size = seg_info_array[seg_id].size;
|
||||||
if (header->segment_info[seg_id].size == 0)
|
if (segment_size == 0) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto segment_size = header->segment_info[seg_id].size;
|
// the decompiler uses 4-byte words, so pad to 4-bytes.
|
||||||
f.stats.v3_code_bytes += segment_size;
|
|
||||||
|
|
||||||
// if(gGameVersion == JAK2) {
|
|
||||||
bool adjusted = false;
|
|
||||||
while (segment_size % 4) {
|
while (segment_size % 4) {
|
||||||
segment_size++;
|
segment_size++;
|
||||||
adjusted = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (adjusted) {
|
// set up pointers for linker.
|
||||||
printf(
|
|
||||||
"Adjusted the size of segment %d in %s, this is fine, but rare (and may indicate a "
|
|
||||||
"bigger problem if it happens often)\n",
|
|
||||||
seg_id, name.c_str());
|
|
||||||
}
|
|
||||||
// }
|
|
||||||
|
|
||||||
auto base_ptr = segment_data_offsets[seg_id];
|
auto base_ptr = segment_data_offsets[seg_id];
|
||||||
auto data_ptr = base_ptr - 4;
|
auto data_ptr = base_ptr - 4;
|
||||||
auto link_ptr = segment_link_offsets[seg_id];
|
auto link_ptr = segment_link_offsets[seg_id];
|
||||||
|
@ -494,13 +504,15 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
ASSERT((data_ptr % 4) == 0);
|
ASSERT((data_ptr % 4) == 0);
|
||||||
ASSERT((segment_size % 4) == 0);
|
ASSERT((segment_size % 4) == 0);
|
||||||
|
|
||||||
|
// add data to the decompiler.
|
||||||
auto code_start = (const uint32_t*)(&data.at(data_ptr + 4));
|
auto code_start = (const uint32_t*)(&data.at(data_ptr + 4));
|
||||||
auto code_end = ((const uint32_t*)(&data.at(data_ptr + segment_size))) + 1;
|
auto code_end = ((const uint32_t*)(&data.at(data_ptr + segment_size))) + 1;
|
||||||
for (auto x = code_start; x < code_end; x++) {
|
for (auto x = code_start; x < code_end; x++) {
|
||||||
f.push_back_word_to_segment(*((const uint32_t*)x), seg_id);
|
f.push_back_word_to_segment(*((const uint32_t*)x), seg_id);
|
||||||
}
|
}
|
||||||
bool fixing = false;
|
|
||||||
|
|
||||||
|
// pointer linking.
|
||||||
|
bool fixing = false;
|
||||||
if (data.at(link_ptr)) {
|
if (data.at(link_ptr)) {
|
||||||
// we have pointers
|
// we have pointers
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -517,7 +529,8 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
if ((old_code >> 24) == 0) {
|
if ((old_code >> 24) == 0) {
|
||||||
f.stats.v3_word_pointers++;
|
f.stats.v3_word_pointers++;
|
||||||
if (!f.pointer_link_word(seg_id, data_ptr - base_ptr, seg_id, old_code)) {
|
if (!f.pointer_link_word(seg_id, data_ptr - base_ptr, seg_id, old_code)) {
|
||||||
printf("WARNING bad pointer_link_word (2) in %s\n", name.c_str());
|
// the art groups just have bogus links. we ignored them in jak 2, so do the same
|
||||||
|
// here. The joint-anim-compressed-control's have a few bogus frames at the end.
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
f.stats.v3_split_pointers++;
|
f.stats.v3_split_pointers++;
|
||||||
|
@ -526,12 +539,9 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
ASSERT(lo_hi_offset);
|
ASSERT(lo_hi_offset);
|
||||||
ASSERT(dest_seg < 3);
|
ASSERT(dest_seg < 3);
|
||||||
auto offset_upper = old_code & 0xff;
|
auto offset_upper = old_code & 0xff;
|
||||||
// ASSERT(offset_upper == 0);
|
|
||||||
uint32_t low_code = *(const uint32_t*)(&data.at(data_ptr + 4 * lo_hi_offset));
|
uint32_t low_code = *(const uint32_t*)(&data.at(data_ptr + 4 * lo_hi_offset));
|
||||||
uint32_t offset = low_code & 0xffff;
|
uint32_t offset = low_code & 0xffff;
|
||||||
if (offset_upper) {
|
if (offset_upper) {
|
||||||
// seems to work fine, no need to warn.
|
|
||||||
// printf("WARNING - offset upper is set in %s\n", name.c_str());
|
|
||||||
offset += (offset_upper << 16);
|
offset += (offset_upper << 16);
|
||||||
}
|
}
|
||||||
f.pointer_link_split_word(seg_id, data_ptr - base_ptr,
|
f.pointer_link_split_word(seg_id, data_ptr - base_ptr,
|
||||||
|
@ -558,6 +568,7 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
}
|
}
|
||||||
link_ptr++;
|
link_ptr++;
|
||||||
|
|
||||||
|
// symbol linking.
|
||||||
if (data.at(link_ptr)) {
|
if (data.at(link_ptr)) {
|
||||||
auto sub_link_ptr = link_ptr;
|
auto sub_link_ptr = link_ptr;
|
||||||
|
|
||||||
|
@ -604,15 +615,16 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
segment_link_ends[seg_id] = link_ptr;
|
segment_link_ends[seg_id] = link_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n_segs == 3) {
|
||||||
ASSERT(segment_link_offsets[0] == 128);
|
ASSERT(segment_link_offsets[0] == 128);
|
||||||
|
|
||||||
if (header->segment_info[0].size) {
|
if (seg_info_array[0].size) {
|
||||||
ASSERT(segment_link_ends[0] + 1 == segment_link_offsets[1]);
|
ASSERT(segment_link_ends[0] + 1 == segment_link_offsets[1]);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(segment_link_offsets[0] + 2 == segment_link_offsets[1]);
|
ASSERT(segment_link_offsets[0] + 2 == segment_link_offsets[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (header->segment_info[1].size) {
|
if (seg_info_array[1].size) {
|
||||||
ASSERT(segment_link_ends[1] + 1 == segment_link_offsets[2]);
|
ASSERT(segment_link_ends[1] + 1 == segment_link_offsets[2]);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(segment_link_offsets[1] + 2 == segment_link_offsets[2]);
|
ASSERT(segment_link_offsets[1] + 2 == segment_link_offsets[2]);
|
||||||
|
@ -620,6 +632,7 @@ static void link_v5(LinkedObjectFile& f,
|
||||||
|
|
||||||
ASSERT(align16(segment_link_ends[2] + 2) == segment_data_offsets[0]);
|
ASSERT(align16(segment_link_ends[2] + 2) == segment_data_offsets[0]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void link_v3(LinkedObjectFile& f,
|
static void link_v3(LinkedObjectFile& f,
|
||||||
const std::vector<uint8_t>& data,
|
const std::vector<uint8_t>& data,
|
||||||
|
@ -677,10 +690,8 @@ static void link_v3(LinkedObjectFile& f,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (game_version == GameVersion::Jak2) {
|
if (game_version == GameVersion::Jak2) {
|
||||||
[[maybe_unused]] bool adjusted = false;
|
|
||||||
while (segment_size % 4) {
|
while (segment_size % 4) {
|
||||||
segment_size++;
|
segment_size++;
|
||||||
adjusted = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -294,7 +294,7 @@ struct FieldPrint {
|
||||||
|
|
||||||
// if a field has a weird inspect, just return the FieldPrint instead of asserting,
|
// if a field has a weird inspect, just return the FieldPrint instead of asserting,
|
||||||
// there's too many edge cases in custom prints to account for all of them
|
// there's too many edge cases in custom prints to account for all of them
|
||||||
FieldPrint handle_custom_prints(FieldPrint& fp, const std::string& str) {
|
FieldPrint handle_custom_prints(FieldPrint& fp, const std::string& /*str*/) {
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ void MessageBuffer::handle_char(char c) {
|
||||||
// we reach the length of the body as provided in the Content-Length
|
// we reach the length of the body as provided in the Content-Length
|
||||||
// header.
|
// header.
|
||||||
auto content_length = std::stoi(m_headers["Content-Length"]);
|
auto content_length = std::stoi(m_headers["Content-Length"]);
|
||||||
if (m_raw_message.length() == content_length) {
|
if (m_raw_message.length() == (size_t)content_length) {
|
||||||
m_body = json::parse(m_raw_message);
|
m_body = json::parse(m_raw_message);
|
||||||
m_reading_content = false;
|
m_reading_content = false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue