mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 21:27:52 -04:00
c4a92571b2
* ci: fix windows releases (hopefully) * scripts: fix Taskfile file references for linux * asserts: add `ASSERT_MSG` macro and ensure `stdout` is flushed before `abort`ing * asserts: refactor all `assert(false);` with a preceding message instances * lint: format * temp... * fix compiler errors * assert: allow for string literals in `ASSERT_MSG` * lint: formatting * revert temp change for testing
382 lines
13 KiB
C++
382 lines
13 KiB
C++
#include "Generic2.h"
|
|
#include "game/graphics/opengl_renderer/AdgifHandler.h"
|
|
|
|
/*!
|
|
* Advance through DMA data that has no effect on rendering (NOP codes) and see if this is the
|
|
* end of the data.
|
|
* The DmaFollower will either point to the start of the next bucket (and the function will return
|
|
* true), or to the beginning of the next non-NOP DMA for this bucket.
|
|
*/
|
|
bool Generic2::check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket) {
|
|
while (dma.current_tag().qwc == 0 && dma.current_tag_vifcode0().kind == VifCode::Kind::NOP &&
|
|
dma.current_tag_vifcode1().kind == VifCode::Kind::NOP) {
|
|
// this "CALL" tag is inserted by the engine to reset the GS. It's always inserted at the end of
|
|
// the bucket. if we see it here, we should be able to skip over this resetting stuff (always 4
|
|
// tags) and then see the start of the next bucket.
|
|
if (dma.current_tag().kind == DmaTag::Kind::CALL) {
|
|
for (int i = 0; i < 4; i++) {
|
|
dma.read_and_advance();
|
|
m_stats.dma_tags++;
|
|
}
|
|
ASSERT(dma.current_tag_offset() == next_bucket);
|
|
return true;
|
|
}
|
|
m_stats.dma_tags++;
|
|
dma.read_and_advance();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*!
 * Process the first DMA of a generic bucket.
 * Return true if the generic bucket is empty and there is nothing to do.
 *
 * Otherwise, populates m_drawing_config which contains the common draw settings for all data being
 * rendered in this bucket.
 */
bool Generic2::handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket) {
  // if the engine didn't run the generic renderer setup function, this bucket will end here.
  if (check_for_end_of_generic_data(dma, next_bucket)) {
    return true;
  }

  // next, the generic setup. This reads the data generated by generic-init-buf.

  // setup packet 1 is GS settings (3 quadwords = 48 bytes)
  auto test_and_zbuf = dma.read_and_advance();
  ASSERT(test_and_zbuf.size_bytes == 48);
  // first qw is the gif tag. Can ignore.
  // second qw is test, this is always the same, so can ignore it too.
  // (new 'static 'gs-test
  //      :ate #x1
  //      :atst (gs-atest greater-equal)
  //      :aref #x26
  //      :afail #x1
  //      :zte #x1
  //      :ztst (gs-ztest greater-equal)
  //      )
  // third qw is zbuf:
  // the only thing that changes is zmsk, we need to store this value for later.
  u64 zbuf_val;
  memcpy(&zbuf_val, test_and_zbuf.data + 32, 8);
  m_drawing_config.zmsk = GsZbuf(zbuf_val).zmsk();

  // setup packet 2 is constants that normally go to VU1 data memory.
  // we're not going to be super strict checking the exact details of the unpack command, it's
  // a waste of time since we're the ones generating it anyway.
  auto constants = dma.read_and_advance();
  ASSERT(constants.size_bytes == 160);
  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);

  // layout of the constants block (byte offsets into constants.data):
  // (fog vector :inline :offset-assert 0)
  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);

  // (adgif gs-gif-tag :inline :offset-assert 16) ;; was qword
  // (giftag gs-gif-tag :inline :offset-assert 32) ;; was qword
  // (hvdf-offset vector :inline :offset-assert 48)
  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 48, 16);
  // the remaining fields are not read by this renderer:
  // (hmge-scale vector :inline :offset-assert 64)
  // (invh-scale vector :inline :offset-assert 80)
  // (guard vector :inline :offset-assert 96)
  // (adnop qword :inline :offset-assert 112)
  // (flush qword :inline :offset-assert 128)
  // (stores qword :inline :offset-assert 144)

  // setup packet 3 is VU state; only its size is validated here.
  auto vu_setup = dma.read_and_advance();
  ASSERT(vu_setup.size_bytes == 32);
  // this sets offset/base to 0, sets row to 0 and runs program 0 to set up VU regs
  // todo: any setup required from running this program.

  // if there was nothing rendered by generic on this frame in this bucket, the bucket will end
  // here.
  if (check_for_end_of_generic_data(dma, next_bucket)) {
    return true;
  }

  return false;
}
|
|
|
|
/*!
 * Rewind all per-frame allocation counters so the buffers can be refilled from scratch.
 * No memory is released; the backing storage is reused on the next frame.
 */
void Generic2::reset_buffers() {
  m_next_free_vert = 0;
  m_next_free_idx = 0;
  m_next_free_frag = 0;
  m_next_free_adgif = 0;
  m_next_free_bucket = 0;
}
|
|
|
|
bool is_nop_vif(const u8* data) {
|
|
u32 tag0_data;
|
|
memcpy(&tag0_data, data, 4);
|
|
return VifCode(tag0_data).kind == VifCode::Kind::NOP;
|
|
}
|
|
|
|
bool is_nop_or_flushe_vif(const u8* data) {
|
|
u32 tag0_data;
|
|
memcpy(&tag0_data, data, 4);
|
|
auto k = VifCode(tag0_data).kind;
|
|
return k == VifCode::Kind::NOP || k == VifCode::Kind::FLUSHE;
|
|
}
|
|
|
|
/*!
 * Copy vtx_count packed 12-byte xyz positions from data into the vertex array.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vtx_positions(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  constexpr u32 kBytesPerVert = 12;  // three 32-bit components per vertex
  const u8* src = data;
  for (Generic2::Vertex* v = vtx; v != vtx + vtx_count; ++v) {
    memcpy(v->xyz.data(), src, kBytesPerVert);
    src += kBytesPerVert;
  }
  return vtx_count * kBytesPerVert;
}
|
|
|
|
/*!
 * Copy vtx_count packed 4-byte rgba colors from data into the vertex array.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vertex_colors(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  constexpr u32 kBytesPerVert = 4;  // one byte per channel
  for (int idx = 0; idx < vtx_count; ++idx) {
    memcpy(vtx[idx].rgba.data(), data + idx * kBytesPerVert, kBytesPerVert);
  }
  return vtx_count * kBytesPerVert;
}
|
|
|
|
/*!
 * Unpack vtx_count pairs of 16-bit texture coordinates from data into the vertex array.
 * The lowest bit of s carries the per-vertex adc flag rather than coordinate data.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  for (int idx = 0; idx < vtx_count; ++idx) {
    const u8* src = data + idx * 4;
    s16 s_raw, t_raw;
    memcpy(&s_raw, src, 2);
    memcpy(&t_raw, src + 2, 2);
    // strip the lowest bit of s: it encodes adc, not part of the coordinate.
    const s16 s_no_flag = s_raw & (s16)0xfffe;
    // note: int to float happening here.
    // if this is a bottleneck, we can possibly keep integers and do this in the shader.
    // I've avoided this for now because only some integer formats are inefficient on the GPU
    // and it's hard to know what's supported well on all drivers/GPUs
    vtx[idx].st[0] = s_no_flag;
    vtx[idx].st[1] = t_raw;
    // adc is set when the flag bit was zero (masking changed nothing).
    vtx[idx].adc = (s_no_flag == s_raw);
  }
  return vtx_count * 4;
}
|
|
|
|
/*!
 * Parse one generic "fragment" from raw VIF data, starting just after an UNPACK-V4-32 that
 * uploaded the fragment header + adgifs, and ending after the MSCAL that kicks the VU program
 * (or at end_of_vif if the MSCAL comes in a later transfer).
 *
 * @param data               base of the raw VIF data for this transfer
 * @param off                offset of the header upload within data
 * @param first_unpack_bytes size of the header + adgif upload (from the unpack's num field)
 * @param end_of_vif         total size of the transfer; parsing must not pass this
 * @param frag               output: adgif/vertex/mscal info for this fragment
 * @param loop               true for the continued-fragment layout, where positions arrive in a
 *                           later transfer and the first unpack seen here is the V4-8 colors
 * @return the offset just past the parsed data
 */
u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data,
                                                  u32 off,
                                                  u32 first_unpack_bytes,
                                                  u32 end_of_vif,
                                                  Fragment* frag,
                                                  bool loop) {
  // skip over padding NOP vifcodes.
  // bug fix: check the bound _before_ decoding the vifcode, so we never read the 4 bytes at
  // end_of_vif (the original evaluated is_nop_vif(data + off) first, which could read one word
  // past the end of the transfer before the loop exited).
  auto skip_nops = [&](u32 o) {
    while (o < end_of_vif && is_nop_vif(data + o)) {
      o += 4;
    }
    return o;
  };

  // note: they rely on _something_ aligning this?
  u32 off_aligned = (off + 15) & ~15;
  // each header should have 7 qw header + at least 5 qw for a single adgif.
  ASSERT(first_unpack_bytes >= FRAG_HEADER_SIZE + sizeof(AdGifData));
  // grab the 7 qw header
  memcpy(frag->header, data + off_aligned, FRAG_HEADER_SIZE);

  // figure out how many adgifs and grab those.
  u32 adgif_bytes = (first_unpack_bytes - FRAG_HEADER_SIZE);
  u32 adgifs = adgif_bytes / sizeof(AdGifData);
  frag->adgif_idx = m_next_free_adgif;
  frag->adgif_count = adgifs;
  ASSERT(frag->adgif_count > 0);
  ASSERT(adgif_bytes == adgifs * sizeof(AdGifData));
  for (u32 i = 0; i < adgifs; i++) {
    auto& add = next_adgif();
    memcpy(&add.data, data + off_aligned + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)),
           sizeof(AdGifData));
  }

  // continue in this transfer
  off += first_unpack_bytes;
  if (off == end_of_vif) {
    ASSERT_MSG(false, "nothing after header upload");
  }

  // the next thing is the vertex positions.
  off = skip_nops(off);
  u32 stcycl_tag_data;
  memcpy(&stcycl_tag_data, data + off, 4);
  off += 4;
  VifCode stcycl_tag(stcycl_tag_data);
  ASSERT(stcycl_tag.kind == VifCode::Kind::STCYCL);
  ASSERT(stcycl_tag.immediate == 0x103);

  u32 vtx_pos_unpack_tag_data;
  memcpy(&vtx_pos_unpack_tag_data, data + off, 4);
  VifCode vtx_pos_unpack_tag(vtx_pos_unpack_tag_data);

  if (vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V4_8) {
    // colors come first: this fragment's positions arrive in a later transfer (loop layout).
    ASSERT(loop);
  } else {
    ASSERT(!loop);
    ASSERT(vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V3_32);
    off += 4;

    // the position unpack's num field tells us the vertex count; allocate verts here.
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = vtx_pos_unpack_tag.num;
    alloc_vtx(frag->vtx_count);

    off += unpack_vtx_positions(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

    ASSERT(off < end_of_vif);
    off = skip_nops(off);
    ASSERT(off < end_of_vif);
  }

  if (loop) {
    // next, vertex colors. in this layout the color unpack also provides the vertex count,
    // so the vertex allocation happens here instead.
    u32 unpack_vtx_color_tag_data;
    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
    off += 4;
    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = unpack_vtx_color_tag.num;
    alloc_vtx(frag->vtx_count);
    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
  } else {
    // next, vertex colors. vertices were already allocated by the position unpack above, so the
    // counts must agree.
    u32 unpack_vtx_color_tag_data;
    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
    off += 4;
    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
    ASSERT(unpack_vtx_color_tag.num == frag->vtx_count);
    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
  }

  ASSERT(off < end_of_vif);
  off = skip_nops(off);
  ASSERT(off < end_of_vif);

  // next, vertex tcs
  u32 unpack_vtx_tc_tag_data;
  memcpy(&unpack_vtx_tc_tag_data, data + off, 4);
  off += 4;
  VifCode unpack_vtx_tc_tag(unpack_vtx_tc_tag_data);
  ASSERT(unpack_vtx_tc_tag.kind == VifCode::Kind::UNPACK_V2_16);
  ASSERT(unpack_vtx_tc_tag.num == frag->vtx_count);
  off += unpack_vtx_tcs(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

  if (off == end_of_vif) {
    // the transfer ends exactly here; the MSCAL for this fragment comes later.
    return off;
  }

  ASSERT(off < end_of_vif);
  off = skip_nops(off);
  ASSERT(off < end_of_vif);

  // the STCYCL reset and the MSCAL may appear in either order.
  u32 stcycl_reset_data;
  memcpy(&stcycl_reset_data, data + off, 4);
  off += 4;
  VifCode stcycl_reset(stcycl_reset_data);
  if (stcycl_reset.kind == VifCode::Kind::STCYCL) {
    ASSERT(off < end_of_vif);
    off = skip_nops(off);
    ASSERT(off < end_of_vif);

    u32 mscal_data;
    memcpy(&mscal_data, data + off, 4);
    off += 4;
    VifCode mscal(mscal_data);
    ASSERT(mscal.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = mscal.immediate;
  } else {
    ASSERT(stcycl_reset.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = stcycl_reset.immediate;

    ASSERT(off < end_of_vif);
    off = skip_nops(off);
    ASSERT(off < end_of_vif);

    u32 stcycl_data;
    memcpy(&stcycl_data, data + off, 4);
    off += 4;
    VifCode stcycl(stcycl_data);
    ASSERT(stcycl.kind == VifCode::Kind::STCYCL);
  }

  // trailing padding may include FLUSHE in addition to NOPs (bounds check first, as above).
  ASSERT(off < end_of_vif);
  while (off < end_of_vif && is_nop_or_flushe_vif(data + off)) {
    off += 4;
  }
  return off;
}
|
|
|
|
/*!
 * Main entry point for processing a generic bucket's DMA data.
 * Resets per-frame buffers, handles the bucket setup, then walks all fragments until the
 * next bucket is reached, filling the vertex/fragment/adgif buffers.
 */
void Generic2::process_dma(DmaFollower& dma, u32 next_bucket) {
  reset_buffers();

  // handle the stuff at the beginning.
  if (handle_bucket_setup_dma(dma, next_bucket)) {
    return;
  }

  // loop over "fragments"
  // each "fragment" consists of a series of uploads, followed by a MSCAL VIFCODE that runs
  // VU program that does vertex transformation and sends to the GS.
  // non-null when the previous transfer ended mid-fragment: that fragment still needs its
  // vertex positions (and its MSCAL) from the next transfer.
  Fragment* continued_fragment = nullptr;

  while (dma.current_tag_offset() != next_bucket) {
    if (continued_fragment) {
      // finish the fragment started in the previous transfer: the positions arrive here as an
      // UNPACK_V3_32 transfer, followed by a zero-size MSCAL-only tag.
      auto continue_vif_transfer = dma.read_and_advance();
      ASSERT(continue_vif_transfer.vifcode0().kind == VifCode::Kind::NOP);
      auto up = continue_vif_transfer.vifcode1();
      ASSERT(up.kind == VifCode::Kind::UNPACK_V3_32);
      // each vertex position is 12 bytes; check the transfer size matches the unpack count.
      ASSERT(continue_vif_transfer.size_bytes * 4 / 48 == up.num);
      ASSERT(up.num == continued_fragment->vtx_count);
      unpack_vtx_positions(&m_verts[continued_fragment->vtx_idx], continue_vif_transfer.data,
                           continued_fragment->vtx_count);
      continued_fragment = nullptr;
      auto call = dma.read_and_advance();
      ASSERT(call.size_bytes == 0);
      ASSERT(call.vifcode1().kind == VifCode::Kind::MSCAL);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

    } else {
      // start of a new fragment: STCYCL + UNPACK_V4_32 uploading the header and adgifs.
      auto vif_transfer = dma.read_and_advance();
      auto v1 = vif_transfer.vifcode1();
      // debug aid, left disabled: prints the tag that fails the asserts below.
      // if (vif_transfer.vifcode0().kind != VifCode::Kind::STCYCL ||
      //     vif_transfer.vifcode1().kind != VifCode::Kind::UNPACK_V4_32) {
      //   fmt::print("failing tag: {} {} {}\n", vif_transfer.vifcode0().print(),
      //              vif_transfer.vifcode1().print(), vif_transfer.size_bytes);
      // }
      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::STCYCL);
      ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32);
      // unpack num is in quadwords (16 bytes each).
      u32 unpack_bytes = v1.num * 16;
      auto& frag = next_frag();
      u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes,
                                                    vif_transfer.size_bytes, &frag, false);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

      // if the transfer didn't end exactly at the fragment boundary, a second fragment starts
      // in this same transfer; it will be completed on the next loop iteration.
      if (off < vif_transfer.size_bytes) {
        u32 stcycl_reset;
        memcpy(&stcycl_reset, vif_transfer.data + off, 4);
        ASSERT(VifCode(stcycl_reset).kind == VifCode::Kind::STCYCL);
        off += 4;
        // while (off < vif_transfer.size_bytes) {
        u32 next;
        memcpy(&next, vif_transfer.data + off, 4);
        VifCode next_unpack(next);

        ASSERT(next_unpack.kind == VifCode::Kind::UNPACK_V4_32);

        auto& continue_frag = next_frag();
        off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16,
                                                  vif_transfer.size_bytes, &continue_frag, true);
        continued_fragment = &continue_frag;
        ASSERT(off == vif_transfer.size_bytes);
        // }
      }
    }
  }
}
|