mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 21:27:52 -04:00
6e779d1f1c
Trying to make up for some of the startup speed lost in the SDL transition. This saves about 1s from start (from ~3s), and about 500 MB of RAM. - Faster TIE unpack by merging matrix groups, more efficient vertex transforms, and skipping normal transforms on groups with no normals. - Refactor generic merc and merc to use a single renderer with multiple interfaces, rather than many renderers. Removed "LightningRenderer" as a special thing, but Warp is still special - Add more profiling stuff to startup and the loader. - Remove `SDL_INIT_HAPTIC` - this turned out to be needed for force-feedback steering wheels, and not needed for controller vibration - Switched `vag-player` to use quicksort instead of the default GOAL sort (very slow)
646 lines
23 KiB
C++
#include "Generic2.h"
|
|
#include "game/graphics/opengl_renderer/AdgifHandler.h"
|
|
|
|
/*!
|
|
* Advance through DMA data that has no effect on rendering (NOP codes) and see if this is the
|
|
* end of the data.
|
|
* The DmaFollower will either point to the start of the next bucket (and the function will return
|
|
* true), or to the beginning of the next non-NOP DMA for this bucket.
|
|
*/
|
|
bool Generic2::check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket) {
|
|
while (dma.current_tag().qwc == 0 && dma.current_tag_vifcode0().kind == VifCode::Kind::NOP &&
|
|
dma.current_tag_vifcode1().kind == VifCode::Kind::NOP) {
|
|
// this "CALL" tag is inserted by the engine to reset the GS. It's always inserted at the end of
|
|
// the bucket. if we see it here, we should be able to skip over this resetting stuff (always 4
|
|
// tags) and then see the start of the next bucket.
|
|
if (dma.current_tag().kind == DmaTag::Kind::CALL) {
|
|
for (int i = 0; i < 4; i++) {
|
|
dma.read_and_advance();
|
|
}
|
|
ASSERT(dma.current_tag_offset() == next_bucket);
|
|
return true;
|
|
}
|
|
dma.read_and_advance();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*!
 * Process the first DMA of a generic bucket.
 * Return true if the generic bucket is empty and there is nothing to do.
 *
 * Otherwise, populates m_drawing_config which contains the common draw settings for all data being
 * rendered in this bucket.
 */
bool Generic2::handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket) {
  // if the engine didn't run the generic renderer setup function, this bucket will end here.
  if (check_for_end_of_generic_data(dma, next_bucket)) {
    return true;
  }

  // next, the generic setup. This reads the data generated by generic-init-buf.

  // setup packet 1 is GS settings (3 quadwords: giftag, test, zbuf)
  auto test_and_zbuf = dma.read_and_advance();
  ASSERT(test_and_zbuf.size_bytes == 48);
  // first qw is the gif tag. Can ignore.
  // second qw is test, this is always the same, so can ignore it too.
  // (new 'static 'gs-test
  //      :ate #x1
  //      :atst (gs-atest greater-equal)
  //      :aref #x26
  //      :afail #x1
  //      :zte #x1
  //      :ztst (gs-ztest greater-equal)
  //      )
  // third qw is zbuf:
  // the only thing that changes is zmsk, we need to store this value for later.
  u64 zbuf_val;
  // offset 32 = third quadword of the packet (the zbuf register value).
  memcpy(&zbuf_val, test_and_zbuf.data + 32, 8);
  m_drawing_config.zmsk = GsZbuf(zbuf_val).zmsk();

  // setup packet 2 is constants that normally go to VU1 data memory.
  // we're not going to be super strict checking the exact details of the unpack command, it's
  // a waste of time since we're the ones generating it anyway.
  auto constants = dma.read_and_advance();
  ASSERT(constants.size_bytes == 160);
  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);

  // the constants follow the layout of the GOAL generic-constants structure:
  // (fog vector :inline :offset-assert 0)
  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);

  // (adgif gs-gif-tag :inline :offset-assert 16) ;; was qword
  // (giftag gs-gif-tag :inline :offset-assert 32) ;; was qword
  // (hvdf-offset vector :inline :offset-assert 48)
  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 48, 16);
  // remaining fields are not needed by this renderer:
  // (hmge-scale vector :inline :offset-assert 64)
  // (invh-scale vector :inline :offset-assert 80)
  // (guard vector :inline :offset-assert 96)
  // (adnop qword :inline :offset-assert 112)
  // (flush qword :inline :offset-assert 128)
  // (stores qword :inline :offset-assert 144)

  auto vu_setup = dma.read_and_advance();
  ASSERT(vu_setup.size_bytes == 32);
  // this sets offset/base to 0, sets row to 0 and runs program 0 to set up VU regs
  // todo: any setup required from running this program.

  // if there was nothing rendered by generic on this frame in this bucket, the bucket will end
  // here.
  if (check_for_end_of_generic_data(dma, next_bucket)) {
    return true;
  }

  return false;
}
|
|
|
|
/*!
 * Rewind all per-frame allocation counters to zero, recording the high-water mark of each
 * (presumably used for stats/debug display elsewhere).
 */
void Generic2::reset_buffers() {
  m_max_frags_seen = std::max(m_next_free_frag, m_max_frags_seen);
  m_next_free_frag = 0;

  m_max_verts_seen = std::max(m_next_free_vert, m_max_verts_seen);
  m_next_free_vert = 0;

  m_max_adgifs_seen = std::max(m_next_free_adgif, m_max_adgifs_seen);
  m_next_free_adgif = 0;

  m_max_buckets_seen = std::max(m_next_free_bucket, m_max_buckets_seen);
  m_next_free_bucket = 0;

  m_max_indices_seen = std::max(m_next_free_idx, m_max_indices_seen);
  m_next_free_idx = 0;
}
|
|
|
|
bool is_nop_vif(const u8* data) {
|
|
u32 tag0_data;
|
|
memcpy(&tag0_data, data, 4);
|
|
return VifCode(tag0_data).kind == VifCode::Kind::NOP;
|
|
}
|
|
|
|
bool is_nop_or_flushe_vif(const u8* data) {
|
|
u32 tag0_data;
|
|
memcpy(&tag0_data, data, 4);
|
|
auto k = VifCode(tag0_data).kind;
|
|
return k == VifCode::Kind::NOP || k == VifCode::Kind::FLUSHE;
|
|
}
|
|
|
|
/*!
 * Copy vtx_count packed 12-byte (x, y, z) positions from data into the vertex array.
 * Returns the number of bytes consumed.
 */
u32 unpack_vtx_positions(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  const u8* src = data;
  for (int i = 0; i < vtx_count; i++) {
    memcpy(vtx[i].xyz.data(), src, 12);
    src += 12;
  }
  return vtx_count * 12;
}
|
|
|
|
/*!
 * Copy vtx_count packed 4-byte rgba values from data into the vertex array.
 * Returns the number of bytes consumed.
 */
u32 unpack_vertex_colors(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  const u8* src = data;
  for (int i = 0; i < vtx_count; i++) {
    memcpy(vtx[i].rgba.data(), src, 4);
    src += 4;
  }
  return vtx_count * 4;
}
|
|
|
|
/*!
 * Unpack vtx_count packed (s16 s, s16 t) texture coordinates from data into the vertex array.
 * The low bit of s doubles as the ADC flag: adc is set when that bit was already clear.
 * Returns the number of bytes consumed.
 */
u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  const u8* src = data;
  for (int i = 0; i < vtx_count; i++, src += 4) {
    s16 raw_s, raw_t;
    memcpy(&raw_s, src, 2);
    memcpy(&raw_t, src + 2, 2);
    const s16 s_no_flag = raw_s & (s16)0xfffe;
    // note: int to float happening here.
    // if this is a bottleneck, we can possible keep integers and do this in the shader.
    // I've avoided this for now because only some integer formats are inefficient on the GPU
    // and it's hard to know what's supported well on all drivers/GPUs
    vtx[i].st[0] = s_no_flag;
    vtx[i].st[1] = raw_t;
    vtx[i].adc = s_no_flag == raw_s;
  }
  return vtx_count * 4;
}
|
|
|
|
/*!
 * Parse one generic "fragment" that starts with an UNPACK-V4-32 containing the 7-qw fragment
 * header plus one or more AdGifData blocks, then read the vertex position/color/tc unpacks and
 * the MSCAL vifcode that kicks the VU program for this fragment.
 *
 * @param data               raw VIF data for this transfer
 * @param off                byte offset into data where this fragment's first unpack starts
 * @param first_unpack_bytes size in bytes of the first unpack (header + adgifs)
 * @param end_of_vif         total size in bytes of the VIF data
 * @param frag               output fragment to populate
 * @param loop               true for a "continued" fragment: its first vertex unpack is the
 *                           V4-8 colors, and the positions arrive in a later transfer
 *                           (handled by the caller via continued_fragment)
 * @return byte offset just past the data consumed for this fragment
 *
 * Fix: all the NOP-skip loops below now test `off < end_of_vif` BEFORE decoding the vifcode at
 * data + off. The previous ordering dereferenced data + off first, reading 4 bytes past the end
 * of the transfer when the skip reached end_of_vif exactly.
 */
u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data,
                                                  u32 off,
                                                  u32 first_unpack_bytes,
                                                  u32 end_of_vif,
                                                  Fragment* frag,
                                                  bool loop) {
  // note: they rely on _something_ aligning this?
  u32 off_aligned = (off + 15) & ~15;
  // each header should have 7 qw header + at least 5 qw for a single adgif.
  ASSERT(first_unpack_bytes >= FRAG_HEADER_SIZE + sizeof(AdGifData));
  // grab the 7 qw header
  memcpy(frag->header, data + off_aligned, FRAG_HEADER_SIZE);

  // figure out how many adgifs and grab those.
  u32 adgif_bytes = (first_unpack_bytes - FRAG_HEADER_SIZE);
  u32 adgifs = adgif_bytes / sizeof(AdGifData);
  frag->adgif_idx = m_next_free_adgif;
  frag->adgif_count = adgifs;
  ASSERT(frag->adgif_count > 0);
  ASSERT(adgif_bytes == adgifs * sizeof(AdGifData));
  for (u32 i = 0; i < adgifs; i++) {
    auto& add = next_adgif();
    memcpy(&add.data, data + off_aligned + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)),
           sizeof(AdGifData));
  }

  // continue in this transfer
  off += first_unpack_bytes;
  if (off == end_of_vif) {
    ASSERT_MSG(false, "nothing after header upload");
  }

  // the next thing is the vertex positions: skip NOPs, then expect STCYCL + UNPACK.
  while (off < end_of_vif && is_nop_vif(data + off)) {
    off += 4;
  }
  u32 stcycl_tag_data;
  memcpy(&stcycl_tag_data, data + off, 4);
  off += 4;
  VifCode stcycl_tag(stcycl_tag_data);
  ASSERT(stcycl_tag.kind == VifCode::Kind::STCYCL);
  ASSERT(stcycl_tag.immediate == 0x103);

  u32 vtx_pos_unpack_tag_data;
  memcpy(&vtx_pos_unpack_tag_data, data + off, 4);
  VifCode vtx_pos_unpack_tag(vtx_pos_unpack_tag_data);

  if (vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V4_8) {
    // continued fragment: this unpack is actually the colors; positions come later.
    ASSERT(loop);
  } else {
    ASSERT(!loop);
    ASSERT(vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V3_32);
    off += 4;

    // allocate vertices for this fragment and unpack their positions now.
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = vtx_pos_unpack_tag.num;
    alloc_vtx(frag->vtx_count);

    off += unpack_vtx_positions(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

    ASSERT(off < end_of_vif);
    while (off < end_of_vif && is_nop_vif(data + off)) {
      off += 4;
    }
    ASSERT(off < end_of_vif);
  }

  if (loop) {
    // next, vertex colors. In the continued case the color unpack defines the vertex count,
    // so allocate here.
    u32 unpack_vtx_color_tag_data;
    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
    off += 4;
    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = unpack_vtx_color_tag.num;
    alloc_vtx(frag->vtx_count);
    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
  } else {
    // next, vertex colors. Vertices were already allocated above; just check the count matches.
    u32 unpack_vtx_color_tag_data;
    memcpy(&unpack_vtx_color_tag_data, data + off, 4);
    off += 4;
    VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
    ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
    ASSERT(unpack_vtx_color_tag.num == frag->vtx_count);
    off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
  }

  ASSERT(off < end_of_vif);
  while (off < end_of_vif && is_nop_vif(data + off)) {
    off += 4;
  }
  ASSERT(off < end_of_vif);

  // next, vertex tcs
  u32 unpack_vtx_tc_tag_data;
  memcpy(&unpack_vtx_tc_tag_data, data + off, 4);
  off += 4;
  VifCode unpack_vtx_tc_tag(unpack_vtx_tc_tag_data);
  ASSERT(unpack_vtx_tc_tag.kind == VifCode::Kind::UNPACK_V2_16);
  ASSERT(unpack_vtx_tc_tag.num == frag->vtx_count);
  off += unpack_vtx_tcs(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

  // a fragment may end exactly at the transfer boundary; the MSCAL will arrive in a later
  // transfer in that case.
  if (off == end_of_vif) {
    return off;
  }

  ASSERT(off < end_of_vif);
  while (off < end_of_vif && is_nop_vif(data + off)) {
    off += 4;
  }
  ASSERT(off < end_of_vif);

  // the STCYCL reset and the MSCAL can appear in either order.
  u32 stcycl_reset_data;
  memcpy(&stcycl_reset_data, data + off, 4);
  off += 4;
  VifCode stcycl_reset(stcycl_reset_data);
  if (stcycl_reset.kind == VifCode::Kind::STCYCL) {
    ASSERT(off < end_of_vif);
    while (off < end_of_vif && is_nop_vif(data + off)) {
      off += 4;
    }
    ASSERT(off < end_of_vif);

    u32 mscal_data;
    memcpy(&mscal_data, data + off, 4);
    off += 4;
    VifCode mscal(mscal_data);
    ASSERT(mscal.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = mscal.immediate;
  } else {
    ASSERT(stcycl_reset.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = stcycl_reset.immediate;

    ASSERT(off < end_of_vif);
    while (off < end_of_vif && is_nop_vif(data + off)) {
      off += 4;
    }
    ASSERT(off < end_of_vif);

    u32 stcycl_data;
    memcpy(&stcycl_data, data + off, 4);
    off += 4;
    VifCode stcycl(stcycl_data);
    ASSERT(stcycl.kind == VifCode::Kind::STCYCL);
  }

  // skip trailing NOP/FLUSHE padding.
  ASSERT(off < end_of_vif);
  while (off < end_of_vif && is_nop_or_flushe_vif(data + off)) {
    off += 4;
  }
  return off;
}
|
|
|
|
/*!
 * Walk the generic bucket's DMA data for jak 1: read the bucket setup, then parse fragments
 * until the DmaFollower reaches next_bucket. Populates the fragment/vertex/adgif buffers.
 */
void Generic2::process_dma_jak1(DmaFollower& dma, u32 next_bucket) {
  reset_buffers();

  // handle the stuff at the beginning.
  if (handle_bucket_setup_dma(dma, next_bucket)) {
    return;
  }

  // loop over "fragments"
  // each "fragment" consists of a series of uploads, followed by a MSCAL VIFCODE that runs
  // VU program that does vertex transformation and sends to the GS.
  Fragment* continued_fragment = nullptr;

  while (dma.current_tag_offset() != next_bucket) {
    if (continued_fragment) {
      // the previous transfer ended before this fragment's vertex positions arrived: this
      // transfer is exactly the V3-32 position unpack, followed by the MSCAL that draws it.
      auto continue_vif_transfer = dma.read_and_advance();
      ASSERT(continue_vif_transfer.vifcode0().kind == VifCode::Kind::NOP);
      auto up = continue_vif_transfer.vifcode1();
      ASSERT(up.kind == VifCode::Kind::UNPACK_V3_32);
      // sanity check: transfer size (in bytes * 4 / 48 = qwc * 16 * 4 / 48?) matches the unpack
      // count. NOTE(review): the exact units of size_bytes here look odd — confirm.
      ASSERT(continue_vif_transfer.size_bytes * 4 / 48 == up.num);
      ASSERT(up.num == continued_fragment->vtx_count);
      unpack_vtx_positions(&m_verts[continued_fragment->vtx_idx], continue_vif_transfer.data,
                           continued_fragment->vtx_count);
      continued_fragment = nullptr;
      // the MSCAL that kicks the VU program for the completed fragment.
      auto call = dma.read_and_advance();
      ASSERT(call.size_bytes == 0);
      ASSERT(call.vifcode1().kind == VifCode::Kind::MSCAL);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

    } else {
      // a fresh fragment: STCYCL + UNPACK-V4-32 of the fragment header/adgifs.
      auto vif_transfer = dma.read_and_advance();
      auto v1 = vif_transfer.vifcode1();
      // if (vif_transfer.vifcode0().kind != VifCode::Kind::STCYCL ||
      //     vif_transfer.vifcode1().kind != VifCode::Kind::UNPACK_V4_32) {
      //   fmt::print("failing tag: {} {} {}\n", vif_transfer.vifcode0().print(),
      //              vif_transfer.vifcode1().print(), vif_transfer.size_bytes);
      // }
      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::STCYCL);
      ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32);
      u32 unpack_bytes = v1.num * 16;
      auto& frag = next_frag();
      u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes,
                                                    vif_transfer.size_bytes, &frag, false);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

      // the transfer may also contain the start of a second fragment whose vertex positions
      // spill into the next transfer; parse it with loop=true and remember it.
      if (off < vif_transfer.size_bytes) {
        u32 stcycl_reset;
        memcpy(&stcycl_reset, vif_transfer.data + off, 4);
        ASSERT(VifCode(stcycl_reset).kind == VifCode::Kind::STCYCL);
        off += 4;
        // while (off < vif_transfer.size_bytes) {
        u32 next;
        memcpy(&next, vif_transfer.data + off, 4);
        VifCode next_unpack(next);

        ASSERT(next_unpack.kind == VifCode::Kind::UNPACK_V4_32);

        auto& continue_frag = next_frag();
        off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16,
                                                  vif_transfer.size_bytes, &continue_frag, true);
        continued_fragment = &continue_frag;
        ASSERT(off == vif_transfer.size_bytes);
        // }
      }
    }
  }
}
|
|
|
|
bool is_nop_zero(const DmaTransfer& xf) {
|
|
return xf.size_bytes == 0 && xf.vifcode0().kind == VifCode::Kind::NOP &&
|
|
xf.vifcode1().kind == VifCode::Kind::NOP;
|
|
}
|
|
|
|
bool is_jak2_end(const DmaTransfer& xf) {
|
|
return xf.size_bytes == 160 && xf.vifcode0().kind == VifCode::Kind::FLUSHA &&
|
|
xf.vifcode1().kind == VifCode::Kind::DIRECT;
|
|
}
|
|
|
|
/*!
 * Walk the generic bucket's DMA data for jak 2: read the setup (DIRECT GS packet + constants +
 * VU setup), then parse fragments until the jak2 end marker (FLUSHA + DIRECT) is seen.
 */
void Generic2::process_dma_jak2(DmaFollower& dma, u32 next_bucket) {
  reset_buffers();
  auto first_data = dma.read_and_advance();

  // handle the stuff at the beginning.
  // if the engine didn't run the generic renderer setup function, this bucket will end here.
  if (is_nop_zero(first_data) && next_bucket == dma.current_tag_offset()) {
    return;
  }

  // next, the generic setup. This reads the data generated by generic-init-buf.
  // first vifcode may be a MARK (profiling) or a NOP.
  auto v0k = first_data.vifcode0().kind;
  ASSERT((v0k == VifCode::Kind::MARK || v0k == VifCode::Kind::NOP) &&
         first_data.vifcode1().kind == VifCode::Kind::NOP);

  // setup packet 1 is GS settings (DIRECT: giftag + zbuf; only zmsk matters to us).
  auto direct_setup = dma.read_and_advance();
  ASSERT(direct_setup.size_bytes == 32 && direct_setup.vifcode0().kind == VifCode::Kind::NOP &&
         direct_setup.vifcode1().kind == VifCode::Kind::DIRECT);
  u64 zbuf_val;
  memcpy(&zbuf_val, direct_setup.data + 16, 8);
  m_drawing_config.zmsk = GsZbuf(zbuf_val).zmsk();

  // setup packet 2: VU1 constants (jak 2 layout: fog at 0, hvdf-offset at 32).
  auto constants = dma.read_and_advance();
  ASSERT(constants.size_bytes == 128);
  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);
  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);
  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 32, 16);

  auto vu_setup = dma.read_and_advance();
  ASSERT(vu_setup.size_bytes == 32);
  // this sets offset/base to 0, sets row to 0 and runs program 0 to set up VU regs
  // todo: any setup required from running this program.

  // if there was nothing rendered by generic on this frame in this bucket, the bucket will end
  // here.
  // NOTE(review): this re-tests first_data, which was already consumed above — it mirrors the
  // second end-check in the jak 1 path, but it may have been intended to inspect the current
  // tag instead. Confirm against the jak 2 engine output.
  if (is_nop_zero(first_data) && next_bucket == dma.current_tag_offset()) {
    return;
  }

  // 0: NOP NOP

  // loop over "fragments"
  // each "fragment" consists of a series of uploads, followed by a MSCAL VIFCODE that runs
  // VU program that does vertex transformation and sends to the GS.
  Fragment* continued_fragment = nullptr;

  // skip zero-size NOP transfers between fragments.
  auto vif_transfer = dma.read_and_advance();
  while (is_nop_zero(vif_transfer)) {
    vif_transfer = dma.read_and_advance();
  }

  while (!is_jak2_end(vif_transfer)) {
    if (continued_fragment) {
      // this transfer is the deferred vertex-position unpack for the previous fragment,
      // followed by its MSCAL.
      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::NOP);
      auto up = vif_transfer.vifcode1();
      ASSERT(up.kind == VifCode::Kind::UNPACK_V3_32);
      ASSERT(vif_transfer.size_bytes * 4 / 48 == up.num);
      ASSERT(up.num == continued_fragment->vtx_count);
      unpack_vtx_positions(&m_verts[continued_fragment->vtx_idx], vif_transfer.data,
                           continued_fragment->vtx_count);
      continued_fragment = nullptr;
      auto call = dma.read_and_advance();
      ASSERT(call.size_bytes == 0);
      ASSERT(call.vifcode1().kind == VifCode::Kind::MSCAL);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

    } else {
      // a fresh fragment: STCYCL + UNPACK-V4-32 of the fragment header/adgifs.
      auto v1 = vif_transfer.vifcode1();
      // debug print for unexpected tags before the asserts below fire.
      if (vif_transfer.vifcode0().kind != VifCode::Kind::STCYCL ||
          vif_transfer.vifcode1().kind != VifCode::Kind::UNPACK_V4_32) {
        fmt::print("failing tag: {} {} {}\n", vif_transfer.vifcode0().print(),
                   vif_transfer.vifcode1().print(), vif_transfer.size_bytes);
      }
      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::STCYCL);
      ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32);
      u32 unpack_bytes = v1.num * 16;
      auto& frag = next_frag();
      u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes,
                                                    vif_transfer.size_bytes, &frag, false);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

      // the transfer may also contain the start of a second fragment whose vertex positions
      // spill into the next transfer; parse it with loop=true and remember it.
      if (off < vif_transfer.size_bytes) {
        u32 stcycl_reset;
        memcpy(&stcycl_reset, vif_transfer.data + off, 4);
        ASSERT(VifCode(stcycl_reset).kind == VifCode::Kind::STCYCL);
        off += 4;
        // while (off < vif_transfer.size_bytes) {
        u32 next;
        memcpy(&next, vif_transfer.data + off, 4);
        VifCode next_unpack(next);

        ASSERT(next_unpack.kind == VifCode::Kind::UNPACK_V4_32);

        auto& continue_frag = next_frag();
        off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16,
                                                  vif_transfer.size_bytes, &continue_frag, true);
        continued_fragment = &continue_frag;
        ASSERT(off == vif_transfer.size_bytes);
        // }
      }
    }
    // advance to the next non-placeholder transfer.
    vif_transfer = dma.read_and_advance();
    while (is_nop_zero(vif_transfer)) {
      vif_transfer = dma.read_and_advance();
    }
  }

  // ending:
  // 0: NOP NOP
  // 160: FLUSHA DIRECT
  // 0: NOP NOP
  auto end = dma.read_and_advance();
  (void)end;
  ASSERT(next_bucket == dma.current_tag_offset());
}
|
|
|
|
/*!
 * Unpack count vertices from the lightning format: 48 bytes each, laid out as
 * (s32 s, s32 t, pad) at +0, (u32 rgba[4]) at +16, (float pos[4], 3 used) at +32.
 * As in the other formats, the low bit of s carries the ADC flag.
 */
void unpack_vertex(Generic2::Vertex* out, const u8* in, int count) {
  for (int i = 0; i < count; i++) {
    const u8* src = in + i * (16 * 3);
    Generic2::Vertex& vert = out[i];

    // st: strip the ADC flag bit from s.
    s32 s, t;
    memcpy(&s, src, 4);
    memcpy(&t, src + 4, 4);
    const s32 s_masked = s & 0xfffffffe;
    vert.st[0] = s_masked;
    vert.st[1] = t;
    vert.adc = s_masked == s;

    // rgba
    u32 color_words[4];
    memcpy(color_words, src + 16, 16);
    for (int c = 0; c < 4; c++) {
      vert.rgba[c] = color_words[c];
    }

    // pos (w is ignored)
    float pos[4];
    memcpy(pos, src + 32, 16);
    for (int c = 0; c < 3; c++) {
      vert.xyz[c] = pos[c];
    }
  }
}
|
|
|
|
/*!
 * Walk the lightning bucket's DMA data: a DIRECT GS setup (always z-masked), VU1 constants,
 * then a sequence of (header+adgif upload, vertex upload, MSCAL) triples until a non-UNPACK
 * transfer is seen.
 */
void Generic2::process_dma_lightning(DmaFollower& dma, u32 next_bucket) {
  reset_buffers();
  auto first_data = dma.read_and_advance();
  // if unused, sends 0 nop nop
  if (is_nop_zero(first_data) && next_bucket == dma.current_tag_offset()) {
    return;
  }

  // intro:
  // 0: MARK NOP (profiling)
  auto v0k = first_data.vifcode0().kind;
  ASSERT((v0k == VifCode::Kind::MARK || v0k == VifCode::Kind::NOP) &&
         first_data.vifcode1().kind == VifCode::Kind::NOP);
  // 32: NOP DIRECT (set GS registers)
  // for lightning, this is always masking z writes
  auto direct_setup = dma.read_and_advance();
  ASSERT(direct_setup.size_bytes == 32 && direct_setup.vifcode0().kind == VifCode::Kind::NOP &&
         direct_setup.vifcode1().kind == VifCode::Kind::DIRECT);
  m_drawing_config.zmsk = true;

  // 128: STCYCL cl: 4 wl: 4 UNPACK-V4-32: 8 addr: 897 us: false tops: false
  // upload VU1 constants

  /*
   (deftype generic-constants (structure)
    ((fog vector :inline :offset-assert 0)
     (adgif gs-gif-tag :inline :offset-assert 16) ;; was qword
     (hvdf-offset vector :inline :offset-assert 32)
     (hmge-scale vector :inline :offset-assert 48)
     (invh-scale vector :inline :offset-assert 64)
     (guard vector :inline :offset-assert 80)
     (flush qword :inline :offset-assert 96)
     (stores qword :inline :offset-assert 112)
     )
    )
   */
  auto constants = dma.read_and_advance();
  ASSERT(constants.size_bytes == 128);
  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);
  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);
  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 32, 16);

  // 32: MSCALF 0x0 STMOD 0b0
  auto mscalf = dma.read_and_advance();
  ASSERT(mscalf.vifcode0().kind == VifCode::Kind::MSCALF &&
         mscalf.vifcode1().kind == VifCode::Kind::STMOD);
  // 0: NOP NOP
  auto another_nop = dma.read_and_advance();
  ASSERT(is_nop_zero(another_nop));

  // each lightning draw is three transfers: header+adgif upload, vertex upload, MSCAL.
  auto maybe_first_upload = dma.read_and_advance();
  while (maybe_first_upload.vifcode1().kind == VifCode::Kind::UNPACK_V4_32) {
    auto second_upload = dma.read_and_advance();
    auto mscal = dma.read_and_advance();
    (void)mscal;

    auto* frag = &next_frag();
    ASSERT(maybe_first_upload.size_bytes == Generic2::FRAG_HEADER_SIZE + 5 * 16);  // header + adgif
    memcpy(frag->header, maybe_first_upload.data, Generic2::FRAG_HEADER_SIZE);
    frag->adgif_idx = m_next_free_adgif;
    frag->adgif_count = 1;
    // lightning always uses VU program address 6 and never draws HUD.
    frag->mscal_addr = 6;
    frag->uses_hud = false;
    auto* adgif = &next_adgif();
    memcpy(&adgif->data, maybe_first_upload.data + Generic2::FRAG_HEADER_SIZE, sizeof(AdGifData));
    // (new 'static 'gif-tag-regs-32 :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2
    // (gif-reg-id xyzf2))
    // vertices are 3 quadwords each in this format.
    int num_vtx = second_upload.size_bytes / (16 * 3);
    frag->vtx_count = num_vtx;
    frag->vtx_idx = m_next_free_vert;
    alloc_vtx(num_vtx);
    unpack_vertex(&m_verts[frag->vtx_idx], second_upload.data, num_vtx);

    // run
    // 192: NOP UNPACK-V4-32: 12 addr: 837 us: false tops: false
    // 1536: NOP UNPACK-V4-32: 96 addr: 9 us: false tops: false
    // 0: NOP MSCAL 0x6

    maybe_first_upload = dma.read_and_advance();
  }

  // ending:
  // 0: NOP NOP
  // 160: FLUSHA DIRECT
  // 0: NOP NOP
  auto flusha = dma.read_and_advance();
  (void)flusha;
  auto end = dma.read_and_advance();
  (void)end;
  ASSERT(next_bucket == dma.current_tag_offset());
}