// The actual "tfrag" type. This only lives on the EE and gets converted to DMA data.
// Some of the DMA data is pointers to chains that live in the static level data.
// The rest is colors that are looked up from the palette (on the EE!) and then written
// into the double-buffered frame global buffer.
(deftype tfragment (drawable)
  ;; A single tfragment. The type is #x40 (64) bytes (see :size-assert), which is the
  ;; stride draw-inline-array-tfrag in this file uses when stepping through the array.
  ;; Several fields deliberately share an offset: they are alternate names/views of the
  ;; same storage, not separate data.
  ((color-index uint16 :offset 6)
   (debug-data tfragment-debug-data :offset 8)
   ;; offset 12: one word with two names. draw-inline-array-tfrag loads this word and
   ;; then reads packed u16 values (four at a time) through it.
   (color-indices uint32 :offset 12)
   (colors uint32 :offset 12)
   ;; offsets 32-43: three words, viewable as an array or by name.
   ;; dma-common and dma-level-0 both name word 0 (offset 32).
   (dma-chain uint32 3 :offset-assert 32)
   (dma-common uint32 :offset 32)
   (dma-level-0 uint32 :offset 32)
   (dma-base uint32 :offset 36)
   (dma-level-1 uint32 :offset 40)
   ;; offsets 44-47: one qwc byte per chain. draw-inline-array-tfrag copies
   ;; byte 0 -> common template, byte 1 -> base template,
   ;; byte 2 -> level-1 template, byte 3 -> level-0 template.
   (dma-qwc uint8 4 :offset 44)
   (shader (inline-array adgif-shader) :offset 48)
   (num-shaders uint8 :offset 52)
   ;; per-level color counts, read as bytes 53/54/55 by draw-inline-array-tfrag
   (num-base-colors uint8 :offset 53)
   (num-level0-colors uint8 :offset 54)
   (num-level1-colors uint8 :offset 55)
   (color-offset uint8 :offset 56)
   (color-count uint8 :offset 57)
   (pad0 uint8 :offset 58)
   (pad1 uint8 :offset 59)
   ;; offset 60: typed view and raw u32 view of the same word
   (generic generic-tfragment :offset-assert 60)
   (generic-u32 uint32 :offset 60) ;; added
   )
  :method-count-assert 18
  :size-assert #x40
  :flag-assert #x1200000040
  )
// This is the temp/debug structure used for the EE code
(deftype tfrag-work (structure)
  ;; Work area used by draw-inline-array-tfrag (in this file), which addresses every
  ;; field by raw byte offset from the *tfrag-work* pointer. Total size is #xc8 (200) bytes.
  (;; Five 16-byte dma-packet templates. For base/level-0/common/level-1 the function
   ;; fills in a halfword at +0 (qwc) and a word at +4 (address) per tfrag, plus a word
   ;; at +12 for base/level-0/common, and then copies whole templates into the output.
   (base-tmpl dma-packet :inline :offset-assert 0)
   (level-0-tmpl dma-packet :inline :offset-assert 16)
   (common-tmpl dma-packet :inline :offset-assert 32)
   (level-1-tmpl dma-packet :inline :offset-assert 48)
   ;; color template: bytes at +12 and +14 are patched per tfrag (color-offset and the
   ;; 4-aligned color count respectively).
   (color-tmpl dma-packet :inline :offset-assert 64)
   (frag-dists vector :inline :offset-assert 80)
   ;; max-dist is loaded once at function entry; the running minimum computed from it
   ;; is written to min-dist when the function exits.
   (max-dist vector :inline :offset-assert 96)
   (min-dist vector :inline :offset-assert 112)
   ;; four words that are added to four zero-extended u16 color indices at a time
   (color-ptr vector4w :inline :offset-assert 128)
   (tr-stat-tfrag tr-stat :offset-assert 144)
   (tr-stat-tfrag-near tr-stat :offset-assert 148)
   (vu1-enable-tfrag int32 :offset-assert 152)
   (vu1-enable-tfrag-near int32 :offset-assert 156)
   ;; register spill slots used inside the 8-tfrag loop of draw-inline-array-tfrag
   (cur-vis-bits uint32 :offset-assert 160)
   (end-vis-bits uint32 :offset-assert 164)
   (src-ptr uint32 :offset-assert 168)
   ;; written on exit with the last word that was being placed at +12 of the templates
   (last-call uint32 :offset-assert 172)
   (dma-buffer basic :offset-assert 176)
   (test-id uint32 :offset-assert 180)
   ;; counters incremented once per poll iteration spent waiting on scratchpad DMA
   (wait-from-spr uint32 :offset-assert 184)
   (wait-to-spr uint32 :offset-assert 188)
   (near-wait-from-spr uint32 :offset-assert 192)
   (near-wait-to-spr uint32 :offset-assert 196)
   )
  :method-count-assert 9
  :size-assert #xc8
  :flag-assert #x9000000c8
  )
// base vifs:
// ??
// t3
// l0:
// ??
// t3
// common:
// ??
// t3
// color
// ??
// 12 sb color-offset
// 14 sb num colors, 4 aligned.
; .function draw-inline-array-tfrag
;; Builds DMA data for the visible tfragments of an inline array.
;; tfrags are uploaded to the scratchpad 16 at a time over the to-spr channel; the
;; generated DMA is written into a second scratchpad buffer, which is sent to the
;; dma-buffer in main memory over the from-spr channel whenever it fills up.
;; there's two double-buffered spad buffers + culling data
;; NOTE(review): most branch-target labels (L41..L69) do not appear in this listing;
;; only B17, B30, B34 and L57 are present.
;; arguments:
;; a0 - occlusion cull list (on spad); one bit per tfrag, consumed 16 bits per block
;; a1 - tfrags
;; a2 - num tfrags
;; a3 - dma buf
;; returns:
;; v0 - always 0
;; constants:
;; t0 = *tfrag-work*
;; t1 = SPR FROM (0x1000D000)
;; t2 = 0x14000000 (annotated "mscal" below); OR'd with a small immediate to form t3
;; t4 = SPR TO (0x1000D400)
;; vars:
;; v1 = ptr to dma buffer data
;; t5 = SPR BUFFER 0 (tfrags)
;; a3 = SPR BUFFER 1
;; ra = write pointer into SPR BUFFER 1 (the real return address is saved at 0(sp))
;; t3 = word stored at +12 of the base/level-0/common templates (init to 0); after each
;;      drawn tfrag it becomes t2 | one of {6, 8, 10, 14, 16, 18}
;; t6 = spr buffer 1 use (qwc)
;; vf3 = frag-dists
;; vf4 = running minimum (vmini), seeded from max-dist and stored to min-dist on exit
;; vf1 = (1, 1, 1, 1)
;; vf2 = bsphere
;; set up constants
;; prologue: 128-byte frame holding ra, s0-s5 and gp
daddiu sp, sp, -128
sd ra, 0(sp)
sq s0, 16(sp)
sq s1, 32(sp)
sq s2, 48(sp)
sq s3, 64(sp)
sq s4, 80(sp)
sq s5, 96(sp)
sq gp, 112(sp)
lui t2, 5120 = (0x14000000), a constant (mscal)
lw v1, 4(a3) (-> dma-buf base)
lui t3, 4096 = (0x10000000)
lui t1, 4096 = (0x10000000)
;; cache ops on the address the dma buffer will be written at.
;; NOTE(review): presumably so cached data does not conflict with the DMA writes - confirm
cache dxwbin v1, 0
cache dxwbin v1, 1
lw t0, *tfrag-work*(s7)
ori t4, t3, 54272 = (0x1000D400) SPR TO
ori t1, t1, 53248 = (0x1000D000) SPR FROM
lui t5, 28672 = (0x70000000)
lqc2 vf3, 80(t0) = (-> *tfrag-work* frag-dists)
;; the dma-buffer object is stashed so that a3 can be reused; it is reloaded on exit
sw a3, 176(t0) (set! (-> *tfrag-work* dma-buffer) dma-ptr)
;; a3 = 0x70000810; it is later toggled with xor 0x1800 (0x0810 <-> 0x1010)
ori a3, t5, 2064 setup buffer 1
;; t3 was only a temporary for the 0x10000000 base above; from here on it is the template word
addiu t3, r0, 0 t3 = 0
;; t5 = 0x70000410; it is later toggled with xor 0x400 (0x0410 <-> 0x0010)
ori t5, t5, 1040 setup buffer 0
vmaxw.xyzw vf1, vf0, vf0 vf1 = (1, 1, 1, 1)
;; first 16 vis bits
lh t7, 0(a0) vis cull load
;; vf4 starts at max-dist and is only ever lowered (vmini) afterwards
lqc2 vf4, 96(t0) max-dist
addiu a1, a1, -4 remove basic offset
addiu t6, r0, 0 t6 = 0
or ra, a3, r0 ra = SPAD BUFFER 1
;; skips ahead until we find some visible tfrags.
;; On entry t7 holds the 16 vis bits of the current block of 16 tfrags.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L41?]
bne t7, r0, L42 ;; something in this block is visible: go start its upload
sll r0, r0, 0 ;; nop (branch delay slot)
addiu a0, a0, 2 ;; + 16 bits in the vis list
addiu a1, a1, 1024 ;; 16 * 0x40 = 1024 bytes in tfrag list
daddiu a2, a2, -16 ;; num tfrags -= 16
lh t7, 0(a0) ;; next vis
blez a2, L69_CLEANUP ;; no visible tfrags, abort!
sll r0, r0, 0
beq r0, r0, L41 ;; keep looking
sll r0, r0, 0
;; waits for any previously running spad dma transfer to end
;; (polls bit 0x100 of the word at +0 of the to-spr channel until it reads as clear)
;; [L42?]
lw t7, 0(t4)
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi t7, t7, 256
sll r0, r0, 0
bne t7, r0, L42 ;; still busy: poll again
sll r0, r0, 0
;; initializes the first scratchpad upload of tfrags
;; The upload goes to the *other* half of the tfrag double buffer (t5 ^ 0x400); the main
;; loop toggles t5 before reading, so it ends up pointing at this data.
sw a1, 16(t4) ;; madr = a1
xori t7, t5, 1024 ;; t7 = upload addr of tfrags double buffer
sw t7, 128(t4) ;; sadr
addiu t7, r0, 64 ;; 64 qw = 16 tfrags
sw t7, 32(t4) ;; qwc
addiu t7, r0, 256 ;; go
sw t7, 0(t4) ;; go!
;; Main loop top: runs once per block of 16 tfrags that has at least one visible tfrag.
;; It latches the vis pointer and scratchpad buffer for the block about to be processed,
;; then looks ahead for the next visible block so its to-spr upload can overlap with
;; the processing of the current one.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L43_MAIN_LOOP_TOP?]
sll r0, r0, 0
or gp, a0, r0 ;; gp = temp addr of vis list
xori t5, t5, 1024 ;; toggle to addr of upload tfrags
daddiu a0, a0, 2 ;; advance vis list ptr (16 tfrags)
or t9, a0, r0 ;; t9 = temp addr of next vis list
or t8, t5, r0 ;; t8 = tfrags to use
;; next, let's find the next block of visible tfrags so we can start its dma early
daddiu t7, a2, -16 ;; t7 = tfrags left after this loop
bgtz t7, L45 ;; if we have them left, jump
lh t7, 0(a0) ;; and load their vis
beq r0, r0, L48 ;; none left, skip dma kickoff.
sll r0, r0, 0
;; [L44?] the look-ahead block is entirely hidden: step over it
daddiu a2, a2, -16 ;; skip invisible block (dec tfrag counter)
addiu a0, a0, 2 ;; increment vis list
blez a2, L48 ;; did we get to the end of the tfrag list?
lh t7, 0(a0) ;; check vis again.
;; [L45?]
sll r0, r0, 0
sll r0, r0, 0
beq t7, r0, L44 ;; we have tfrags left. if 0, they are all hidden, so loop
addiu a1, a1, 1024 ;; and advance upload pointer (not done at all yet)
;; we reach here if we have tfrags left after this block.
;; so let's upload the next ones to the scratchpad so they are ready by next time.
;; [L46?]
lw t7, 0(t4) ;; make sure to-spr is done
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi t7, t7, 256
sll r0, r0, 0
beq t7, r0, L47 ;; channel idle: start the next upload
sll r0, r0, 0
sll r0, r0, 0
lw t7, 188(t0) ;; wait-to-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu t7, t7, 1 ;; counting how many times we wait
sll r0, r0, 0
sw t7, 188(t0)
beq r0, r0, L46 ;; poll again
sll r0, r0, 0
;; [L47?] same register sequence as the first upload: madr, sadr, qwc, then go
sw a1, 16(t4) ;; start the to!
xori t7, t5, 1024 ;; upload into the half of the double buffer we are not reading
sw t7, 128(t4)
addiu t7, r0, 64 ;; 64 qw = 16 tfrags
sw t7, 32(t4)
addiu t7, r0, 256
beq r0, r0, L49 ;; skip ahead
sw t7, 0(t4) ;; go (in the branch delay slot)
;; only reach here if we dont have any more spr to's
;; still need to sync the to for the block we're about to process
;; [L48?]
lw t7, 0(t4)
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi t7, t7, 256
sll r0, r0, 0
beq t7, r0, L49 ;; upload finished: process the block
sll r0, r0, 0
sll r0, r0, 0
lw t7, 188(t0) ;; wait-to-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu t7, t7, 1 ;; count one more wait iteration
sll r0, r0, 0
sw t7, 188(t0)
beq r0, r0, L48 ;; poll again
sll r0, r0, 0
;; common op start
;; at this point:
;; t8 is our spad tfrag buffer, with 16 tfrags. at least 1 is visible.
;; gp is our vis-list pointer
;; we run through this loop 2x, each time doing 8 tfrags.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L49?]
lb t7, 0(gp) ;; load first 8 frag vis bits
addiu gp, gp, 1 ;; inc vis bit ptr.
sll r0, r0, 0
sw gp, 160(t0) ;; store cur-vis-bits (gp is reused as a scratch register below)
bne t7, r0, L50 ;; are any visible in the first 8?
sw t9, 164(t0) ;; save end-vis-bits: t9 becomes the vis mask below and is reloaded from here after the 8-loop
B17: ;; none are visible
daddiu a2, a2, -8 ;; dec tfrags
addiu t8, t8, 512 ;; skip tfrags (8 * 64 bytes)
beq r0, r0, L65 ;; skip ahead!
sll r0, r0, 0
;; [L50?]
addiu t9, r0, 128 ;; vis mask init (gets shifted in each run of the 8-loop)
lqc2 vf2, 16(t8) ;; bsphere load
;; Top of the per-tfrag loop. If more than 124 qw of output are pending, send them to
;; the dma buffer and switch to the other output buffer before generating more.
;; [L51?]
daddiu gp, t6, -124 ;; are we full of stuff in buffer 1?
sll r0, r0, 0
blez gp, L54 ;; no: go straight to building dma
sll r0, r0, 0
;; [L52?]
lw ra, 0(t1) ;; wait for spr-from
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi ra, ra, 256
sll r0, r0, 0
beq ra, r0, L53 ;; channel idle: kick the transfer
sll r0, r0, 0
sll r0, r0, 0 ;; count it
lw ra, 184(t0) ;; wait-from-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu ra, ra, 1
sll r0, r0, 0
sw ra, 184(t0)
beq r0, r0, L52 ;; poll again
sll r0, r0, 0
;; [L53?]
sw a3, 128(t1) ;; kick off the next spr-from.
xori a3, a3, 6144 ;; switch to the other output buffer (xor 0x1800)
sw v1, 16(t1) ;; to the dma-buf
sll ra, t6, 4 ;; qwc -> bytes
addu v1, v1, ra ;; add qwc
or ra, a3, r0 ;; ra is the spad-side dma buffer to write to
sw t6, 32(t1) ;; qwc
addiu t6, r0, 256
sw t6, 0(t1) ;; go!
addiu t6, r0, 0 ;; reset use.
;; actually building dma.
;; Per-tfrag work; VU0 macro ops are interleaved with the integer ops:
;;  - copy address/qwc from the tfrag (t8) into the base, level-0, common and level-1
;;    templates in *tfrag-work* (t0)
;;  - vf5: "plane" product of the bsphere with vf16-vf19; if any lane compares below
;;    zero as an integer the tfrag is rejected and its vis bit toggled
;;  - vf6 / vf8: "camrot" product of the bsphere with vf24-vf27, plus / minus bsphere.w
;;  - s3: packed byte mask of the sign tests on (frag-dists + vf6.z) and
;;    (frag-dists + vf8.z); its bytes select base / level-0 / level-1, or reject
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L54?]
and gp, t7, t9 ;; vis check
vmulax.xyzw acc, vf16, vf2 ;; plane ?
beq gp, r0, L64_8loop_reject ;; vis check failed, reject!
lwu gp, 36(t8) ;; DMA BASE (chain) -------------
vmadday.xyzw acc, vf17, vf2 ;; plane
lbu s5, 45(t8) ;; DMA QWC1
vmaddaz.xyzw acc, vf18, vf2 ;; plane
sw gp, 4(t0) ;; base tmpl set addr
vmsubaw.xyzw acc, vf19, vf0 ;; plane
sh s5, 0(t0) ;; base tmpl set qwc
vmaddw.xyzw vf5, vf1, vf2 ;; plane
lwu gp, 32(t8) ;; DMA level0 -------------
vmulaw.xyzw acc, vf27, vf0 ;; camrot
lbu s5, 47(t8) ;; DMA QWC3
vmaddax.xyzw acc, vf24, vf2 ;; camrot
sw gp, 20(t0) ;; l0 tmpl set addr
vmadday.xyzw acc, vf25, vf2 ;; camrot
sh s5, 16(t0) ;; l0 tmpl set qwc
vmaddaz.xyzw acc, vf26, vf2 ;; camrot
lwu gp, 32(t8) ;; DMA common -------------- (same tfrag word as level0)
qmfc2.i s5, vf5 ;; plane
lbu s4, 44(t8) ;; DMA QWC0
vmaddw.xyzw vf6, vf1, vf2 ;; vf6 = acc + bsphere.w (vf1 is all ones) - presumably distance + radius
sw gp, 36(t0) ;; common tmpl set addr
vmsubw.xyzw vf8, vf1, vf2 ;; vf8 = acc - bsphere.w - presumably distance - radius
sh s4, 32(t0) ;; common tmpl set qwc
pcgtw s5, r0, s5 ;; plane check
lwu gp, 40(t8) ;; DMA level1 -------------
ppach s5, r0, s5 ;; plane check
lbu s4, 46(t8) ;; DMA QWC2
vaddz.xyzw vf6, vf3, vf6 ;; dist
sw gp, 52(t0) ;; l1 tmpl addr
vaddz.xyzw vf7, vf3, vf8 ;; dist
sw t3, 12(t0) ;; !!! set a vif on base, 0 on the first round, at least.
bne s5, r0, L63_8loop_reject_tog_vis ;; plane check failed
sh s4, 48(t0) ;; l1 tmpl qwc
vmini.xyzw vf4, vf4, vf8 ;; vf4 = min(vf4, vf8): running minimum, seeded from max-dist at entry
sw t3, 28(t0) ;; !!! set a vif on l0
sll r0, r0, 0
lbu s5, 53(t8) ;; s5 = num-base-colors
qmfc2.i gp, vf6 ;; dist
sw t3, 44(t0) ;; !!! set a vif on common
qmfc2.i s3, vf7 ;; dist
lbu s4, 56(t8) ;; s4 = color-offset
pcgtw s2, r0, gp ;; dist
lw gp, 12(t8) ;; gp = colors-indices
pcgtw s3, r0, s3 ;; dist
sb s4, 76(t0) ;; store color-offset (byte 12 of color-tmpl)
pinteh s4, s2, s3 ;; dist
lbu s2, 54(t8) ;; s2 = num-level0-colors
ppacb s3, r0, s4 ;; dist
lbu s1, 55(t8) ;; s1 = num-level1-colors
beq s3, r0, L56 ;; jump if dist fails?
dsrl32 s4, s3, 8 ;; s4 is the level or something?
beq s2, r0, L56 ;; if we have no level0 colors, use base
sll r0, r0, 0
beq s1, r0, L55 ;; if we have no level1 colors, use level0
dsrl s5, s3, 16
beq s5, r0, L55 ;; possible l1 skip based on lod
dsrl32 s5, s3, 24
bne s5, r0, L64_8loop_reject ;; possible all skip based on lod.
addiu s5, s1, 3 ;; s5 = num-level1-colors + 3
;; Color/template setup for the chosen level. Each of the three paths:
;;  - computes s4 = (num colors + 3) >> 2, the color data size in quadwords
;;  - patches color-tmpl: halfword 0 = s4, byte 14 = s4 << 2 (4-aligned color count)
;;  - copies the templates for that level plus color-tmpl to the output buffer at ra
;;  - leaves s5 = the (unrounded) color count and t3 = t2 | N, where N is
;;    18/16/14 (level 1), 10/8 (level 0) or 6 (base)
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
B30: ;; level 1 color setup
sra s4, s5, 2 ;; s4 = (num_color + 3) >> 2
or s5, s1, r0 ;; s5 = (num_color)
sll t3, s4, 2 ;; t3 = num colors, 4 aligned
sh s4, 64(t0) ;; color-tmpl qwc.
sll r0, r0, 0
sb t3, 78(t0) ;; vif store
daddiu t6, t6, 3 ;; use 3 qw's of global dma.
lq s2, 32(t0) ;; load the common-tmpl!
sll r0, r0, 0
lq s1, 48(t0) ;; load the l1 tmpl!
sll r0, r0, 0
lq t3, 64(t0) ;; load the color tmpl!
sq s2, 0(ra) ;; store the common!
sll r0, r0, 0
sq s1, 16(ra) ;; store the l1!
dsrl32 s2, s3, 16 ;; s2 = s3 >> 48
sq t3, 32(ra) ;; store the color
daddiu ra, ra, 48 ;; advance the dma buffer pointer
bne s2, r0, L57
ori t3, t2, 18 ;; is this.. program 18?
dsrl32 t3, s3, 8 ;; t3 = s3 >> 40
sll r0, r0, 0
bne t3, r0, L57
ori t3, t2, 16 ;; delay slot: t3 is replaced after the branch condition was read
beq r0, r0, L57
ori t3, t2, 14
;; [L55?] level 0 path; s4 still holds s3 >> 40 from the lod select
bne s4, r0, L64_8loop_reject
addiu s5, s2, 3 ;; l0 colors + 3
B34: l0 color setup
sra s4, s5, 2 ;; >> 2
or s5, s2, r0 ;; s5 = num-level0-colors
sll t3, s4, 2 ;; << 2
sh s4, 64(t0) ;; color-tmpl qwc
sll r0, r0, 0
sb t3, 78(t0) ;; vif store for unpack??
daddiu t6, t6, 2 ;; only 2 qw's
lq s2, 16(t0) ;; l0 tmpl
sll r0, r0, 0
lq t3, 64(t0) ;; color tmp
sq s2, 0(ra)
dsrl s3, s3, 8
sq t3, 16(ra)
daddiu ra, ra, 32
bne s3, r0, L57
ori t3, t2, 10
beq r0, r0, L57
ori t3, t2, 8
;; [L56?] base path; s5 still holds num-base-colors
bne s4, r0, L64_8loop_reject
addiu s4, s5, 3 ;; base colors + 3
sra s4, s4, 2 ;; s4 = color qwc
sll r0, r0, 0
sll t3, s4, 2 ;; 4-aligned color count
sh s4, 64(t0) ;; color-tmpl qwc
sll r0, r0, 0
sb t3, 78(t0)
ori t3, t2, 6
lq s3, 0(t0) ;; base
daddiu t6, t6, 2 ;; 2 qw: base tmpl + color tmpl
lq s2, 64(t0) ;; color
sq s3, 0(ra)
sll r0, r0, 0
sq s2, 16(ra)
daddiu ra, ra, 32
;; END of the color setup
;; Before the colors are appended: if pending output (t6) plus the color data (s4 qw)
;; would exceed 127 qw, send what is pending (templates included) to the dma buffer and
;; continue writing in the other output buffer.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
L57: ;; another opportunity to do some spad swappin
addiu s3, r0, 127 ;; s3 = 127
daddu s2, t6, s4 ;; s2 = dma-use + color qwc
dsubu s3, s3, s2 ;; s3 = 127 - (dma-use + color qwc)
sll r0, r0, 0
bgez s3, L60 ;; it fits: skip the flush
sll r0, r0, 0
;; [L58?]
lw ra, 0(t1) ;; wait for spr-from (ra is scratch here; it is reset below)
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi ra, ra, 256
sll r0, r0, 0
beq ra, r0, L59 ;; channel idle: kick the transfer
sll r0, r0, 0
sll r0, r0, 0
lw ra, 184(t0) ;; wait-from-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu ra, ra, 1 ;; count one more wait iteration
sll r0, r0, 0
sw ra, 184(t0)
beq r0, r0, L58 ;; poll again
sll r0, r0, 0
;; [L59?]
sw a3, 128(t1) ;; sadr = current output buffer
xori a3, a3, 6144 ;; switch to the other output buffer (xor 0x1800)
sw v1, 16(t1) ;; madr = dma buffer pointer
sll ra, t6, 4 ;; qwc -> bytes
addu v1, v1, ra ;; advance dma buffer pointer
or ra, a3, r0 ;; ra = start of the new output buffer
sw t6, 32(t1) ;; qwc
addiu t6, r0, 256
sw t6, 0(t1) ;; go!
addiu t6, r0, 0 ;; reset use.
;; Color copy: reads u16 indices four at a time, zero-extends them to 32 bits, adds them
;; (unscaled) to the four color-ptr words, loads one word through each resulting address
;; and appends the four words to the output buffer. The loop is software-pipelined: the
;; loads for group N+1 are issued while group N is being stored.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L60?]
daddu t6, t6, s4 ;; add color imm's to dma buffer length
sw t8, 168(t0) ;; back up tfrag... not enough regs
ld s4, 0(gp) ;; load color-indices (u64 = u16 x 4)
daddiu t8, gp, 8 ;; inc colors ptr
daddiu gp, s5, -4 ;; gp is color counter. we're using the rounded up to 4 color count.
lq s5, 128(t0) ;; color-ptr x4
pextlh s4, r0, s4 ;; expand packed u16's to u32's
mfc1 r0, f31 ;; nop
paddw s2, s4, s5 ;; add to color pointers
mfc1 r0, f31 ;; nop
lw s4, 0(s2) ;; s4 = colors[0]
dsra32 s3, s2, 0 ;; s3 = second address (bits 32-63 of s2)
lw s3, 0(s3) ;; s3 = colors[1]
pcpyud s1, s2, s2 ;; s1 = upper 64 bits of s2
lw s2, 0(s1) ;; s2 = colors[2]
dsra32 s1, s1, 0 ;; s1 = fourth address
blez gp, L62 ;; 4 or fewer colors: just store this group
lw s1, 0(s1) ;; s1 = colors[3]
;; [L61?] loop: store the previous group while fetching the next one
ld s0, 0(t8) ;; next four indices
daddiu ra, ra, 16
daddiu t8, t8, 8
sw s4, -16(ra)
daddiu gp, gp, -4
sw s3, -12(ra)
pextlh s4, r0, s0
sw s2, -8(ra)
paddw s2, s4, s5
sw s1, -4(ra)
lw s4, 0(s2)
dsra32 s3, s2, 0
lw s3, 0(s3)
pcpyud s1, s2, s2
lw s2, 0(s1)
dsra32 s1, s1, 0
bgtz gp, L61
lw s1, 0(s1)
;; [L62?] store the final group and restore the tfrag pointer
daddiu ra, ra, 16
lw t8, 168(t0) ;; restore tfrag pointer
sll r0, r0, 0
sw s4, -16(ra)
sll r0, r0, 0
sw s3, -12(ra)
sll r0, r0, 0
sw s2, -8(ra)
sll r0, r0, 0
sw s1, -4(ra)
;; End of the per-tfrag loop, the 8-tfrag group loop and the 16-tfrag block loop.
;; A tfrag that was drawn (fall-through from the color copy) or that failed the plane
;; check has its bit toggled in t7; the plain reject path skips the toggle. The
;; resulting byte is written back to the vis list once the group of 8 is done.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
;; [L63_8loop_reject_tog_vis?]
xor t7, t7, t9 ;; update vis
;; [L64_8loop_reject?]
sll r0, r0, 0
daddiu t8, t8, 64 ;; next tfrag
srl t9, t9, 1 ;; next vis mask bit
addiu a2, a2, -1 ;; one fewer tfrag to go
sll r0, r0, 0
bne t9, r0, L51 ;; more tfrags in this group of 8
lqc2 vf2, 16(t8) ;; bsphere of the next tfrag (delay slot)
;; [L65?]
sll r0, r0, 0
lw gp, 160(t0) ;; reload cur-vis-bits
sll r0, r0, 0
lw t9, 164(t0) ;; reload end-vis-bits
bne gp, t9, L49 ;; second group of 8 in this block still to do
sb t7, -1(gp) ;; write the updated vis byte back (delay slot)
bgtz a2, L43_MAIN_LOOP_TOP ;; more tfrags: next block of 16
sll r0, r0, 0
;; All tfrags are processed. If output is still pending (t6 != 0), wait for the from-spr
;; channel, send the remainder to the dma buffer, then wait for that last transfer to
;; finish before cleaning up.
;; NOTE(review): "[Lnn?]" marks the presumed position of a branch label that is missing
;; from this listing; confirm against the original disassembly.
beq t6, r0, L68 ;; nothing pending: only sync
sll r0, r0, 0
;; [L66?]
lw a0, 0(t1)
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi a0, a0, 256
sll r0, r0, 0
beq a0, r0, L67 ;; channel idle: kick the last transfer
sll r0, r0, 0
sll r0, r0, 0
lw a0, 184(t0) ;; wait-from-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu a0, a0, 1 ;; count one more wait iteration
sll r0, r0, 0
sw a0, 184(t0)
beq r0, r0, L66 ;; poll again
sll r0, r0, 0
;; [L67?] same sequence as the other from-spr flushes; the buffer-toggle and use-reset
;; results land in a0 and are overwritten without being read
sw a3, 128(t1) ;; sadr = current output buffer
xori a0, a3, 6144 ;; result unused
sw v1, 16(t1) ;; madr = dma buffer pointer
sll a1, t6, 4 ;; qwc -> bytes
addu v1, v1, a1 ;; advance dma buffer pointer
or a0, a0, r0 ;; result unused
sw t6, 32(t1) ;; qwc
addiu a0, r0, 256
sw a0, 0(t1) ;; go!
addiu a0, r0, 0 ;; result unused
;; [L68?] wait for the from-spr channel to go idle
lw a0, 0(t1)
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
andi a0, a0, 256
sll r0, r0, 0
beq a0, r0, L69_CLEANUP
sll r0, r0, 0
sll r0, r0, 0
lw a0, 184(t0) ;; wait-from-spr
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
daddiu a0, a0, 1 ;; count one more wait iteration
sll r0, r0, 0
sw a0, 184(t0)
beq r0, r0, L68 ;; poll again
sll r0, r0, 0
;; Cleanup: publish results to *tfrag-work* and the dma-buffer, restore the saved
;; registers and return 0.
;; NOTE(review): the label L69_CLEANUP is presumably here; its label line is missing
;; from this listing.
lw a0, 176(t0) ;; a0 = dma-buffer object saved at entry
sll r0, r0, 0
sw t3, 172(t0) ;; last-call = final t3
sll r0, r0, 0
sqc2 vf4, 112(t0) ;; min-dist = running minimum
sll r0, r0, 0
sw v1, 4(a0) ;; write the advanced data pointer back to the word it was loaded from at entry
sll r0, r0, 0
or v0, r0, r0 ;; return 0
ld ra, 0(sp) ;; ra was used as the output write pointer; restore the return address
lq gp, 112(sp)
lq s5, 96(sp)
lq s4, 80(sp)
lq s3, 64(sp)
lq s2, 48(sp)
lq s1, 32(sp)
lq s0, 16(sp)
jr ra
daddiu sp, sp, 128 ;; pop the frame (delay slot)
;; nops after the return
sll r0, r0, 0
sll r0, r0, 0
sll r0, r0, 0
Notes on the VU program
vi03 is a pointer to an "address book" - a sequence of addresses
vi02 contains addresses in this book
from these xyw are loaded for vf28 (v3-32, with 2, 1)
xy are floats. w is address of next vertex data.
vi08 is a pointer to adgifs?
vi09 is a pointer to some data like [vi12, ?, ?, vi13] ??
vi12 counter, started negative?
vi13 is adgif offset?
vi04 is a pointer to tri-data:
- vertex (w = 128.0?)
- ?? (vf20)