// jak-project/game/mips2c/mips2c_private.h

#pragma once
#include <cfloat>
#include <cmath>
#include <cstring>
#include "common/common_types.h"
#include "common/dma/dma.h"
#include "common/util/Assert.h"
#include "common/util/BitUtils.h"
#include "game/common/vu.h"
#include "game/mips2c/mips2c_table.h"
#include "third-party/fmt/core.h"
// This file contains utility functions for code generated by the mips2c pass.
// It is only useful for that generated code.
// Base pointer of the emulated EE main memory. The load/store helpers below add EE addresses
// to this pointer to form host addresses.
extern u8* g_ee_main_mem;
// Per-platform trampolines used by ExecutionContext::jalr to call a function with an array of
// eight u64 arguments (presumably implemented in assembly, going by the names - confirm).
extern "C" {
u64 _call_goal8_asm_linux(void* func, u64* arg_array, u64 zero, u64 pp, u64 st, void* off);
u64 _call_goal8_asm_win32(void* func, u64* arg_array, u64 zero, u64 pp, u64 st, void* off);
}
namespace Mips2C {
// Nicknames for the EE general purpose registers. Each enumerator's value is the register
// number, so it can be used directly as an index into ExecutionContext::gprs.
enum Gpr {
  r0 = 0,  // hardcoded to zero
  at = 1,  // temp, not used by GOAL compiler, but used by GOAL's kernel inline assembly
           // (and other places?)
  v0 = 2,  // return, temp
  v1 = 3,  // temp

  // argument registers
  a0 = 4,   // arg0, temp
  a1 = 5,   // arg1, temp
  a2 = 6,   // arg2, temp
  a3 = 7,   // arg3, temp
  t0 = 8,   // arg4, temp
  t1 = 9,   // arg5, temp
  t2 = 10,  // arg6, temp
  t3 = 11,  // arg7, temp

  // temporaries
  t4 = 12,  // temp
  t5 = 13,  // temp
  t6 = 14,  // temp
  t7 = 15,  // temp

  // saved registers
  s0 = 16,  // saved
  s1 = 17,  // saved
  s2 = 18,  // saved
  s3 = 19,  // saved
  s4 = 20,  // saved
  s5 = 21,  // saved
  s6 = 22,  // process pointer
  s7 = 23,  // symbol table

  t8 = 24,  // temp
  t9 = 25,  // function pointer
  k0 = 26,  // reserved
  k1 = 27,  // reserved
  gp = 28,  // saved (C code uses this as a global pointer)
  sp = 29,  // stack pointer
  fp = 30,  // global pointer (address of current function)
  ra = 31,  // return address

  MAX_GPR = 32
};
// Names for the 32 vf registers, valued by register index. Registers 0-9 have both a short
// (vf3) and a zero-padded (vf03) spelling that map to the same index.
enum VfName {
  vf0 = 0, vf00 = vf0,
  vf1 = 1, vf01 = vf1,
  vf2 = 2, vf02 = vf2,
  vf3 = 3, vf03 = vf3,
  vf4 = 4, vf04 = vf4,
  vf5 = 5, vf05 = vf5,
  vf6 = 6, vf06 = vf6,
  vf7 = 7, vf07 = vf7,
  vf8 = 8, vf08 = vf8,
  vf9 = 9, vf09 = vf9,
  vf10 = 10, vf11 = 11, vf12 = 12, vf13 = 13,
  vf14 = 14, vf15 = 15, vf16 = 16, vf17 = 17,
  vf18 = 18, vf19 = 19, vf20 = 20, vf21 = 21,
  vf22 = 22, vf23 = 23, vf24 = 24, vf25 = 25,
  vf26 = 26, vf27 = 27, vf28 = 28, vf29 = 29,
  vf30 = 30, vf31 = 31,
};
// Names for vi registers 0 through 14, valued by register index.
enum ViName {
  vi00 = 0,  vi01 = 1,  vi02 = 2,  vi03 = 3,
  vi04 = 4,  vi05 = 5,  vi06 = 6,  vi07 = 7,
  vi08 = 8,  vi09 = 9,  vi10 = 10, vi11 = 11,
  vi12 = 12, vi13 = 13, vi14 = 14,
};
// Names for the 32 floating point registers, valued by register index (usable as an index
// into ExecutionContext::fprs).
enum FprName {
  f0 = 0,   f1 = 1,   f2 = 2,   f3 = 3,
  f4 = 4,   f5 = 5,   f6 = 6,   f7 = 7,
  f8 = 8,   f9 = 9,   f10 = 10, f11 = 11,
  f12 = 12, f13 = 13, f14 = 14, f15 = 15,
  f16 = 16, f17 = 17, f18 = 18, f19 = 19,
  f20 = 20, f21 = 21, f22 = 22, f23 = 23,
  f24 = 24, f25 = 25, f26 = 26, f27 = 27,
  f28 = 28, f29 = 29, f30 = 30, f31 = 31,
};
// Destination lane mask for the vector operations below.
// Each value is a 4-bit mask: bit 0 = x, bit 1 = y, bit 2 = z, bit 3 = w. The vector helpers
// test it with ((u64)mask & (1 << i)) to decide whether lane i is written.
// note: these are not the same as the ps2 encoding - in these the least significant bit is x.
enum class DEST {
NONE = 0,
x = 1,
y = 2,
xy = 3,
z = 4,
xz = 5,
yz = 6,
xyz = 7,
w = 8,
xw = 9,
yw = 10,
xyw = 11,
zw = 12,
xzw = 13,
yzw = 14,
xyzw = 15
};
// Selects a single component of a vector register. The value is cast to int and used as the
// index into the register's f[] / du32[] lanes by the *_bc and Q-register helpers.
enum class BC { x = 0, y = 1, z = 2, w = 3 };
// Storage for one vf register. All members of the anonymous union alias the same storage, so
// the register can be viewed as integer lanes of several widths, as four floats (f), or as a
// Vf (declared elsewhere).
struct Mips2c_vf {
union {
u64 du64[2];
s64 ds64[2];
u32 du32[4];
s32 ds32[4];
u16 du16[8];
s16 ds16[8];
u8 du8[16];
s8 ds8[16];
float f[4];
Vf vf;
};
};
// Storage for the accumulator register. Same set of lane views as Mips2c_vf, except the last
// member is an Accumulator (declared elsewhere); note that member is still named `vf`.
struct Mips2c_acc {
union {
u64 du64[2];
s64 ds64[2];
u32 du32[4];
s32 ds32[4];
u16 du16[8];
s16 ds16[8];
u8 du8[16];
s8 ds8[16];
float f[4];
Accumulator vf;
};
};
struct ExecutionContext {
// EE general purpose registers
// (indexed by Gpr; reads should go through gpr_src so that index 0 reads as zero)
u128 gprs[32];
// EE fprs
float fprs[32];
// VU0 vf registers
// (reads should go through vf_src so that index 0 reads as (0, 0, 0, 1))
Mips2c_vf vfs[32];
// accumulator written by the vmula/vmadda/vmsuba/vopmula family and read by vmadd/vmsub/vopmsub
Mips2c_acc acc;
// Q register: written by vdiv/vrsqrt/vsqrt, read by vmulq/vaddq/vmsubq
float Q;
// I register (not referenced in the visible part of this file)
float I;
// HI/LO registers used by pmulth/pmaddh and read back by the pmfhl_* helpers
u128 hi, lo;
// Copy all 32 vf registers from `other` into this context. No other state is copied.
void copy_vfs_from_other(const ExecutionContext* other) {
  int reg = 0;
  while (reg < 32) {
    vfs[reg] = other->vfs[reg];
    ++reg;
  }
}
// Read a vf register by value. Index 0 always reads as (0, 0, 0, 1), whatever vfs[0] holds.
Mips2c_vf vf_src(int idx) {
  if (idx != 0) {
    return vfs[idx];
  }
  Mips2c_vf constant_reg;
  for (int lane = 0; lane < 3; ++lane) {
    constant_reg.f[lane] = 0;
  }
  constant_reg.f[3] = 1.f;
  return constant_reg;
}
// Read a general purpose register by value. Index 0 always reads as all-zero, whatever
// gprs[0] holds.
u128 gpr_src(int idx) {
  if (idx != 0) {
    return gprs[idx];
  }
  u128 zero_reg;
  zero_reg.du64[1] = 0;
  zero_reg.du64[0] = 0;
  return zero_reg;
}
// Lower 64 bits of a general purpose register (register 0 reads as zero).
u64 sgpr64(int idx) {
  const u128 reg = gpr_src(idx);
  return reg.du64[0];
}
// Lower 32 bits of a general purpose register, used as an EE memory address.
u32 gpr_addr(int idx) {
  const u128 reg = gpr_src(idx);
  return reg.du32[0];
}
void load_symbol(int gpr, void* sym_addr) {
s32 val;
memcpy(&val, sym_addr, 4);
gprs[gpr].ds64[0] = val; // sign extend and set
}
void load_symbol2(int gpr, void* sym_addr) {
s32 val;
memcpy(&val, (u8*)sym_addr - 1, 4);
gprs[gpr].ds64[0] = val; // sign extend and set
}
void store_symbol2(int gpr, void* sym_addr) { memcpy((u8*)sym_addr - 1, &gprs[gpr].ds32[0], 4); }
void load_symbol_addr(int gpr, void* sym_addr) {
gprs[gpr].du64[0] = ((const u8*)sym_addr) - g_ee_main_mem;
}
void lbu(int dst, int offset, int src) {
u8 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 1);
gprs[dst].du64[0] = val;
}
void lb(int dst, int offset, int src) {
s8 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 1);
gprs[dst].du64[0] = val;
}
void lqc2(int vf, int offset, int gpr) {
ASSERT(((gpr_src(gpr).du32[0] + offset) & 0xf) == 0);
memcpy(&vfs[vf], g_ee_main_mem + gpr_src(gpr).du32[0] + offset, 16);
}
void lwc1(int dst, int offset, int gpr) {
memcpy(&fprs[dst], g_ee_main_mem + gpr_src(gpr).du32[0] + offset, 4);
}
void lw(int dst, int offset, int src) {
s32 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 4);
gprs[dst].ds64[0] = val;
}
void lw_float_constant(int dst, u32 src) {
s32 val = src;
gprs[dst].ds64[0] = val;
}
void lh(int dst, int offset, int src) {
s16 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 2);
gprs[dst].ds64[0] = val;
}
void lhu(int dst, int offset, int src) {
u16 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 2);
gprs[dst].du64[0] = val;
}
void lwu(int dst, int offset, int src) {
u32 val;
memcpy(&val, g_ee_main_mem + gpr_src(src).du32[0] + offset, 4);
gprs[dst].du64[0] = val;
}
void lq(int dst, int offset, int src) {
memcpy(&gprs[dst].du64[0], g_ee_main_mem + ((gpr_addr(src) + offset) & (~15)), 16);
}
void ld(int dst, int offset, int src) {
memcpy(&gprs[dst].du64[0], g_ee_main_mem + gpr_addr(src) + offset, 8);
}
void sw(int src, int offset, int addr) {
auto s = gpr_src(src);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 4);
}
// Call the function located at EE address `addr` through the platform trampoline.
// Registers a0-a3 and t0-t3 are passed as the eight arguments, s6 and s7 are passed as the
// `pp` and `st` trampoline arguments, and the returned value is stored in v0.
// On platforms other than Linux and Windows no call is made.
void jalr(u32 addr) {
  // u64 _call_goal8_asm_linux(u64 func, u64* arg_array, u64 zero, u64 pp, u64 st, u64 off);
  u64 args[8] = {gprs[a0].du64[0], gprs[a1].du64[0], gprs[a2].du64[0], gprs[a3].du64[0],
                 gprs[t0].du64[0], gprs[t1].du64[0], gprs[t2].du64[0], gprs[t3].du64[0]};
  // Address 0 would jump to the very start of EE memory; reject it on every platform, not
  // only in the Linux build.
  ASSERT(addr);
#if defined(__linux__)
  gprs[v0].du64[0] = _call_goal8_asm_linux(g_ee_main_mem + addr, args, 0, gprs[s6].du64[0],
                                           gprs[s7].du64[0], g_ee_main_mem);
#elif defined(_WIN32)
  gprs[v0].du64[0] = _call_goal8_asm_win32(g_ee_main_mem + addr, args, 0, gprs[s6].du64[0],
                                           gprs[s7].du64[0], g_ee_main_mem);
#endif
}
void sb(int src, int offset, int addr) {
auto s = gpr_src(src);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 1);
}
void sh(int src, int offset, int addr) {
auto s = gpr_src(src);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 2);
}
void sd(int src, int offset, int addr) {
auto s = gpr_src(src);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 8);
}
void sq(int src, int offset, int addr) {
auto s = gpr_src(src);
ASSERT((offset & 15) == 0);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 16);
}
void sqc2(int src, int offset, int addr) {
auto s = vf_src(src);
ASSERT(((gpr_addr(addr) + offset) & 0xf) == 0);
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &s.du32[0], 16);
}
void swc1(int src, int offset, int addr) {
memcpy(g_ee_main_mem + gpr_addr(addr) + offset, &fprs[src], 4);
}
// For each lane selected by `mask`: dest = src0 + (component `bc` of src1).
void vadd_bc(DEST mask, BC bc, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] + rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dest = min(src0, component `bc` of src1).
void vmini_bc(DEST mask, BC bc, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = std::min(lhs.f[lane], rhs.f[(int)bc]);
  }
}
// For each lane selected by `mask`: dest = max(src0, component `bc` of src1).
void vmax_bc(DEST mask, BC bc, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = std::max(lhs.f[lane], rhs.f[(int)bc]);
  }
}
void pextuh(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du16[0] = s1.du16[4];
gprs[dst].du16[1] = s0.du16[4];
gprs[dst].du16[2] = s1.du16[5];
gprs[dst].du16[3] = s0.du16[5];
gprs[dst].du16[4] = s1.du16[6];
gprs[dst].du16[5] = s0.du16[6];
gprs[dst].du16[6] = s1.du16[7];
gprs[dst].du16[7] = s0.du16[7];
}
void pextuw(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du32[0] = s1.du32[2];
gprs[dst].du32[1] = s0.du32[2];
gprs[dst].du32[2] = s1.du32[3];
gprs[dst].du32[3] = s0.du32[3];
}
void pextlw(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du32[0] = s1.du32[0];
gprs[dst].du32[1] = s0.du32[0];
gprs[dst].du32[2] = s1.du32[1];
gprs[dst].du32[3] = s0.du32[1];
}
void pcpyud(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du64[0] = s0.du64[1];
gprs[dst].du64[1] = s1.du64[1];
}
void pcpyld(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du64[1] = s0.du64[0];
gprs[dst].du64[0] = s1.du64[0];
}
void pexew(int dst, int src) {
auto s = gpr_src(src);
gprs[dst].du32[0] = s.du32[2];
gprs[dst].du32[1] = s.du32[1];
gprs[dst].du32[2] = s.du32[0];
gprs[dst].du32[3] = s.du32[3];
}
void pcgtw(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 4; i++) {
if (s.ds32[i] > t.ds32[i]) {
gprs[rd].du32[i] = 0xffffffff;
} else {
gprs[rd].du32[i] = 0;
}
}
}
void ppacw(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[rd].du32[0] = t.du32[0];
gprs[rd].du32[1] = t.du32[2];
gprs[rd].du32[2] = s.du32[0];
gprs[rd].du32[3] = s.du32[2];
}
void ppach(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[rd].du16[0] = t.du16[0];
gprs[rd].du16[1] = t.du16[2];
gprs[rd].du16[2] = t.du16[4];
gprs[rd].du16[3] = t.du16[6];
gprs[rd].du16[4] = s.du16[0];
gprs[rd].du16[5] = s.du16[2];
gprs[rd].du16[6] = s.du16[4];
gprs[rd].du16[7] = s.du16[6];
}
void pinteh(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[rd].du16[0] = t.du16[0];
gprs[rd].du16[1] = s.du16[0];
gprs[rd].du16[2] = t.du16[2];
gprs[rd].du16[3] = s.du16[2];
gprs[rd].du16[4] = t.du16[4];
gprs[rd].du16[5] = s.du16[4];
gprs[rd].du16[6] = t.du16[6];
gprs[rd].du16[7] = s.du16[6];
}
void ppacb(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[rd].du8[0] = t.du8[0];
gprs[rd].du8[1] = t.du8[2];
gprs[rd].du8[2] = t.du8[4];
gprs[rd].du8[3] = t.du8[6];
gprs[rd].du8[4] = t.du8[8];
gprs[rd].du8[5] = t.du8[10];
gprs[rd].du8[6] = t.du8[12];
gprs[rd].du8[7] = t.du8[14];
gprs[rd].du8[8] = s.du8[0];
gprs[rd].du8[9] = s.du8[2];
gprs[rd].du8[10] = s.du8[4];
gprs[rd].du8[11] = s.du8[6];
gprs[rd].du8[12] = s.du8[8];
gprs[rd].du8[13] = s.du8[10];
gprs[rd].du8[14] = s.du8[12];
gprs[rd].du8[15] = s.du8[14];
}
void paddw(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 4; i++) {
gprs[rd].du32[i] = s.du32[i] + t.du32[i];
}
}
void paddb(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 16; i++) {
gprs[rd].du8[i] = s.du8[i] + t.du8[i];
}
}
void pextub(int rd, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[rd].du8[0] = t.du8[8];
gprs[rd].du8[1] = s.du8[8];
gprs[rd].du8[2] = t.du8[9];
gprs[rd].du8[3] = s.du8[9];
gprs[rd].du8[4] = t.du8[10];
gprs[rd].du8[5] = s.du8[10];
gprs[rd].du8[6] = t.du8[11];
gprs[rd].du8[7] = s.du8[11];
gprs[rd].du8[8] = t.du8[12];
gprs[rd].du8[9] = s.du8[12];
gprs[rd].du8[10] = t.du8[13];
gprs[rd].du8[11] = s.du8[13];
gprs[rd].du8[12] = t.du8[14];
gprs[rd].du8[13] = s.du8[14];
gprs[rd].du8[14] = t.du8[15];
gprs[rd].du8[15] = s.du8[15];
}
void pmulth(int rd, int rs, int rt) {
// ASSERT(rd == 0);
s32 temp;
auto s = gpr_src(rs);
auto t = gpr_src(rt);
temp = (s32)s.ds16[0] * (s32)t.ds16[0];
lo.du32[0] = temp;
if (rd)
gprs[rd].du32[0] = temp;
temp = (s32)s.ds16[1] * (s32)t.ds16[1];
lo.du32[1] = temp;
temp = (s32)s.ds16[2] * (s32)t.ds16[2];
hi.du32[0] = temp;
if (rd)
gprs[rd].du32[1] = temp;
temp = (s32)s.ds16[3] * (s32)t.ds16[3];
hi.du32[1] = temp;
temp = (s32)s.ds16[4] * (s32)t.ds16[4];
lo.du32[2] = temp;
if (rd)
gprs[rd].du32[2] = temp;
temp = (s32)s.ds16[5] * (s32)t.ds16[5];
lo.du32[3] = temp;
temp = (s32)s.ds16[6] * (s32)t.ds16[6];
hi.du32[2] = temp;
if (rd)
gprs[rd].du32[3] = temp;
temp = (s32)s.ds16[7] * (s32)t.ds16[7];
hi.du32[3] = temp;
}
void pmaddh(int rd, int rs, int rt) {
s32 temp;
auto s = gpr_src(rs);
auto t = gpr_src(rt);
temp = lo.du32[0] + (s32)s.ds16[0] * (s32)t.ds16[0];
lo.du32[0] = temp;
if (rd)
gprs[rd].du32[0] = temp;
temp = lo.du32[1] + (s32)s.ds16[1] * (s32)t.ds16[1];
lo.du32[1] = temp;
temp = hi.du32[0] + (s32)s.ds16[2] * (s32)t.ds16[2];
hi.du32[0] = temp;
if (rd)
gprs[rd].du32[1] = temp;
temp = hi.du32[1] + (s32)s.ds16[3] * (s32)t.ds16[3];
hi.du32[1] = temp;
temp = lo.du32[2] + (s32)s.ds16[4] * (s32)t.ds16[4];
lo.du32[2] = temp;
if (rd)
gprs[rd].du32[2] = temp;
temp = lo.du32[3] + (s32)s.ds16[5] * (s32)t.ds16[5];
lo.du32[3] = temp;
temp = hi.du32[2] + (s32)s.ds16[6] * (s32)t.ds16[6];
hi.du32[2] = temp;
if (rd)
gprs[rd].du32[3] = temp;
temp = hi.du32[3] + (s32)s.ds16[7] * (s32)t.ds16[7];
hi.du32[3] = temp;
}
void psrlh(int dest, int src, int sa) {
auto s = gpr_src(src);
for (int i = 0; i < 8; i++) {
gprs[dest].du16[i] = s.du16[i] >> (sa & 0xf);
}
}
void psraw(int dest, int src, int sa) {
auto s = gpr_src(src);
for (int i = 0; i < 4; i++) {
gprs[dest].du32[i] = s.ds32[i] >> (sa & 0x1f);
}
}
void psllw(int dest, int src, int sa) {
auto s = gpr_src(src);
for (int i = 0; i < 4; i++) {
gprs[dest].ds32[i] = s.ds32[i] << (sa & 0x1f);
}
}
void psllh(int dest, int src, int sa) {
auto s = gpr_src(src);
for (int i = 0; i < 8; i++) {
gprs[dest].ds16[i] = s.ds16[i] << (sa & 0x1f);
}
}
void prot3w(int dst, int src) {
auto s = gpr_src(src);
gprs[dst].du32[0] = s.du32[1];
gprs[dst].du32[1] = s.du32[2];
gprs[dst].du32[2] = s.du32[0];
gprs[dst].du32[3] = s.du32[3];
}
void paddh(int dest, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 8; i++) {
gprs[dest].du16[i] = s.du16[i] + t.du16[i];
}
}
void psubh(int dest, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 8; i++) {
gprs[dest].du16[i] = s.du16[i] - t.du16[i];
}
}
void pminh(int dest, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 8; i++) {
if (s.ds16[i] < t.ds16[i])
gprs[dest].du16[i] = s.ds16[i];
else
gprs[dest].du16[i] = t.ds16[i];
}
}
void pand(int dst, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
gprs[dst].du64[0] = s.du64[0] & t.du64[0];
gprs[dst].du64[1] = s.du64[1] & t.du64[1];
}
void pceqb(int dst, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 16; i++) {
gprs[dst].du8[i] = (s.du8[i] == t.du8[i]) ? 0xff : 0;
}
}
void pceqw(int dst, int rs, int rt) {
auto s = gpr_src(rs);
auto t = gpr_src(rt);
for (int i = 0; i < 4; i++) {
gprs[dst].du32[i] = (s.du32[i] == t.du32[i]) ? 0xffffffff : 0;
}
}
// Gather the low halfword of every word of lo and hi into dest. For each 64-bit half of the
// result (base = 0 or 4): halfwords base+0, base+1 come from lo halfwords base+0, base+2 and
// halfwords base+2, base+3 come from hi halfwords base+0, base+2.
void pmfhl_lh(int dest) {
  for (int base = 0; base < 8; base += 4) {
    gprs[dest].du16[base + 0] = lo.du16[base + 0];
    gprs[dest].du16[base + 1] = lo.du16[base + 2];
    gprs[dest].du16[base + 2] = hi.du16[base + 0];
    gprs[dest].du16[base + 3] = hi.du16[base + 2];
  }
}
// dest words 0,1,2,3 = lo word 1, hi word 1, lo word 3, hi word 3.
void pmfhl_uw(int dest) {
  for (int pair = 0; pair < 2; ++pair) {
    const int odd_word = 2 * pair + 1;
    gprs[dest].du32[2 * pair] = lo.du32[odd_word];
    gprs[dest].du32[2 * pair + 1] = hi.du32[odd_word];
  }
}
// dest words 0,1,2,3 = lo word 0, hi word 0, lo word 2, hi word 2.
void pmfhl_lw(int dest) {
  for (int pair = 0; pair < 2; ++pair) {
    const int even_word = 2 * pair;
    gprs[dest].du32[even_word] = lo.du32[even_word];
    gprs[dest].du32[even_word + 1] = hi.du32[even_word];
  }
}
// For each lane selected by `mask`: dest = src0 - (component `bc` of src1).
void vsub_bc(DEST mask, BC bc, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] - rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dest = src0 * (component `bc` of src1).
void vmul_bc(DEST mask, BC bc, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dest = src0 * src1.
void vmul(DEST mask, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: dest = src0 + src1.
void vadd(DEST mask, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] + rhs.f[lane];
  }
}
// For each lane selected by `mask`: dest = min(src0, src1).
void vmini(DEST mask, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = std::min(lhs.f[lane], rhs.f[lane]);
  }
}
// For each lane selected by `mask`: dest = max(src0, src1).
void vmax(DEST mask, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = std::max(lhs.f[lane], rhs.f[lane]);
  }
}
// Rotated move: for each lane selected by `mask`, dest lane i = src lane (i + 1) mod 4,
// i.e. x<-y, y<-z, z<-w, w<-x.
void vmr32(DEST mask, int dest, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (lanes & (1 << lane)) {
      vfs[dest].f[lane] = in.f[(lane + 1) & 3];
    }
  }
}
// For each lane selected by `mask`: dest = src0 - src1.
void vsub(DEST mask, int dest, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = lhs.f[lane] - rhs.f[lane];
  }
}
// For each lane selected by `mask`: acc = src0 * (component `bc` of src1).
void vmula_bc(DEST mask, BC bc, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] = lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: acc = src0 * src1.
void vmula(DEST mask, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] = lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: acc = src0 + (component `bc` of src1).
void vadda_bc(DEST mask, BC bc, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] = lhs.f[lane] + rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: acc += src0 * (component `bc` of src1).
void vmadda_bc(DEST mask, BC bc, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] += lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: acc += src0 * src1.
void vmadda(DEST mask, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] += lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: acc -= src0 * src1.
void vmsuba(DEST mask, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] -= lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: acc -= src0 * (component `bc` of src1).
void vmsuba_bc(DEST mask, BC bc, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    acc.f[lane] -= lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dst = acc + src0 * (component `bc` of src1).
// The accumulator itself is not modified.
void vmadd_bc(DEST mask, BC bc, int dst, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = acc.f[lane] + lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dst = acc + src0 * src1. The accumulator is not modified.
void vmadd(DEST mask, int dst, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = acc.f[lane] + lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: dst = acc - src0 * (component `bc` of src1).
// The accumulator itself is not modified.
void vmsub_bc(DEST mask, BC bc, int dst, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = acc.f[lane] - lhs.f[lane] * rhs.f[(int)bc];
  }
}
// For each lane selected by `mask`: dst = acc - src0 * src1. The accumulator is not modified.
void vmsub(DEST mask, int dst, int src0, int src1) {
  const Mips2c_vf lhs = vf_src(src0);
  const Mips2c_vf rhs = vf_src(src1);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = acc.f[lane] - lhs.f[lane] * rhs.f[lane];
  }
}
// For each lane selected by `mask`: dst = acc - src0 * Q.
void vmsubq(DEST mask, int dst, int src0) {
  const Mips2c_vf lhs = vf_src(src0);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = acc.f[lane] - lhs.f[lane] * Q;
  }
}
// Q = (component bc0 of src0) / (component bc1 of src1). No check for a zero divisor.
void vdiv(int src0, BC bc0, int src1, BC bc1) {
  const float numerator = vf_src(src0).f[(int)bc0];
  const float denominator = vf_src(src1).f[(int)bc1];
  Q = numerator / denominator;
}
// Q = (component bc0 of src0) / sqrt(|component bc1 of src1|).
void vrsqrt(int src0, BC bc0, int src1, BC bc1) {
  const float numerator = vf_src(src0).f[(int)bc0];
  const float radicand = vf_src(src1).f[(int)bc1];
  Q = numerator / std::sqrt(std::abs(radicand));
}
// Q = sqrt(|component bc of src|).
void vsqrt(int src, BC bc) {
  const float radicand = vf_src(src).f[(int)bc];
  Q = std::sqrt(std::abs(radicand));
}
// fprs[dst] = sqrt(|fprs[src]|).
void sqrts(int dst, int src) {
  const float radicand = fprs[src];
  fprs[dst] = std::sqrt(std::abs(radicand));
}
// For each lane selected by `mask`: dst = src * Q.
void vmulq(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = in.f[lane] * Q;
  }
}
// Write the current value of gRng.R to every lane of dst selected by `mask`.
void vrget(DEST mask, int dst) {
  const float random_value = gRng.R;
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = random_value;
  }
}
void vrxor(int src, BC bc) { gRng.rxor(vf_src(src).du32[(int)bc]); }
// For each lane selected by `mask`: dst = src + Q.
void vaddq(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = in.f[lane] + Q;
  }
}
// For each lane selected by `mask`: dst = |src|.
void vabs(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = std::abs(in.f[lane]);
  }
}
// Advance gRng, then write its new R value to every lane of dst selected by `mask`.
void vrnext(DEST mask, int dst) {
  gRng.advance();
  const float random_value = gRng.R;
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = random_value;
  }
}
void mov64(int dest, int src) { gprs[dest].ds64[0] = gpr_src(src).du64[0]; }
// Copy every lane of src selected by `mask` into dest.
void vmove(DEST mask, int dest, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dest].f[lane] = in.f[lane];
  }
}
// dst = 1 if src0 < src1 as signed 64-bit values, else 0.
void slt(int dst, int src0, int src1) {
  const bool is_less = gpr_src(src0).ds64[0] < gpr_src(src1).ds64[0];
  gprs[dst].du64[0] = is_less ? 1 : 0;
}
// dst = 1 if src0 < src1 as unsigned 64-bit values, else 0.
void sltu(int dst, int src0, int src1) {
  const bool is_less = gpr_src(src0).du64[0] < gpr_src(src1).du64[0];
  gprs[dst].du64[0] = is_less ? 1 : 0;
}
// dst = 1 if src < imm as unsigned 64-bit values, else 0.
void sltiu(int dst, int src, u64 imm) {
  const bool is_less = gpr_src(src).du64[0] < imm;
  gprs[dst].du64[0] = is_less ? 1 : 0;
}
// dst = 1 if src < imm as signed 64-bit values, else 0.
void slti(int dst, int src, s64 imm) {
  const bool is_less = gpr_src(src).ds64[0] < imm;
  gprs[dst].du64[0] = is_less ? 1 : 0;
}
void sll(int dst, int src, int sa) {
u32 value = gpr_src(src).du32[0] << sa;
s32 value_signed = value;
gprs[dst].ds64[0] = value_signed;
}
void srl(int dst, int src, int sa) {
u32 value = gpr_src(src).du32[0] >> sa;
s32 value_signed = value;
gprs[dst].ds64[0] = value_signed;
}
void dsra(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> sa; }
void dsrl(int dst, int src, int sa) { gprs[dst].du64[0] = gpr_src(src).du64[0] >> sa; }
void dsrav(int dst, int src, int sa) {
gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> gpr_src(sa).du32[0];
}
void dsllv(int dst, int src, int sa) {
gprs[dst].ds64[0] = gpr_src(src).ds64[0] << (gpr_src(sa).du32[0] & 0b111111);
}
void sllv(int dst, int src, int sa) {
gprs[dst].ds64[0] = gpr_src(src).ds32[0] << (gpr_src(sa).du32[0] & 0b11111);
}
void dsra32(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> (32 + sa); }
void dsrl32(int dst, int src, int sa) { gprs[dst].du64[0] = gpr_src(src).du64[0] >> (32 + sa); }
void sra(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds32[0] >> sa; }
void dsll(int dst, int src0, int sa) { gprs[dst].du64[0] = gpr_src(src0).du64[0] << sa; }
void dsll32(int dst, int src0, int sa) { gprs[dst].du64[0] = gpr_src(src0).du64[0] << (32 + sa); }
void daddu(int dst, int src0, int src1) { gprs[dst].du64[0] = sgpr64(src0) + sgpr64(src1); }
void daddiu(int dst, int src0, s64 imm) { gprs[dst].du64[0] = sgpr64(src0) + imm; }
void addiu(int dst, int src0, s64 imm) {
s32 temp = sgpr64(src0) + imm;
gprs[dst].ds64[0] = temp;
}
void lui(int dst, u32 src) {
s32 val = (src << 16);
gprs[dst].ds64[0] = val;
}
void addu(int dst, int src0, int src1) {
s32 temp = sgpr64(src0) + sgpr64(src1);
gprs[dst].ds64[0] = temp;
}
void dsubu(int dst, int src0, int src1) { gprs[dst].du64[0] = sgpr64(src0) - sgpr64(src1); }
void subu(int dst, int src0, int src1) {
gprs[dst].ds64[0] = gpr_src(src0).ds32[0] - gpr_src(src1).ds32[0];
}
void xor_(int dst, int src0, int src1) { gprs[dst].du64[0] = sgpr64(src0) ^ sgpr64(src1); }
void or_(int dst, int src0, int src1) { gprs[dst].du64[0] = sgpr64(src0) | sgpr64(src1); }
void nor(int dst, int src0, int src1) { gprs[dst].du64[0] = ~(sgpr64(src0) | sgpr64(src1)); }
// Conditional move: if src1's lower 64 bits are zero, dst = src0; otherwise dst is unchanged.
void movz(int dst, int src0, int src1) {
  if (sgpr64(src1) != 0) {
    return;
  }
  gprs[dst].du64[0] = sgpr64(src0);
}
// Conditional move: if src1's lower 64 bits are non-zero, dst = src0; otherwise dst is
// unchanged.
void movn(int dst, int src0, int src1) {
  if (sgpr64(src1) == 0) {
    return;
  }
  gprs[dst].du64[0] = sgpr64(src0);
}
void mult3(int dst, int src0, int src1) {
u32 result = gpr_src(src0).ds32[0] * gpr_src(src1).ds32[0];
s32 sresult = result;
gprs[dst].ds64[0] = sresult;
}
void multu3(int dst, int src0, int src1) {
u32 result = gpr_src(src0).du32[0] * gpr_src(src1).du32[0];
s32 sresult = result;
gprs[dst].ds64[0] = sresult;
}
void xori(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] ^ imm; }
void andi(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] & imm; }
void ori(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] | imm; }
void and_(int dest, int src0, int src1) {
gprs[dest].du64[0] = gpr_src(src0).du64[0] & gpr_src(src1).du64[0];
}
void pextlb(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du8[0] = s1.du8[0];
gprs[dst].du8[1] = s0.du8[0];
gprs[dst].du8[2] = s1.du8[1];
gprs[dst].du8[3] = s0.du8[1];
gprs[dst].du8[4] = s1.du8[2];
gprs[dst].du8[5] = s0.du8[2];
gprs[dst].du8[6] = s1.du8[3];
gprs[dst].du8[7] = s0.du8[3];
gprs[dst].du8[8] = s1.du8[4];
gprs[dst].du8[9] = s0.du8[4];
gprs[dst].du8[10] = s1.du8[5];
gprs[dst].du8[11] = s0.du8[5];
gprs[dst].du8[12] = s1.du8[6];
gprs[dst].du8[13] = s0.du8[6];
gprs[dst].du8[14] = s1.du8[7];
gprs[dst].du8[15] = s0.du8[7];
}
void pextlh(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du16[0] = s1.du16[0];
gprs[dst].du16[1] = s0.du16[0];
gprs[dst].du16[2] = s1.du16[1];
gprs[dst].du16[3] = s0.du16[1];
gprs[dst].du16[4] = s1.du16[2];
gprs[dst].du16[5] = s0.du16[2];
gprs[dst].du16[6] = s1.du16[3];
gprs[dst].du16[7] = s0.du16[3];
}
// Count the leading bits of `in` that are equal to its sign bit (including the sign bit).
// Negative inputs are bitwise inverted first so that only leading zeros need counting; an
// input of 0 or -1 yields 32.
u32 lzocw(s32 in) {
  const s32 folded = (in < 0) ? ~in : in;
  if (folded != 0) {
    return count_leading_zeros_u32(folded);
  }
  return 32;
}
// For each of the two lower 32-bit words of src: dst word = lzocw(word) - 1.
// The upper two words of dst are not written.
void plzcw(int dst, int src) {
  for (int word = 0; word < 2; ++word) {
    gprs[dst].du32[word] = lzocw(gpr_src(src).ds32[word]) - 1;
  }
}
void por(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
gprs[dst].du64[0] = s0.du64[0] | s1.du64[0];
gprs[dst].du64[1] = s0.du64[1] | s1.du64[1];
}
void pmaxw(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
for (int i = 0; i < 4; i++) {
gprs[dst].ds32[i] = std::max(s0.ds32[i], s1.ds32[i]);
}
}
void pminw(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
for (int i = 0; i < 4; i++) {
gprs[dst].ds32[i] = std::min(s0.ds32[i], s1.ds32[i]);
}
}
void pmaxh(int dst, int src0, int src1) {
auto s0 = gpr_src(src0);
auto s1 = gpr_src(src1);
for (int i = 0; i < 8; i++) {
gprs[dst].ds16[i] = std::max(s0.ds16[i], s1.ds16[i]);
}
}
void pcpyh(int dst, int src) {
auto s0 = gpr_src(src);
gprs[dst].du16[0] = s0.du16[0];
gprs[dst].du16[1] = s0.du16[0];
gprs[dst].du16[2] = s0.du16[0];
gprs[dst].du16[3] = s0.du16[0];
gprs[dst].du16[4] = s0.du16[4];
gprs[dst].du16[5] = s0.du16[4];
gprs[dst].du16[6] = s0.du16[4];
gprs[dst].du16[7] = s0.du16[4];
}
void psrah(int dest, int src, int sa) {
auto s = gpr_src(src);
for (int i = 0; i < 8; i++) {
gprs[dest].ds16[i] = s.ds16[i] >> (sa & 0xf);
}
}
void mov128_vf_gpr(int dst, int src) { memcpy(vfs[dst].f, gpr_src(src).f, 16); }
void mov128_gpr_vf(int dst, int src) { memcpy(gprs[dst].f, vf_src(src).f, 16); }
void mov128_gpr_gpr(int dst, int src) { gprs[dst] = gpr_src(src); }
// For each lane selected by `mask`: convert the signed 32-bit integer in src to float.
void vitof0(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = (float)in.ds32[lane];
  }
}
// For each lane selected by `mask`: convert the signed 32-bit integer in src to float and
// scale by 1/4096 (12 fractional bits).
void vitof12(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = ((float)in.ds32[lane]) * (1.f / 4096.f);
  }
}
// For each lane selected by `mask`: convert the signed 32-bit integer in src to float and
// scale by 1/32768 (15 fractional bits).
void vitof15(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].f[lane] = ((float)in.ds32[lane]) * (1.f / 32768.f);
  }
}
// For each lane selected by `mask`: multiply the float in src by 4096 and convert to a signed
// 32-bit integer (no saturation).
void vftoi12(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].ds32[lane] = (s32)(in.f[lane] * 4096.f);
  }
}
// For each lane selected by `mask`: multiply the float in src by 16 and convert to a signed
// 32-bit integer (no saturation).
void vftoi4(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].ds32[lane] = (s32)(in.f[lane] * 16.f);
  }
}
// Convert a float to s32, clamping values at or beyond the s32 range to INT32_MAX / INT32_MIN.
s32 float_to_int_sat(float f) {
  if (f >= (float)INT32_MAX) {
    return INT32_MAX;
  }
  if (f <= (float)INT32_MIN) {
    return INT32_MIN;
  }
  return f;
}
// For each lane selected by `mask`: multiply the float in src by 16 and convert to a signed
// 32-bit integer with saturation (see float_to_int_sat).
void vftoi4_sat(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].ds32[lane] = float_to_int_sat(in.f[lane] * 16.f);
  }
}
// For each lane selected by `mask`: convert the float in src to a signed 32-bit integer
// (no saturation).
void vftoi0(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].ds32[lane] = (s32)in.f[lane];
  }
}
// For each lane selected by `mask`: convert the float in src to a signed 32-bit integer with
// saturation (see float_to_int_sat).
void vftoi0_sat(DEST mask, int dst, int src) {
  const Mips2c_vf in = vf_src(src);
  const u64 lanes = (u64)mask;
  for (int lane = 0; lane < 4; ++lane) {
    if (!(lanes & (1 << lane))) {
      continue;
    }
    vfs[dst].ds32[lane] = float_to_int_sat(in.f[lane]);
  }
}
void mfc1(int dst, int src) {
s32 val;
memcpy(&val, &fprs[src], 4);
gprs[dst].ds64[0] = val;
}
void mtc1(int dst, int src) {
u32 val = gpr_src(src).du32[0];
memcpy(&fprs[dst], &val, 4);
}
// fprs[dst] = fprs[src0] * fprs[src1].
void muls(int dst, int src0, int src1) {
  fprs[dst] = fprs[src0] * fprs[src1];
}
// fprs[dst] = fprs[src0] + fprs[src1].
void adds(int dst, int src0, int src1) {
  fprs[dst] = fprs[src0] + fprs[src1];
}
// fprs[dst] = fprs[src0] - fprs[src1].
void subs(int dst, int src0, int src1) {
  fprs[dst] = fprs[src0] - fprs[src1];
}
// Float divide with an explicit divide-by-zero rule: when the divisor is zero, the result is
// the largest finite float, negated if the dividend is negative. Otherwise a plain division.
void divs_accurate(int dst, int src0, int src1) {
  const float dividend = fprs[src0];
  const float divisor = fprs[src1];
  if (divisor == 0) {
    const float largest = std::numeric_limits<float>::max();
    fprs[dst] = (dividend < 0) ? -largest : largest;
    return;
  }
  fprs[dst] = dividend / divisor;
}
// Plain float divide: fprs[dst] = fprs[src0] / fprs[src1]. A zero divisor is not checked;
// see divs_accurate for the variant that handles it.
void divs(int dst, int src0, int src1) {
  const float dividend = fprs[src0];
  const float divisor = fprs[src1];
  fprs[dst] = dividend / divisor;
}
// fprs[dst] = min(fprs[src0], fprs[src1]).
void mins(int dst, int src0, int src1) {
  fprs[dst] = std::min(fprs[src0], fprs[src1]);
}
// fprs[dst] = max(fprs[src0], fprs[src1]).
void maxs(int dst, int src0, int src1) {
  fprs[dst] = std::max(fprs[src0], fprs[src1]);
}
void negs(int dst, int src) {
u32 v;
memcpy(&v, &fprs[src], 4);
v ^= 0x80000000;
memcpy(&fprs[dst], &v, 4);
}
// fprs[dst] = fprs[src].
void movs(int dst, int src) {
  fprs[dst] = fprs[src];
}
// fprs[dst] = |fprs[src]|.
void abss(int dst, int src) {
  fprs[dst] = std::abs(fprs[src]);
}
void cvtws(int dst, int src) {
// float to int
s32 value = fprs[src];
memcpy(&fprs[dst], &value, 4);
}
void cvtsw(int dst, int src) {
// int to float
s32 value;
memcpy(&value, &fprs[src], 4);
fprs[dst] = value;
}
// Intentionally does nothing: vdiv/vsqrt/vrsqrt above write Q immediately, so there is
// nothing to wait for.
void vwaitq() {
}
void vopmula(int src0, int src1) {
auto s0 = vf_src(src0);
auto s1 = vf_src(src1);
acc.f[0] = s0.f[1] * s1.f[2];
acc.f[1] = s0.f[2] * s1.f[0];
acc.f[2] = s0.f[0] * s1.f[1];
}
void vopmsub(int dst, int src0, int src1) {
auto s0 = vf_src(src0); // fs
auto s1 = vf_src(src1); // ft
vfs[dst].f[0] = acc.f[0] - s0.f[1] * s1.f[2];
vfs[dst].f[1] = acc.f[1] - s0.f[2] * s1.f[0];
vfs[dst].f[2] = acc.f[2] - s0.f[0] * s1.f[1];
}
// copied from PCSX2, to handle the weirdo special case in lwr.
// Lookup tables for lwl/lwr, indexed by (address & 3).
// *_MASK selects the bits of the destination register that are kept;
// *_SHIFT is how far the aligned memory word is shifted (left for lwl, right for lwr) before
// being merged in.
static constexpr u32 LWL_MASK[4] = {0xffffff, 0x0000ffff, 0x000000ff, 0x00000000};
static constexpr u32 LWR_MASK[4] = {0x000000, 0xff000000, 0xffff0000, 0xffffff00};
static constexpr u8 LWL_SHIFT[4] = {24, 16, 8, 0};
static constexpr u8 LWR_SHIFT[4] = {0, 8, 16, 24};
void lwl(int dst, int offset, int addr_reg) {
s32 addr = sgpr64(addr_reg) + offset;
u32 shift = addr & 3;
u32 mem;
memcpy(&mem, g_ee_main_mem + (addr & ~3), 4);
if (!dst)
return;
// ensure the compiler does correct sign extension into 64 bits by using s32
gprs[dst].ds64[0] = (s32)((gprs[dst].du32[0] & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]));
/*
Mem = 1234. Reg = abcd
(result is always sign extended into the upper 32 bits of the Rt)
0 4bcd (mem << 24) | (reg & 0x00ffffff)
1 34cd (mem << 16) | (reg & 0x0000ffff)
2 234d (mem << 8) | (reg & 0x000000ff)
3 1234 (mem ) | (reg & 0x00000000)
*/
}
void lwr(int dst, int offset, int addr_reg) {
s32 addr = sgpr64(addr_reg) + offset;
u32 shift = addr & 3;
u32 mem;
memcpy(&mem, g_ee_main_mem + (addr & ~3), 4);
if (!dst)
return;
// Use unsigned math here, and conditionally sign extend below, when needed.
mem = (gprs[dst].du32[0] & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]);
if (shift == 0) {
// This special case requires sign extension into the full 64 bit dest.
gprs[dst].ds64[0] = (s32)mem;
} else {
// This case sets the lower 32 bits of the target register. Upper
// 32 bits are always preserved.
gprs[dst].du32[0] = mem;
}
}
// Lookup tables for the unaligned doubleword loads ldl/ldr, indexed by
// (address & 7). *_MASK selects the destination register bits that are
// preserved; *_SHIFT is how far the loaded doubleword is shifted before being
// merged in.
static constexpr u64 LDL_MASK[8] = {
    0x00ffffffffffffffULL,  // 0
    0x0000ffffffffffffULL,  // 1
    0x000000ffffffffffULL,  // 2
    0x00000000ffffffffULL,  // 3
    0x0000000000ffffffULL,  // 4
    0x000000000000ffffULL,  // 5
    0x00000000000000ffULL,  // 6
    0x0000000000000000ULL,  // 7: register fully replaced
};
static constexpr u64 LDR_MASK[8] = {
    0x0000000000000000ULL,  // 0: register fully replaced
    0xff00000000000000ULL,  // 1
    0xffff000000000000ULL,  // 2
    0xffffff0000000000ULL,  // 3
    0xffffffff00000000ULL,  // 4
    0xffffffffff000000ULL,  // 5
    0xffffffffffff0000ULL,  // 6
    0xffffffffffffff00ULL,  // 7
};
static constexpr u8 LDR_SHIFT[8] = {0, 8, 16, 24, 32, 40, 48, 56};  // right shift of memory
static constexpr u8 LDL_SHIFT[8] = {56, 48, 40, 32, 24, 16, 8, 0};  // left shift of memory
void ldl(int dst, int offset, int addr_reg) {
u32 addr = sgpr64(addr_reg) + offset;
u32 shift = addr & 7;
u64 mem;
// memRead64(addr & ~7, &mem);
memcpy(&mem, g_ee_main_mem + (addr & ~7), 8);
if (!dst)
return;
gprs[dst].du64[0] = (gprs[dst].du64[0] & LDL_MASK[shift]) | (mem << LDL_SHIFT[shift]);
}
void ldr(int dst, int offset, int addr_reg) {
u32 addr = sgpr64(addr_reg) + offset;
u32 shift = addr & 7;
u64 mem;
// memRead64(addr & ~7, &mem);
memcpy(&mem, g_ee_main_mem + (addr & ~7), 8);
if (!dst)
return;
gprs[dst].du64[0] = (gprs[dst].du64[0] & LDR_MASK[shift]) | (mem >> LDR_SHIFT[shift]);
}
// Compute clipping flags for the x/y/z of one vector against |w| of another.
// The previous flags are shifted up by 6 bits and the 6 new flags are placed
// in the low bits; only 24 bits (four results) are kept.
//
// New flag layout, two bits per axis:
//   bit 0: x > +|w|   bit 1: x < -|w|
//   bit 2: y > +|w|   bit 3: y < -|w|
//   bit 4: z > +|w|   bit 5: z < -|w|
//
// xyz_idx:  vf register providing x, y, z
// w_idx:    vf register providing w
// old_clip: previous clipping flags
// returns:  the updated clipping flags
u32 clip(int xyz_idx, int w_idx, u32 old_clip) {
  u32 flags = old_clip << 6;
  const auto point = vf_src(xyz_idx);
  const float limit = std::abs(vf_src(w_idx).f[3]);
  const float neg_limit = -limit;

  for (int axis = 0; axis < 3; axis++) {
    const int bit = 2 * axis;
    if (point.f[axis] > limit) {
      flags |= (u32(1) << bit);
    }
    if (point.f[axis] < neg_limit) {
      flags |= (u32(2) << bit);
    }
  }

  return flags & 0xffffff;  // only 24 bits
}
// Debug helper: format the four float components of a vf register as a
// space-separated string, in index order 0, 1, 2, 3.
std::string print_vf_float(int vf) {
  const auto reg = vf_src(vf);
  const auto& v = reg.f;
  return fmt::format("{} {} {} {}", v[0], v[1], v[2], v[3]);
}
};
// Compile-time guard: the build fails if ExecutionContext grows beyond 1280 bytes.
// NOTE(review): presumably other code reserves a fixed 1280-byte area for it - confirm
// before raising this limit.
static_assert(sizeof(ExecutionContext) <= 1280);
inline void get_fake_spad_addr(int dst, void* sym_addr, u32 offset, ExecutionContext* c) {
u32 val;
memcpy(&val, sym_addr, 4);
c->gprs[dst].du64[0] = val + offset;
}
inline void get_fake_spad_addr2(int dst, void* sym_addr, u32 offset, ExecutionContext* c) {
u32 val;
memcpy(&val, (u8*)sym_addr - 1, 4);
c->gprs[dst].du64[0] = val + offset;
}
// Convert the pointer to an integer, subtract 3, run the result through
// align4(), and convert back to a pointer.
inline void* align4_ptr(void* spad_sym_addr) {
  const uintptr_t raw = reinterpret_cast<uintptr_t>(spad_sym_addr);
  return reinterpret_cast<void*>(align4(raw - 3));
}
inline void spad_to_dma(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) {
u32 spad_addr_goal;
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
sadr -= spad_addr_goal;
ASSERT((madr & 0xf) == 0);
ASSERT((sadr & 0xf) == 0);
ASSERT(sadr < 0x4000);
ASSERT((sadr + 16 * qwc) <= 0x4000);
ASSERT(qwc <= 0x4000);
void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr;
memcpy(spad_addr_c, g_ee_main_mem + madr, qwc * 16);
}
inline void spad_to_dma_no_sadr_off(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) {
u32 spad_addr_goal;
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
ASSERT((madr & 0xf) == 0);
ASSERT((sadr & 0xf) == 0);
ASSERT(sadr < 0x4000);
ASSERT((sadr + 16 * qwc) <= 0x4000);
ASSERT(qwc <= 0x4000);
void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr;
memcpy(spad_addr_c, g_ee_main_mem + madr, qwc * 16);
}
inline void spad_to_dma_no_sadr_off_bones_interleave(void* spad_sym_addr,
u32 madr,
u32 sadr,
u32 qwc) {
u32 spad_addr_goal;
2022-11-30 22:36:09 -05:00
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
ASSERT((madr & 0xf) == 0);
ASSERT((sadr & 0xf) == 0);
ASSERT(sadr < 0x4000);
ASSERT((sadr + 16 * qwc) <= 0x4000);
ASSERT(qwc <= 0x4000);
u8* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr;
const u8* mem_addr = g_ee_main_mem + madr;
ASSERT((qwc & 3) == 0);
while (qwc > 0) {
// transfer 4.
memcpy(spad_addr_c, mem_addr, 4 * 16);
spad_addr_c += (4 * 16);
sadr += 4 * 16;
// but skip 5
mem_addr += (5 * 16);
qwc -= 4;
}
}
inline void spad_from_dma(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) {
u32 spad_addr_goal;
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
sadr -= spad_addr_goal;
ASSERT((madr & 0xf) == 0);
ASSERT((sadr & 0xf) == 0);
ASSERT(sadr < 0x4000);
ASSERT((sadr + 16 * qwc) <= 0x4000);
ASSERT(qwc <= 0x4000);
void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr;
memcpy(g_ee_main_mem + madr, spad_addr_c, qwc * 16);
}
inline void spad_from_dma_no_sadr_off(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) {
u32 spad_addr_goal;
2022-11-30 22:36:09 -05:00
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
ASSERT((madr & 0xf) == 0);
ASSERT((sadr & 0xf) == 0);
ASSERT(sadr < 0x4000);
ASSERT((sadr + 16 * qwc) <= 0x4000);
ASSERT(qwc <= 0x4000);
void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr;
memcpy(g_ee_main_mem + madr, spad_addr_c, qwc * 16);
}
inline void load_vfs_from_tf_regs(const void* tf_regs_sym, ExecutionContext* c) {
u32 goal_addr_of_vf1;
memcpy(&goal_addr_of_vf1, tf_regs_sym, 4);
u8* c_addr_of_vf1 = g_ee_main_mem + goal_addr_of_vf1;
memcpy(&c->vfs[1], c_addr_of_vf1, 31 * 16);
}
inline void spad_to_dma_blerc_chain(void* spad_sym_addr, u32 sadr, u32 tadr) {
u32 spad_addr_goal;
[merc2] support vertex updates, use this for blerc in jak 1 and jak 2 (#2179) This PR adds a feature to merc2 to update vertices. This will be needed to efficient do effects like blerc/ripple/texture scroll. It's enabled for blerc in jak 1 and jak 2, but with a few disclaimers: - currently we still use the mips2c blerc implementation, which is slow and has some "jittering" because of integer precision. When porting to PC, there was an additional synchronization problem because blerc overwrites the merc data as its being read by the renderers. I _think_ this wasn't an issue on PS2 because the blerc dma is higher priority than the VIF1 DMA, but I'm not certain. Either way, I had to add a mutex for this on PC to avoid very slight flickering/gaps. This isn't ideal for performance, but still beats generic by a significant amount in every place I tested. If you see merc taking 2ms to draw, it is likely because it is stuck waiting on blerc to finish. This will go away once blerc itself is ported to C++. - in jak 1, we end up using generic in some cases where we could use merc. In particular maia in village3 hut. This will be fixed later once we can use merc in more places. I don't want to mess with the merc/generic selection logic when we're hopefully going to get rid of it soon. - There is no support for ripple or texture scroll. These use generic on jak 1, and remain broken on jak 2. - Like with `emerc`, jak 1 has a toggle to go back to the old behavior `*blerc-hack*`. - In most cases, toggling this causes no visual differences. One exception is Gol's teeth. I believe this is caused by texture coordinate rounding issues, where generic has an additional float -> int -> float compared to PC merc. It is very hard to notice so I'm not going to worry about it.
2023-01-31 18:23:39 -05:00
memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4);
void* spad_addr_c = g_ee_main_mem + spad_addr_goal;
ASSERT(sadr < 0x4000);
emulate_dma(g_ee_main_mem, spad_addr_c, tadr, sadr);
}
} // namespace Mips2C