[graphics] Add a specialized renderer for handling merc/generic output data (#1207)

* second try at writing the direct renderer

* finish direct2

* more optimization
This commit is contained in:
water111 2022-02-27 17:23:12 -05:00 committed by GitHub
parent 42806d504f
commit 68a1f2d012
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 1742 additions and 184 deletions

View file

@ -74,6 +74,15 @@ FixedChunkDmaCopier::FixedChunkDmaCopier(u32 main_memory_size)
m_chunk_mask.resize(m_chunk_count);
}
void FixedChunkDmaCopier::set_input_data(const void* memory, u32 offset, bool run_copy) {
if (run_copy) {
run(memory, offset, false);
} else {
m_input_offset = offset;
m_input_data = memory;
}
}
const DmaData& FixedChunkDmaCopier::run(const void* memory, u32 offset, bool verify) {
Timer timer;
m_input_offset = offset;

View file

@ -22,6 +22,8 @@ class FixedChunkDmaCopier {
static constexpr u32 chunk_size = 0x20000; // 128 kB, gives use 1024 chunks for a 128 MB RAM.
FixedChunkDmaCopier(u32 main_memory_size);
void set_input_data(const void* memory, u32 offset, bool run);
const DmaData& run(const void* memory, u32 offset, bool verify = false);
void serialize_last_result(Serializer& serializer);

View file

@ -389,6 +389,21 @@ std::string DrawMode::to_string() const {
case AlphaBlend::DISABLED:
result += "disabled\n";
break;
case AlphaBlend::SRC_DST_FIX_DST:
result += "src, dst, fix, dst\n";
break;
case AlphaBlend::SRC_0_DST_DST:
result += "src, 0, dst, dst\n";
break;
case AlphaBlend::SRC_SRC_SRC_SRC:
result += "src, src, src, src\n";
break;
case AlphaBlend::ZERO_SRC_SRC_DST:
result += "0, src, src, dst\n";
break;
case AlphaBlend::SRC_0_FIX_DST:
result += "src, 0, fix, dst\n";
break;
default:
ASSERT(false);
}
@ -409,7 +424,8 @@ std::string DrawMode::to_string() const {
result += "never\n";
break;
default:
ASSERT(false);
result += "invalid!\n";
break;
}
result += fmt::format(" zte: {}\n", get_zt_enable());
result += fmt::format(" abe: {}\n", get_ab_enable());
@ -430,5 +446,6 @@ std::string DrawMode::to_string() const {
default:
ASSERT(false);
}
result += fmt::format(" fog: {}\n decal: {}\n", get_fog_enable(), get_decal());
return result;
}

View file

@ -365,6 +365,8 @@ class DrawMode {
SRC_0_FIX_DST = 3, // fix = 128
SRC_DST_FIX_DST = 4, // fix = 64
ZERO_SRC_SRC_DST = 5,
SRC_SRC_SRC_SRC = 6,
SRC_0_DST_DST = 7
};
enum class AlphaTest {
@ -483,8 +485,27 @@ class DrawMode {
bool get_decal() const { return !(m_val & (1 << 28)); }
void enable_decal() { m_val = m_val & (~(1 << 28)); }
void disable_decal() { m_val = m_val | (1 << 28); }
void set_decal(bool en) {
if (en) {
enable_decal();
} else {
disable_decal();
}
}
bool get_fog_enable() const { return m_val & (1 << 29); }
void enable_fog() { m_val = m_val | (1 << 29); }
void disable_fog() { m_val = m_val & (~(1 << 29)); }
void set_fog(bool en) {
if (en) {
enable_fog();
} else {
disable_fog();
}
}
u32& as_int() { return m_val; }
const u32& as_int() const { return m_val; }
bool operator==(const DrawMode& other) const { return m_val == other.m_val; }
bool operator!=(const DrawMode& other) const { return m_val != other.m_val; }
@ -508,5 +529,6 @@ class DrawMode {
// 23 t clamp
// 24 - 27 alpha blend
// 28 !decal
// 29 fge
u32 m_val = UINT32_MAX;
};

View file

@ -77,6 +77,7 @@ set(RUNTIME_SOURCE
graphics/opengl_renderer/BucketRenderer.cpp
graphics/opengl_renderer/debug_gui.cpp
graphics/opengl_renderer/DirectRenderer.cpp
graphics/opengl_renderer/DirectRenderer2.cpp
graphics/opengl_renderer/EyeRenderer.cpp
graphics/opengl_renderer/GenericProgram.cpp
graphics/opengl_renderer/GenericRenderer.cpp

View file

@ -92,6 +92,7 @@ struct SharedRenderState {
bool render_debug = false;
bool enable_merc_xgkick = true;
bool enable_generic_xgkick = true;
bool use_direct2 = true;
math::Vector<u8, 4> fog_color;
float fog_intensity = 1.f;
@ -116,6 +117,7 @@ class BucketRenderer {
virtual bool empty() const { return false; }
virtual void draw_debug_window() = 0;
virtual void serialize(Serializer&) {}
virtual void init_shaders(ShaderLibrary&) {}
protected:
std::string m_name;

View file

@ -0,0 +1,757 @@
#include "DirectRenderer2.h"
#include "third-party/imgui/imgui.h"
#include "common/log/log.h"
#include <immintrin.h>
DirectRenderer2::DirectRenderer2(u32 max_verts,
u32 max_inds,
u32 max_draws,
const std::string& name)
: m_name(name) {
// allocate buffers
m_vertices.vertices.resize(max_verts);
m_vertices.indices.resize(max_inds);
m_draw_buffer.resize(max_draws);
// create OpenGL objects
glGenBuffers(1, &m_ogl.vertex_buffer);
glGenBuffers(1, &m_ogl.index_buffer);
glGenVertexArrays(1, &m_ogl.vao);
// set up the vertex array
glBindVertexArray(m_ogl.vao);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_ogl.index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, max_inds * sizeof(u32), nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, max_verts * sizeof(Vertex), nullptr, GL_STREAM_DRAW);
// xyz
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, // location 0 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_TRUE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, xyz) // offset in array
);
// rgba
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, // location 1 in the shader
4, // 4 color components
GL_UNSIGNED_BYTE, // u8
GL_TRUE, // normalized (255 becomes 1)
sizeof(Vertex), //
(void*)offsetof(Vertex, rgba) //
);
// stq
glEnableVertexAttribArray(2);
glVertexAttribPointer(2, // location 2 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_FALSE, // normalized, ignored
sizeof(Vertex), //
(void*)offsetof(Vertex, stq) // offset in array
);
// byte data
glEnableVertexAttribArray(3);
glVertexAttribIPointer(3, // location 0 in the shader
4, // 3 floats per vert
GL_UNSIGNED_BYTE, // u8's
sizeof(Vertex), //
(void*)offsetof(Vertex, tex_unit) // offset in array
);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
}
DirectRenderer2::~DirectRenderer2() {
glDeleteBuffers(1, &m_ogl.vertex_buffer);
glDeleteBuffers(1, &m_ogl.index_buffer);
glDeleteVertexArrays(1, &m_ogl.vao);
}
void DirectRenderer2::init_shaders(ShaderLibrary& shaders) {
shaders[ShaderId::DIRECT2].activate();
m_ogl.alpha_reject = glGetUniformLocation(shaders[ShaderId::DIRECT2].id(), "alpha_reject");
m_ogl.color_mult = glGetUniformLocation(shaders[ShaderId::DIRECT2].id(), "color_mult");
m_ogl.fog_color = glGetUniformLocation(shaders[ShaderId::DIRECT2].id(), "fog_color");
}
void DirectRenderer2::reset_buffers() {
m_next_free_draw = 0;
m_vertices.next_index = 0;
m_vertices.next_vertex = 0;
m_state.next_vertex_starts_strip = true;
m_state.strip_warmup = 0;
m_current_state_has_open_draw = false;
}
void DirectRenderer2::reset_state() {
m_state = {};
m_stats = {};
if (m_next_free_draw || m_vertices.next_vertex || m_vertices.next_index) {
fmt::print("[{}] Call to reset_state while there was pending draw data!\n", m_name);
}
reset_buffers();
}
std::string DirectRenderer2::Vertex::print() const {
return fmt::format("{} {} {}\n", xyz.to_string_aligned(), stq.to_string_aligned(), rgba[0]);
}
std::string DirectRenderer2::Draw::to_string() const {
std::string result;
result += mode.to_string();
result += fmt::format("TBP: 0x{:x}\n", tbp);
result += fmt::format("fix: 0x{:x}\n", fix);
return result;
}
std::string DirectRenderer2::Draw::to_single_line_string() const {
return fmt::format("mode 0x{:8x} tbp 0x{:4x} fix 0x{:2x}\n", mode.as_int(), tbp, fix);
}
void DirectRenderer2::flush_pending(SharedRenderState* render_state, ScopedProfilerNode& prof) {
// skip, if we're empty.
if (m_next_free_draw == 0) {
reset_buffers();
return;
}
// first, upload:
Timer upload_timer;
glBindVertexArray(m_ogl.vao);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, m_vertices.next_vertex * sizeof(Vertex), m_vertices.vertices.data(),
GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_ogl.index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_vertices.next_index * sizeof(u32),
m_vertices.indices.data(), GL_STREAM_DRAW);
m_stats.upload_wait += upload_timer.getSeconds();
m_stats.num_uploads++;
m_stats.upload_bytes +=
(m_vertices.next_vertex * sizeof(Vertex)) + (m_vertices.next_index * sizeof(u32));
// initial OpenGL setup
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(UINT32_MAX);
render_state->shaders[ShaderId::DIRECT2].activate();
// draw call loop
// draw_call_loop_simple(render_state, prof);
draw_call_loop_grouped(render_state, prof);
// done! reset.
glBindVertexArray(0);
reset_buffers();
}
void DirectRenderer2::draw_call_loop_simple(SharedRenderState* render_state,
ScopedProfilerNode& prof) {
fmt::print("------------------------\n");
for (u32 draw_idx = 0; draw_idx < m_next_free_draw; draw_idx++) {
const auto& draw = m_draw_buffer[draw_idx];
fmt::print("{}", draw.to_single_line_string());
setup_opengl_for_draw_mode(draw, render_state);
setup_opengl_tex(0, draw.tbp, draw.mode.get_filt_enable(), draw.mode.get_clamp_s_enable(),
draw.mode.get_clamp_t_enable(), render_state);
void* offset = (void*)(draw.start_index * sizeof(u32));
int end_idx;
if (draw_idx == m_next_free_draw - 1) {
end_idx = m_vertices.next_index;
} else {
end_idx = m_draw_buffer[draw_idx + 1].start_index;
}
glDrawElements(GL_TRIANGLES, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset);
prof.add_draw_call();
prof.add_tri((end_idx - draw.start_index) / 3);
}
}
void DirectRenderer2::draw_call_loop_grouped(SharedRenderState* render_state,
ScopedProfilerNode& prof) {
u32 draw_idx = 0;
while (draw_idx < m_next_free_draw) {
const auto& draw = m_draw_buffer[draw_idx];
u32 end_of_draw_group = draw_idx; // this is inclusive
setup_opengl_for_draw_mode(draw, render_state);
setup_opengl_tex(draw.tex_unit, draw.tbp, draw.mode.get_filt_enable(),
draw.mode.get_clamp_s_enable(), draw.mode.get_clamp_t_enable(), render_state);
for (u32 draw_to_consider = draw_idx + 1; draw_to_consider < draw_idx + TEX_UNITS;
draw_to_consider++) {
if (draw_to_consider >= m_next_free_draw) {
break;
}
const auto& next_draw = m_draw_buffer[draw_to_consider];
if (next_draw.mode.as_int() != draw.mode.as_int()) {
break;
}
if (next_draw.fix != draw.fix) {
break;
}
m_stats.saved_draws++;
end_of_draw_group++;
setup_opengl_tex(next_draw.tex_unit, next_draw.tbp, next_draw.mode.get_filt_enable(),
next_draw.mode.get_clamp_s_enable(), next_draw.mode.get_clamp_t_enable(),
render_state);
}
u32 end_idx;
if (end_of_draw_group == m_next_free_draw - 1) {
end_idx = m_vertices.next_index;
} else {
end_idx = m_draw_buffer[end_of_draw_group + 1].start_index;
}
void* offset = (void*)(draw.start_index * sizeof(u32));
// fmt::print("drawing {:4d} with abe {} tex {} {}", end_idx - draw.start_index,
// (int)draw.mode.get_ab_enable(), end_of_draw_group - draw_idx, draw.to_single_line_string() );
// fmt::print("{}\n", draw.mode.to_string());
glDrawElements(GL_TRIANGLES, end_idx - draw.start_index, GL_UNSIGNED_INT, (void*)offset);
prof.add_draw_call();
prof.add_tri((end_idx - draw.start_index) / 3);
draw_idx = end_of_draw_group + 1;
}
}
void DirectRenderer2::setup_opengl_for_draw_mode(const Draw& draw,
SharedRenderState* render_state) {
// compute alpha_reject:
float alpha_reject = 0.f;
if (draw.mode.get_at_enable()) {
switch (draw.mode.get_alpha_test()) {
case DrawMode::AlphaTest::ALWAYS:
break;
case DrawMode::AlphaTest::GEQUAL:
alpha_reject = draw.mode.get_aref() / 128.f;
break;
case DrawMode::AlphaTest::NEVER:
break;
default:
fmt::print("unknown alpha test: {}\n", (int)draw.mode.get_alpha_test());
ASSERT(false);
}
}
// setup blending and color mult
float color_mult = 1.f;
if (!draw.mode.get_ab_enable()) {
glDisable(GL_BLEND);
} else {
glEnable(GL_BLEND);
glBlendColor(1, 1, 1, 1);
if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_DST_SRC_DST) {
// (Cs - Cd) * As + Cd
// Cs * As + (1 - As) * Cd
// s, d
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glBlendEquation(GL_FUNC_ADD);
} else if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_0_SRC_DST) {
// (Cs - 0) * As + Cd
// Cs * As + (1) * Cd
// s, d
ASSERT(draw.fix == 0);
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
glBlendEquation(GL_FUNC_ADD);
} else if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::ZERO_SRC_SRC_DST) {
// (0 - Cs) * As + Cd
// Cd - Cs * As
// s, d
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
glBlendEquation(GL_FUNC_REVERSE_SUBTRACT);
} else if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_DST_FIX_DST) {
// (Cs - Cd) * fix + Cd
// Cs * fix + (1 - fx) * Cd
glBlendFunc(GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA);
glBlendColor(0, 0, 0, draw.fix / 127.f);
glBlendEquation(GL_FUNC_ADD);
} else if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_SRC_SRC_SRC) {
// this is very weird...
// Cs
glBlendFunc(GL_ONE, GL_ZERO);
glBlendEquation(GL_FUNC_ADD);
} else if (draw.mode.get_alpha_blend() == DrawMode::AlphaBlend::SRC_0_DST_DST) {
// (Cs - 0) * Ad + Cd
glBlendFunc(GL_DST_ALPHA, GL_ONE);
glBlendEquation(GL_FUNC_ADD);
color_mult = 0.5;
} else {
ASSERT(false);
}
}
// setup ztest
if (draw.mode.get_zt_enable()) {
glEnable(GL_DEPTH_TEST);
switch (draw.mode.get_depth_test()) {
case GsTest::ZTest::NEVER:
glDepthFunc(GL_NEVER);
break;
case GsTest::ZTest::ALWAYS:
glDepthFunc(GL_ALWAYS);
break;
case GsTest::ZTest::GEQUAL:
glDepthFunc(GL_GEQUAL);
break;
case GsTest::ZTest::GREATER:
glDepthFunc(GL_GREATER);
break;
default:
ASSERT(false);
}
} else {
// you aren't supposed to turn off z test enable, the GS had some bugs
ASSERT(false);
}
if (draw.mode.get_depth_write_enable()) {
glDepthMask(GL_TRUE);
} else {
glDepthMask(GL_FALSE);
}
if (draw.tbp == UINT16_MAX) {
// not using a texture
ASSERT(false);
render_state->shaders[ShaderId::DIRECT_BASIC].activate();
} else {
// yes using a texture
render_state->shaders[ShaderId::DIRECT2].activate();
glUniform1f(m_ogl.alpha_reject, alpha_reject);
glUniform1f(m_ogl.color_mult, color_mult);
glUniform4f(m_ogl.fog_color, render_state->fog_color[0], render_state->fog_color[1],
render_state->fog_color[2], render_state->fog_intensity);
}
}
void DirectRenderer2::setup_opengl_tex(u16 unit,
u16 tbp,
bool filter,
bool clamp_s,
bool clamp_t,
SharedRenderState* render_state) {
// look up the texture
TextureRecord* tex = nullptr;
u32 tbp_to_lookup = tbp & 0x7fff;
bool use_mt4hh = tbp & 0x8000;
if (use_mt4hh) {
tex = render_state->texture_pool->lookup_mt4hh(tbp_to_lookup);
} else {
tex = render_state->texture_pool->lookup(tbp_to_lookup);
}
if (!tex) {
// TODO Add back
if (tbp_to_lookup >= 8160 && tbp_to_lookup <= 8600) {
fmt::print("Failed to find texture at {}, using random (eye zone)\n", tbp_to_lookup);
tex = render_state->texture_pool->get_random_texture();
} else {
fmt::print("Failed to find texture at {}, using random\n", tbp_to_lookup);
tex = render_state->texture_pool->get_random_texture();
}
}
if (!tex->on_gpu) {
render_state->texture_pool->upload_to_gpu(tex);
}
glActiveTexture(GL_TEXTURE0 + unit);
glBindTexture(GL_TEXTURE_2D, tex->gpu_texture);
if (clamp_s) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
}
if (clamp_t) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
}
if (filter) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
m_debug.disable_mip ? GL_LINEAR : GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
}
void DirectRenderer2::draw_debug_window() {
ImGui::Text("Uploads: %d", m_stats.num_uploads);
ImGui::Text("Upload time: %.3f ms", m_stats.upload_wait * 1000);
ImGui::Text("Upload size: %d bytes", m_stats.upload_bytes);
ImGui::Text("Flush due to full: %d times", m_stats.flush_due_to_full);
}
void DirectRenderer2::render_gif_data(const u8* data,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
bool eop = false;
u32 offset = 0;
while (!eop) {
GifTag tag(data + offset);
offset += 16;
// unpack registers.
// faster to do it once outside of the nloop loop.
GifTag::RegisterDescriptor reg_desc[16];
u32 nreg = tag.nreg();
for (u32 i = 0; i < nreg; i++) {
reg_desc[i] = tag.reg(i);
}
auto format = tag.flg();
if (format == GifTag::Format::PACKED) {
if (tag.pre()) {
handle_prim(tag.prim());
}
for (u32 loop = 0; loop < tag.nloop(); loop++) {
for (u32 reg = 0; reg < nreg; reg++) {
// fmt::print("{}\n", reg_descriptor_name(reg_desc[reg]));
switch (reg_desc[reg]) {
case GifTag::RegisterDescriptor::AD:
handle_ad(data + offset);
break;
case GifTag::RegisterDescriptor::ST:
handle_st_packed(data + offset);
break;
case GifTag::RegisterDescriptor::RGBAQ:
handle_rgbaq_packed(data + offset);
break;
case GifTag::RegisterDescriptor::XYZF2:
handle_xyzf2_packed(data + offset, render_state, prof);
break;
case GifTag::RegisterDescriptor::PRIM:
ASSERT(false); // handle_prim_packed(data + offset, render_state, prof);
break;
case GifTag::RegisterDescriptor::TEX0_1:
ASSERT(false); // handle_tex0_1_packed(data + offset);
break;
default:
fmt::print("Register {} is not supported in packed mode yet\n",
reg_descriptor_name(reg_desc[reg]));
ASSERT(false);
}
offset += 16; // PACKED = quadwords
}
}
} else if (format == GifTag::Format::REGLIST) {
for (u32 loop = 0; loop < tag.nloop(); loop++) {
for (u32 reg = 0; reg < nreg; reg++) {
u64 register_data;
memcpy(&register_data, data + offset, 8);
// fmt::print("loop: {} reg: {} {}\n", loop, reg, reg_descriptor_name(reg_desc[reg]));
switch (reg_desc[reg]) {
case GifTag::RegisterDescriptor::PRIM:
ASSERT(false); // handle_prim(register_data, render_state, prof);
break;
case GifTag::RegisterDescriptor::RGBAQ:
ASSERT(false); // handle_rgbaq(register_data);
break;
case GifTag::RegisterDescriptor::XYZF2:
ASSERT(false); // handle_xyzf2(register_data, render_state, prof);
break;
default:
fmt::print("Register {} is not supported in reglist mode yet\n",
reg_descriptor_name(reg_desc[reg]));
ASSERT(false);
}
offset += 8; // PACKED = quadwords
}
}
} else {
ASSERT(false); // format not packed or reglist.
}
eop = tag.eop();
}
}
void DirectRenderer2::handle_ad(const u8* data) {
u64 value;
GsRegisterAddress addr;
memcpy(&value, data, sizeof(u64));
memcpy(&addr, data + 8, sizeof(GsRegisterAddress));
// fmt::print("{}\n", register_address_name(addr));
switch (addr) {
case GsRegisterAddress::ZBUF_1:
handle_zbuf1(value);
break;
case GsRegisterAddress::TEST_1:
handle_test1(value);
break;
case GsRegisterAddress::ALPHA_1:
handle_alpha1(value);
break;
case GsRegisterAddress::PABE:
// ASSERT(false); // handle_pabe(value);
ASSERT(value == 0);
break;
case GsRegisterAddress::CLAMP_1:
handle_clamp1(value);
break;
case GsRegisterAddress::PRIM:
ASSERT(false); // handle_prim(value, render_state, prof);
break;
case GsRegisterAddress::TEX1_1:
handle_tex1_1(value);
break;
case GsRegisterAddress::TEXA: {
GsTexa reg(value);
// rgba16 isn't used so this doesn't matter?
// but they use sane defaults anyway
ASSERT(reg.ta0() == 0);
ASSERT(reg.ta1() == 0x80); // note: check rgba16_to_rgba32 if this changes.
ASSERT(reg.aem() == false);
} break;
case GsRegisterAddress::TEXCLUT:
// TODO
// the only thing the direct renderer does with texture is font, which does no tricks with
// CLUT. The texture upload process will do all of the lookups with the default CLUT.
// So we'll just assume that the TEXCLUT is set properly and ignore this.
break;
case GsRegisterAddress::FOGCOL:
// TODO
break;
case GsRegisterAddress::TEX0_1:
handle_tex0_1(value);
break;
case GsRegisterAddress::MIPTBP1_1:
case GsRegisterAddress::MIPTBP2_1:
// TODO this has the address of different mip levels.
break;
case GsRegisterAddress::TEXFLUSH:
break;
default:
fmt::print("Address {} is not supported\n", register_address_name(addr));
ASSERT(false);
}
}
void DirectRenderer2::handle_test1(u64 val) {
GsTest reg(val);
ASSERT(!reg.date()); // datm doesn't matter
if (m_state.gs_test != reg) {
m_current_state_has_open_draw = false;
m_state.gs_test = reg;
m_state.as_mode.set_at(reg.alpha_test_enable());
if (reg.alpha_test_enable()) {
switch (reg.alpha_test()) {
case GsTest::AlphaTest::NEVER:
m_state.as_mode.set_alpha_test(DrawMode::AlphaTest::NEVER);
break;
case GsTest::AlphaTest::ALWAYS:
m_state.as_mode.set_alpha_test(DrawMode::AlphaTest::ALWAYS);
break;
case GsTest::AlphaTest::GEQUAL:
m_state.as_mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL);
break;
default:
ASSERT(false);
}
}
m_state.as_mode.set_aref(reg.aref());
m_state.as_mode.set_alpha_fail(reg.afail());
m_state.as_mode.set_zt(reg.zte());
m_state.as_mode.set_depth_test(reg.ztest());
}
}
void DirectRenderer2::handle_zbuf1(u64 val) {
GsZbuf x(val);
ASSERT(x.psm() == TextureFormat::PSMZ24);
ASSERT(x.zbp() == 448);
bool write = !x.zmsk();
if (write != m_state.as_mode.get_depth_write_enable()) {
m_current_state_has_open_draw = false;
m_state.as_mode.set_depth_write_enable(write);
}
}
void DirectRenderer2::handle_tex0_1(u64 val) {
GsTex0 reg(val);
if (m_state.gs_tex0 != reg) {
m_current_state_has_open_draw = false;
m_state.gs_tex0 = reg;
m_state.tbp = reg.tbp0();
// tbw
if (reg.psm() == GsTex0::PSM::PSMT4HH) {
m_state.tbp |= 0x8000;
}
// tw/th
m_state.as_mode.set_tcc(reg.tcc());
m_state.set_tcc_flag(reg.tcc());
bool decal = reg.tfx() == GsTex0::TextureFunction::DECAL;
m_state.as_mode.set_decal(decal);
m_state.set_decal_flag(decal);
ASSERT(reg.tfx() == GsTex0::TextureFunction::DECAL ||
reg.tfx() == GsTex0::TextureFunction::MODULATE);
}
}
void DirectRenderer2::handle_tex1_1(u64 val) {
GsTex1 reg(val);
if (reg.mmag() != m_state.as_mode.get_filt_enable()) {
m_current_state_has_open_draw = false;
m_state.as_mode.set_filt_enable(reg.mmag());
}
}
void DirectRenderer2::handle_clamp1(u64 val) {
bool clamp_s = val & 0b001;
bool clamp_t = val & 0b100;
if ((clamp_s != m_state.as_mode.get_clamp_s_enable()) ||
(clamp_t != m_state.as_mode.get_clamp_t_enable())) {
m_current_state_has_open_draw = false;
m_state.as_mode.set_clamp_s_enable(clamp_s);
m_state.as_mode.set_clamp_t_enable(clamp_t);
}
}
void DirectRenderer2::handle_prim(u64 val) {
m_state.next_vertex_starts_strip = true;
GsPrim reg(val);
if (reg != m_state.gs_prim) {
m_current_state_has_open_draw = false;
ASSERT(reg.kind() == GsPrim::Kind::TRI_STRIP);
ASSERT(reg.gouraud());
if (!reg.tme()) {
ASSERT(false); // todo, might need this
}
m_state.as_mode.set_fog(reg.fge());
m_state.set_fog_flag(reg.fge());
m_state.as_mode.set_ab(reg.abe());
ASSERT(!reg.aa1());
ASSERT(!reg.fst());
ASSERT(!reg.ctxt());
ASSERT(!reg.fix());
}
}
void DirectRenderer2::handle_st_packed(const u8* data) {
memcpy(&m_state.s, data + 0, 4);
memcpy(&m_state.t, data + 4, 4);
memcpy(&m_state.Q, data + 8, 4);
}
void DirectRenderer2::handle_rgbaq_packed(const u8* data) {
m_state.rgba[0] = data[0];
m_state.rgba[1] = data[4];
m_state.rgba[2] = data[8];
m_state.rgba[3] = data[12];
}
void DirectRenderer2::handle_xyzf2_packed(const u8* data,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
if (m_vertices.close_to_full()) {
m_stats.flush_due_to_full++;
flush_pending(render_state, prof);
}
u32 x, y;
memcpy(&x, data, 4);
memcpy(&y, data + 4, 4);
u64 upper;
memcpy(&upper, data + 8, 8);
u32 z = (upper >> 4) & 0xffffff;
u8 f = (upper >> 36);
bool adc = !(upper & (1ull << 47));
if (m_state.next_vertex_starts_strip) {
m_state.next_vertex_starts_strip = false;
m_state.strip_warmup = 0;
}
// push the vertex
auto& vert = m_vertices.vertices[m_vertices.next_vertex++];
m_state.strip_warmup++;
if (adc && m_state.strip_warmup >= 3) {
m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 1;
m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 2;
m_vertices.indices[m_vertices.next_index++] = m_vertices.next_vertex - 3;
}
if (!m_current_state_has_open_draw) {
m_current_state_has_open_draw = true;
if (m_next_free_draw >= m_draw_buffer.size()) {
ASSERT(false);
}
// pick a texture unit to use
u8 tex_unit = 0;
if (m_next_free_draw > 0) {
tex_unit = (m_draw_buffer[m_next_free_draw - 1].tex_unit + 1) % TEX_UNITS;
}
auto& draw = m_draw_buffer[m_next_free_draw++];
draw.mode = m_state.as_mode;
draw.start_index = m_vertices.next_index;
draw.tbp = m_state.tbp;
draw.fix = m_state.gs_alpha.fix();
// associate this draw with this texture unit.
draw.tex_unit = tex_unit;
m_state.tex_unit = tex_unit;
}
vert.xyz[0] = x;
vert.xyz[1] = y;
vert.xyz[2] = z;
vert.rgba = m_state.rgba;
vert.stq = math::Vector<float, 3>(m_state.s, m_state.t, m_state.Q);
vert.tex_unit = m_state.tex_unit;
vert.fog = f;
vert.flags = m_state.vertex_flags;
}
void DirectRenderer2::handle_alpha1(u64 val) {
GsAlpha reg(val);
if (m_state.gs_alpha != reg) {
m_state.gs_alpha = reg;
m_current_state_has_open_draw = false;
auto a = reg.a_mode();
auto b = reg.b_mode();
auto c = reg.c_mode();
auto d = reg.d_mode();
if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::DEST &&
c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
} else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::ZERO_OR_FIXED &&
c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_SRC_DST);
} else if (a == GsAlpha::BlendMode::ZERO_OR_FIXED && b == GsAlpha::BlendMode::SOURCE &&
c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::DEST) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::ZERO_SRC_SRC_DST);
} else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::DEST &&
c == GsAlpha::BlendMode::ZERO_OR_FIXED && d == GsAlpha::BlendMode::DEST) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_FIX_DST);
} else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::SOURCE &&
c == GsAlpha::BlendMode::SOURCE && d == GsAlpha::BlendMode::SOURCE) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_SRC_SRC_SRC);
} else if (a == GsAlpha::BlendMode::SOURCE && b == GsAlpha::BlendMode::ZERO_OR_FIXED &&
c == GsAlpha::BlendMode::DEST && d == GsAlpha::BlendMode::DEST) {
m_state.as_mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_DST_DST);
} else {
// unsupported blend: a 0 b 2 c 2 d 1
// lg::error("unsupported blend: a {} b {} c {} d {}", (int)a, (int)b, (int)c, (int)d);
// ASSERT(false);
}
}
}

View file

@ -0,0 +1,139 @@
#pragma once
#include <vector>
#include "common/common_types.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "common/dma/gs.h"
class DirectRenderer2 {
public:
DirectRenderer2(u32 max_verts, u32 max_inds, u32 max_draws, const std::string& name);
void init_shaders(ShaderLibrary& shaders);
void reset_state();
void render_gif_data(const u8* data, SharedRenderState* render_state, ScopedProfilerNode& prof);
void flush_pending(SharedRenderState* render_state, ScopedProfilerNode& prof);
void draw_debug_window();
~DirectRenderer2();
private:
static constexpr u8 TEX_UNITS = 10;
void reset_buffers();
void draw_call_loop_simple(SharedRenderState* render_state, ScopedProfilerNode& prof);
void draw_call_loop_grouped(SharedRenderState* render_state, ScopedProfilerNode& prof);
// the GsState is the state of all Gs Registers.
struct GsState {
DrawMode as_mode;
u16 tbp;
GsTest gs_test;
GsTex0 gs_tex0;
GsPrim gs_prim;
GsAlpha gs_alpha;
u8 tex_unit = 0;
float s, t, Q;
math::Vector<u8, 4> rgba;
bool next_vertex_starts_strip = true;
u32 strip_warmup = 0;
u8 vertex_flags = 0;
void set_tcc_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 1; }
void set_decal_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 2; }
void set_fog_flag(bool value) { vertex_flags ^= (-(u8)value ^ vertex_flags) & 4; }
} m_state;
// if this is true, then drawing a vertex can just get pushed directly to the vertex buffer.
// if not, we need to set up a new draw
bool m_current_state_has_open_draw = false;
struct Draw {
DrawMode mode;
u32 start_index = -1;
u16 tbp = UINT16_MAX;
u8 fix = 0;
u8 tex_unit = 0;
std::string to_string() const;
std::string to_single_line_string() const;
};
std::vector<Draw> m_draw_buffer;
u32 m_next_free_draw = 0;
struct Vertex {
math::Vector<float, 3> xyz;
math::Vector<u8, 4> rgba;
math::Vector<float, 3> stq;
u8 tex_unit;
u8 flags;
u8 fog;
u8 pad;
std::string print() const;
};
static_assert(sizeof(Vertex) == 32);
struct VertexBuffer {
std::vector<Vertex> vertices;
std::vector<u32> indices;
u32 next_vertex = 0;
u32 next_index = 0;
void push_reset() { indices[next_index++] = UINT32_MAX; }
Vertex& push() {
indices[next_index++] = next_vertex;
return vertices[next_vertex++];
}
bool close_to_full() {
return (next_vertex + 40 > vertices.size()) || (next_index + 40 > indices.size());
}
} m_vertices;
struct {
GLuint vertex_buffer;
GLuint index_buffer;
GLuint vao;
GLuint alpha_reject, color_mult, fog_color;
} m_ogl;
struct Stats {
u32 upload_bytes = 0;
u32 num_uploads = 0;
u32 flush_due_to_full = 0;
float upload_wait = 0;
u32 saved_draws = 0;
} m_stats;
struct Debug {
bool disable_mip = true;
} m_debug;
std::string m_name;
void setup_opengl_for_draw_mode(const Draw& draw, SharedRenderState* render_state);
void setup_opengl_tex(u16 unit,
u16 tbp,
bool filter,
bool clamp_s,
bool clamp_t,
SharedRenderState* render_state);
// gif handlers
void handle_ad(const u8* data);
void handle_test1(u64 val);
void handle_tex0_1(u64 val);
void handle_tex1_1(u64 val);
void handle_clamp1(u64 val);
void handle_prim(u64 val);
void handle_alpha1(u64 val);
void handle_zbuf1(u64 val);
// packed
void handle_st_packed(const u8* data);
void handle_rgbaq_packed(const u8* data);
void handle_xyzf2_packed(const u8* data,
SharedRenderState* render_state,
ScopedProfilerNode& prof);
};

View file

@ -2,7 +2,13 @@
#include "third-party/imgui/imgui.h"
GenericRenderer::GenericRenderer(const std::string& name, BucketId my_id)
: BucketRenderer(name, my_id), m_direct(name, my_id, 0x4000) {}
: BucketRenderer(name, my_id),
m_direct(name, my_id, 0x30000),
m_direct2(30000, 60000, 1000, name) {}
void GenericRenderer::init_shaders(ShaderLibrary& shaders) {
m_direct2.init_shaders(shaders);
}
void GenericRenderer::render(DmaFollower& dma,
SharedRenderState* render_state,
@ -16,7 +22,7 @@ void GenericRenderer::render(DmaFollower& dma,
// the default ALPHA doesn't seem to be right. I don't know what's supposed to set it here.
// although this is definitely a hack, it doesn't seem to cause problems when the first thing to
// draw is transparent.
m_direct.hack_disable_blend();
// m_direct.hack_disable_blend();
// skip if disabled
if (!m_enabled) {
@ -58,10 +64,18 @@ void GenericRenderer::render(DmaFollower& dma,
ASSERT(false);
}
} else if (v0.kind == VifCode::Kind::FLUSHA && v1.kind == VifCode::Kind::DIRECT) {
m_direct.render_gif(data.data, data.size_bytes, render_state, prof);
if (render_state->use_direct2) {
m_direct2.render_gif_data(data.data, render_state, prof);
} else {
m_direct.render_gif(data.data, data.size_bytes, render_state, prof);
}
ASSERT(v1.immediate == data.size_bytes / 16);
} else if (v0.kind == VifCode::Kind::NOP && v1.kind == VifCode::Kind::DIRECT) {
m_direct.render_gif(data.data, data.size_bytes, render_state, prof);
if (render_state->use_direct2) {
m_direct2.render_gif_data(data.data, render_state, prof);
} else {
m_direct.render_gif(data.data, data.size_bytes, render_state, prof);
}
ASSERT(v1.immediate == data.size_bytes / 16);
} else if (v0.kind == VifCode::Kind::STCYCL && v1.kind == VifCode::Kind::UNPACK_V4_32) {
vu.stcycl = v0.immediate;
@ -123,7 +137,11 @@ void GenericRenderer::render(DmaFollower& dma,
}
m_skipped_tags++;
}
m_direct.flush_pending(render_state, prof);
if (render_state->use_direct2) {
m_direct2.flush_pending(render_state, prof);
} else {
m_direct.flush_pending(render_state, prof);
}
}
void GenericRenderer::handle_dma_stream(const u8* data,
@ -297,7 +315,11 @@ void GenericRenderer::mscal(int imm, SharedRenderState* render_state, ScopedProf
void GenericRenderer::xgkick(u16 addr, SharedRenderState* render_state, ScopedProfilerNode& prof) {
if (render_state->enable_generic_xgkick && m_xgkick_idx >= m_min_xgkick &&
m_xgkick_idx < m_max_xgkick) {
m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof);
if (render_state->use_direct2) {
m_direct2.render_gif_data(m_buffer.data + (16 * addr), render_state, prof);
} else {
m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof);
}
}
m_xgkick_idx++;
}

View file

@ -2,6 +2,7 @@
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/opengl_renderer/DirectRenderer.h"
#include "game/graphics/opengl_renderer/DirectRenderer2.h"
#include "game/common/vu.h"
class GenericRenderer : public BucketRenderer {
@ -9,6 +10,7 @@ class GenericRenderer : public BucketRenderer {
GenericRenderer(const std::string& name, BucketId my_id);
void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override;
void draw_debug_window() override;
void init_shaders(ShaderLibrary& shaders) override;
private:
u32 unpack32_4(const VifCodeUnpack& up, const u8* data, u32 imm);
@ -39,6 +41,7 @@ class GenericRenderer : public BucketRenderer {
int m_skipped_tags = 0;
DirectRenderer m_direct;
DirectRenderer2 m_direct2;
std::string m_debug;
struct Vu {

View file

@ -3,10 +3,16 @@
#include "third-party/imgui/imgui.h"
MercRenderer::MercRenderer(const std::string& name, BucketId my_id)
: BucketRenderer(name, my_id), m_direct(fmt::format("{}-dir", name), my_id, 0x30000) {
: BucketRenderer(name, my_id),
m_direct(fmt::format("{}-dir", name), my_id, 0x30000),
m_direct2(20000, 40000, 1000, name) {
memset(m_buffer.data, 0, sizeof(m_buffer.data));
}
void MercRenderer::init_shaders(ShaderLibrary& shaders) {
m_direct2.init_shaders(shaders);
}
void MercRenderer::render(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
@ -41,11 +47,15 @@ void MercRenderer::render(DmaFollower& dma,
// if we reach here, there's stuff to draw
handle_setup(dma, render_state, prof);
m_direct2.reset_state();
m_direct.reset_state();
while (dma.current_tag_offset() != render_state->next_bucket) {
handle_merc_chain(dma, render_state, prof);
}
ASSERT(dma.current_tag_offset() == render_state->next_bucket);
m_direct2.flush_pending(render_state, prof);
m_direct.flush_pending(render_state, prof);
}
@ -336,10 +346,15 @@ void MercRenderer::draw_debug_window() {
ImGui::Checkbox("Normal MSCAL enable", &m_enable_normal_mscals);
ImGui::Checkbox("Prime MSCAL enable", &m_enable_prime_mscals);
ImGui::Checkbox("Send to direct", &m_enable_send_to_direct);
m_direct2.draw_debug_window();
}
void MercRenderer::xgkick(u16 addr, SharedRenderState* render_state, ScopedProfilerNode& prof) {
if (m_enable_send_to_direct && render_state->enable_merc_xgkick) {
m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof);
if (render_state->use_direct2) {
m_direct2.render_gif_data(m_buffer.data + (16 * addr), render_state, prof);
} else {
m_direct.render_gif(m_buffer.data + (16 * addr), UINT32_MAX, render_state, prof);
}
}
}

View file

@ -4,10 +4,13 @@
#include "common/math/Vector.h"
#include "game/graphics/opengl_renderer/DirectRenderer.h"
#include "game/common/vu.h"
#include "game/graphics/opengl_renderer/DirectRenderer2.h"
class MercRenderer : public BucketRenderer {
public:
MercRenderer(const std::string& name, BucketId my_id);
void init_shaders(ShaderLibrary& shaders) override;
void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override;
void draw_debug_window() override;
@ -79,6 +82,7 @@ class MercRenderer : public BucketRenderer {
u16 xitop();
DirectRenderer m_direct;
DirectRenderer2 m_direct2;
struct {
u32 row[4] = {0, 0, 0, 0};

View file

@ -43,6 +43,7 @@ void GLAPIENTRY opengl_error_callback(GLenum source,
} else if (severity == GL_DEBUG_SEVERITY_HIGH) {
lg::error("[{}] OpenGL error 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
message);
ASSERT(false);
}
}
@ -219,6 +220,8 @@ void OpenGLRenderer::init_bucket_renderers() {
if (!m_bucket_renderers[i]) {
init_bucket_renderer<EmptyBucketRenderer>(fmt::format("bucket{}", i), (BucketId)i);
}
m_bucket_renderers[i]->init_shaders(m_render_state.shaders);
}
}
@ -295,6 +298,7 @@ void OpenGLRenderer::draw_renderer_selection_window() {
ImGui::Checkbox("Render Debug (slower)", &m_render_state.render_debug);
ImGui::Checkbox("Merc XGKICK", &m_render_state.enable_merc_xgkick);
ImGui::Checkbox("Generic XGKICK", &m_render_state.enable_generic_xgkick);
ImGui::Checkbox("Direct 2", &m_render_state.use_direct2);
for (size_t i = 0; i < m_bucket_renderers.size(); i++) {
auto renderer = m_bucket_renderers[i].get();

View file

@ -75,4 +75,5 @@ ShaderLibrary::ShaderLibrary() {
at(ShaderId::TFRAG3) = {"tfrag3"};
at(ShaderId::TFRAG3_NO_TEX) = {"tfrag3_no_tex"};
at(ShaderId::SPRITE3) = {"sprite3_3d"};
at(ShaderId::DIRECT2) = {"direct2"};
}

View file

@ -32,6 +32,7 @@ enum class ShaderId {
TFRAG3_NO_TEX = 7,
SPRITE = 8,
SPRITE3 = 9,
DIRECT2 = 10,
MAX_SHADERS
};

View file

@ -0,0 +1,75 @@
#version 430 core
out vec4 color;
in vec4 fragment_color;
in vec3 tex_coord;
uniform float alpha_reject;
uniform float color_mult;
uniform vec4 fog_color;
in flat uvec4 tex_info;
in float fog;
layout (binding = 0) uniform sampler2D tex_T0;
layout (binding = 1) uniform sampler2D tex_T1;
layout (binding = 2) uniform sampler2D tex_T2;
layout (binding = 3) uniform sampler2D tex_T3;
layout (binding = 4) uniform sampler2D tex_T4;
layout (binding = 5) uniform sampler2D tex_T5;
layout (binding = 6) uniform sampler2D tex_T6;
layout (binding = 7) uniform sampler2D tex_T7;
layout (binding = 8) uniform sampler2D tex_T8;
layout (binding = 9) uniform sampler2D tex_T9;
vec4 sample_tex(vec2 coord, uint unit) {
switch (unit) {
case 0: return texture(tex_T0, coord);
case 1: return texture(tex_T1, coord);
case 2: return texture(tex_T2, coord);
case 3: return texture(tex_T3, coord);
case 4: return texture(tex_T4, coord);
case 5: return texture(tex_T5, coord);
case 6: return texture(tex_T6, coord);
case 7: return texture(tex_T7, coord);
case 8: return texture(tex_T8, coord);
case 9: return texture(tex_T9, coord);
default : return vec4(1.0, 0, 1.0, 1.0);
}
}
void main() {
vec4 T0 = sample_tex(tex_coord.xy / tex_coord.z, tex_info.x);
// y is tcc
// z is decal
if ((tex_info.y & 1u) == 0) {
if ((tex_info.y & 2u) == 0) {
// modulate + no tcc
color.xyz = fragment_color.xyz * T0.xyz;
color.w = fragment_color.w;
} else {
// decal + no tcc
color.xyz = T0.xyz * 0.5;
color.w = fragment_color.w;
}
} else {
if ((tex_info.y & 2u) == 0) {
// modulate + tcc
color = fragment_color * T0;
} else {
// decal + tcc
color.xyz = T0.xyz * 0.5;
color.w = T0.w;
}
}
color *= 2;
color.xyz *= color_mult;
if (color.a < alpha_reject) {
discard;
}
if ((tex_info.y & 4u) != 0) {
color.xyz = mix(color.xyz, fog_color.xyz / 255., clamp(fog_color.w * (1 - fog), 0, 1));
}
}

View file

@ -0,0 +1,26 @@
#version 430 core
layout (location = 0) in vec3 position_in;
layout (location = 1) in vec4 rgba_in;
layout (location = 2) in vec3 tex_coord_in;
layout (location = 3) in uvec4 byte_info;
out vec4 fragment_color;
out vec3 tex_coord;
out float fog;
// putting all texture info stuff here so it's easier to copy-paste
out flat uvec2 tex_info;
void main() {
gl_Position = vec4((position_in.x - 0x8000) / 0x1000,
-(position_in.y - 0x8000) / 0x800,
position_in.z / 0x800000 - 1., 1.0);
// scissoring area adjust
gl_Position.y *= 512.0/448.0;
fragment_color = vec4(rgba_in.x, rgba_in.y, rgba_in.z, rgba_in.w * 2.);
tex_coord = tex_coord_in;
tex_info = byte_info.xy;
fog = float(byte_info.z) / 255.;
}

View file

@ -31,6 +31,8 @@
namespace {
constexpr bool run_dma_copy = false;
struct GraphicsData {
// vsync
std::mutex sync_mutex;
@ -253,7 +255,6 @@ void render_game_frame(int width, int height, int lbox_width, int lbox_height) {
g_gfx_data->debug_gui.want_save() = false;
}
auto& chain = g_gfx_data->dma_copier.get_last_result();
g_gfx_data->frame_idx_of_input_data = g_gfx_data->frame_idx;
RenderOptions options;
options.window_height_px = height;
@ -268,10 +269,14 @@ void render_game_frame(int width, int height, int lbox_width, int lbox_height) {
if (options.save_screenshot) {
options.screenshot_path = make_output_file_name(g_gfx_data->debug_gui.screenshot_name());
}
g_gfx_data->ogl_renderer.render(DmaFollower(chain.data.data(), chain.start_offset), options);
// g_gfx_data->ogl_renderer.render(DmaFollower(g_gfx_data->dma_copier.get_last_input_data(),
// g_gfx_data->dma_copier.get_last_input_offset()),
// options);
if constexpr (run_dma_copy) {
auto& chain = g_gfx_data->dma_copier.get_last_result();
g_gfx_data->ogl_renderer.render(DmaFollower(chain.data.data(), chain.start_offset), options);
} else {
g_gfx_data->ogl_renderer.render(DmaFollower(g_gfx_data->dma_copier.get_last_input_data(),
g_gfx_data->dma_copier.get_last_input_offset()),
options);
}
}
// before vsync, mark the chain as rendered.
@ -518,7 +523,7 @@ void gl_send_chain(const void* data, u32 offset) {
// The renderers should just operate on DMA chains, so eliminating this step in the future may
// be easy.
g_gfx_data->dma_copier.run(data, offset);
g_gfx_data->dma_copier.set_input_data(data, offset, run_dma_copy);
g_gfx_data->has_data_to_render = true;
g_gfx_data->dma_cv.notify_all();

File diff suppressed because it is too large Load diff