improve performance on intel graphics (#1041)

* improve performance on intel graphics

* more tweaks, add a glfinish button

* remove divide in fragment shader

* temp

* add cpu sky blend

* use vao

* change format

* use floats in direct renderer

* format

* format again

* tfrag ice
This commit is contained in:
water111 2021-12-30 19:38:18 -05:00 committed by GitHub
parent 3f84bd85e7
commit cd9e74819c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 659 additions and 445 deletions

View file

@ -5,18 +5,15 @@
* Common types shared between the compiler and the runtime for the listener connection.
*/
#ifndef JAK1_LISTENER_COMMON_H
#define JAK1_LISTENER_COMMON_H
#include "common/common_types.h"
/*!
* Header of a DECI2 protocol message
* TODO - there are other copies of this somewhere
* NOTE: we've changed this to use 32-bit integers for len/rsvd
*/
struct Deci2Header {
u16 len; //! size of data following header
u16 rsvd; //! zero, used internally by runtime.
u32 len; //! size of data following header
u32 rsvd; //! zero, used internally by runtime.
u16 proto; //! protocol identification number
u8 src; //! identification code of sender
u8 dst; //! identification code of recipient
@ -64,5 +61,3 @@ struct ListenerMessageHeader {
constexpr int DECI2_PORT = 8112; // TODO - is this a good choice?
constexpr u16 DECI2_PROTOCOL = 0xe042;
#endif // JAK1_LISTENER_COMMON_H

View file

@ -2049,6 +2049,8 @@ void make_tfrag3_data(std::map<u32, std::vector<GroupedDraw>>& draws,
vtx.s = vert.stq.x();
vtx.t = vert.stq.y();
vtx.q = vert.stq.z();
// if this is true, we can remove a divide in the shader
assert(vtx.q == 1.f);
vtx.color_index = vert.rgba / 4;
// assert((vert.rgba >> 2) < 1024); spider cave has 2048?
assert((vert.rgba & 3) == 0);

View file

@ -1965,6 +1965,8 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
vtx.s = vert.tex.x();
vtx.t = vert.tex.y();
vtx.q = vert.tex.z();
// if this is true, we can remove a divide in the shader
assert(vtx.q == 1.f);
if (vert.color_index_index == UINT32_MAX) {
vtx.color_index = 0;
} else {

View file

@ -94,6 +94,8 @@ set(RUNTIME_SOURCE
graphics/opengl_renderer/OpenGLRenderer.cpp
graphics/opengl_renderer/Profiler.cpp
graphics/opengl_renderer/Shader.cpp
graphics/opengl_renderer/SkyBlendCPU.cpp
graphics/opengl_renderer/SkyBlendGPU.cpp
graphics/opengl_renderer/SkyRenderer.cpp
graphics/opengl_renderer/SpriteRenderer.cpp
graphics/opengl_renderer/TextureUploadHandler.cpp

View file

@ -26,9 +26,11 @@ enum class BucketId {
ALPHA_TEX_LEVEL0 = 31,
TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0 = 32,
TFRAG_DIRT_LEVEL0 = 34,
TFRAG_ICE_LEVEL0 = 36,
ALPHA_TEX_LEVEL1 = 38,
TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1 = 39,
TFRAG_DIRT_LEVEL1 = 41,
TFRAG_ICE_LEVEL1 = 43,
PRIS_TEX_LEVEL0 = 48,
PRIS_TEX_LEVEL1 = 51,
WATER_TEX_LEVEL0 = 57,
@ -59,6 +61,8 @@ struct SharedRenderState {
void* ee_main_memory = nullptr;
u32 offset_of_s7;
bool dump_playback = false;
bool use_sky_cpu = true;
};
/*!

View file

@ -8,28 +8,48 @@
DirectRenderer::DirectRenderer(const std::string& name, BucketId my_id, int batch_size, Mode mode)
: BucketRenderer(name, my_id), m_prim_buffer(batch_size), m_mode(mode) {
glGenBuffers(1, &m_ogl.vertex_buffer);
glGenBuffers(1, &m_ogl.color_buffer);
glGenBuffers(1, &m_ogl.st_buffer);
glGenVertexArrays(1, &m_ogl.vao);
glBindVertexArray(m_ogl.vao);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
m_ogl.vertex_buffer_bytes = batch_size * 3 * 3 * sizeof(u32);
glBufferData(GL_ARRAY_BUFFER, m_ogl.vertex_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
m_ogl.vertex_buffer_max_verts = batch_size * 3 * 2;
m_ogl.vertex_buffer_bytes = m_ogl.vertex_buffer_max_verts * sizeof(Vertex);
glBufferData(GL_ARRAY_BUFFER, m_ogl.vertex_buffer_bytes, nullptr,
GL_STREAM_DRAW); // todo stream?
glEnableVertexAttribArray(0);
glVertexAttribPointer(
0, // location 0 in the shader
4, // 3 floats per vert
GL_FLOAT, // floats
GL_TRUE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, xyz) // offset in array (why is is this a pointer...)
);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
m_ogl.color_buffer_bytes = batch_size * 3 * 4 * sizeof(u8);
glBufferData(GL_ARRAY_BUFFER, m_ogl.color_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(
1, // location 0 in the shader
4, // 4 floats per vert (w unused)
GL_UNSIGNED_BYTE, // floats
GL_TRUE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, rgba) // offset in array (why is is this a pointer...)
);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
m_ogl.st_buffer_bytes = batch_size * 3 * 3 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, m_ogl.st_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
glEnableVertexAttribArray(2);
glVertexAttribPointer(
2, // location 0 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_FALSE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, stq) // offset in array (why is is this a pointer...)
);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
}
DirectRenderer::~DirectRenderer() {
glDeleteBuffers(1, &m_ogl.color_buffer);
glDeleteBuffers(1, &m_ogl.vertex_buffer);
glDeleteBuffers(1, &m_ogl.st_buffer);
glDeleteVertexArrays(1, &m_ogl.vao);
}
@ -158,50 +178,16 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
// render!
// update buffers:
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<u32, 3>),
m_prim_buffer.verts.data());
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<u8, 4>),
m_prim_buffer.rgba_u8.data());
if (m_prim_gl_state.texture_enable) {
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<float, 3>),
m_prim_buffer.stqs.data());
u32 vertex_offset = m_ogl.last_vertex_offset;
if (vertex_offset + m_prim_buffer.vert_count >= m_ogl.vertex_buffer_max_verts) {
lg::warn("Buffer wrapped in {} (upcoming size is {}, {} bytes)\n", m_name,
m_prim_buffer.vert_count, m_prim_buffer.vert_count * sizeof(Vertex));
vertex_offset = 0;
}
// setup attributes:
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, // location 0 in the shader
3, // 3 floats per vert
GL_UNSIGNED_INT, // floats
GL_TRUE, // normalized, ignored,
0, // tightly packed
0 // offset in array (why is is this a pointer...)
);
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, // location 0 in the shader
4, // 3 floats per vert
GL_UNSIGNED_BYTE, // floats
GL_TRUE, // normalized, ignored,
0, // tightly packed
0);
if (m_prim_gl_state.texture_enable) {
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
glEnableVertexAttribArray(2);
glVertexAttribPointer(2, // location 0 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_FALSE, // normalized, ignored,
0, // tightly packed
0);
glActiveTexture(GL_TEXTURE0);
}
// assert(false);
glBufferSubData(GL_ARRAY_BUFFER, vertex_offset * sizeof(Vertex),
m_prim_buffer.vert_count * sizeof(Vertex), m_prim_buffer.vertices.data());
glActiveTexture(GL_TEXTURE0);
int draw_count = 0;
if (m_mode == Mode::SPRITE_CPU) {
@ -214,13 +200,13 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
}
if (m_sprite_mode.do_first_draw) {
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
draw_count++;
}
if (m_sprite_mode.do_second_draw) {
render_state->shaders[ShaderId::SPRITE_CPU_AFAIL].activate();
glDepthMask(GL_FALSE);
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
if (m_test_state.depth_writes) {
glDepthMask(GL_TRUE);
}
@ -230,7 +216,7 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
draw_count++;
}
} else {
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
draw_count++;
}
@ -238,7 +224,7 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
render_state->shaders[ShaderId::DEBUG_RED].activate();
glDisable(GL_BLEND);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
draw_count++;
}
@ -249,6 +235,8 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
prof.add_draw_call(draw_count);
m_stats.triangles += n_tris;
m_stats.draw_calls += draw_count;
m_ogl.last_vertex_offset = vertex_offset + m_prim_buffer.vert_count;
m_ogl.last_vertex_offset = (m_ogl.last_vertex_offset + 3) & ~3;
m_prim_buffer.vert_count = 0;
}
@ -1033,6 +1021,8 @@ void DirectRenderer::reset_state() {
m_prim_building = PrimBuildState();
m_ogl.last_vertex_offset = 0;
m_stats = {};
}
@ -1075,17 +1065,18 @@ void DirectRenderer::PrimGlState::from_register(GsPrim reg) {
}
DirectRenderer::PrimitiveBuffer::PrimitiveBuffer(int max_triangles) {
rgba_u8.resize(max_triangles * 3);
verts.resize(max_triangles * 3);
stqs.resize(max_triangles * 3);
vertices.resize(max_triangles * 3);
max_verts = max_triangles * 3;
}
void DirectRenderer::PrimitiveBuffer::push(const math::Vector<u8, 4>& rgba,
const math::Vector<u32, 3>& vert,
const math::Vector<float, 3>& st) {
rgba_u8[vert_count] = rgba;
verts[vert_count] = vert;
stqs[vert_count] = st;
auto& v = vertices[vert_count];
v.rgba = rgba;
v.xyz[0] = (float)vert[0] / UINT32_MAX;
v.xyz[1] = (float)vert[1] / UINT32_MAX;
v.xyz[2] = (float)vert[2] / UINT32_MAX;
v.stq = st;
vert_count++;
}

View file

@ -179,11 +179,16 @@ class DirectRenderer : public BucketRenderer {
} m_prim_building;
struct Vertex {
math::Vector<float, 4> xyz;
math::Vector<float, 3> stq;
math::Vector<u8, 4> rgba;
};
static_assert(sizeof(Vertex) == 32);
struct PrimitiveBuffer {
PrimitiveBuffer(int max_triangles);
std::vector<math::Vector<u8, 4>> rgba_u8;
std::vector<math::Vector<u32, 3>> verts;
std::vector<math::Vector<float, 3>> stqs;
std::vector<Vertex> vertices;
int vert_count = 0;
int max_verts = 0;
@ -195,11 +200,11 @@ class DirectRenderer : public BucketRenderer {
} m_prim_buffer;
struct {
GLuint vertex_buffer, color_buffer, st_buffer;
GLuint vertex_buffer;
GLuint vao;
u32 vertex_buffer_bytes = 0;
u32 color_buffer_bytes = 0;
u32 st_buffer_bytes = 0;
u32 vertex_buffer_max_verts = 0;
u32 last_vertex_offset = 0;
} m_ogl;
struct {

View file

@ -13,6 +13,10 @@
// for the vif callback
#include "game/kernel/kmachine.h"
namespace {
std::string g_current_render;
}
/*!
* OpenGL Error callback. If we do something invalid, this will be called.
@ -28,11 +32,14 @@ void GLAPIENTRY opengl_error_callback(GLenum source,
// On some drivers this prints on every single texture upload, which is too much spam
lg::debug("OpenGL notification 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
} else if (severity == GL_DEBUG_SEVERITY_LOW) {
lg::info("OpenGL message 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
lg::info("[{}] OpenGL message 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
message);
} else if (severity == GL_DEBUG_SEVERITY_MEDIUM) {
lg::warn("OpenGL warn 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
lg::warn("[{}] OpenGL warn 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
message);
} else if (severity == GL_DEBUG_SEVERITY_HIGH) {
lg::error("OpenGL error 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
lg::error("[{}] OpenGL error 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
message);
}
}
@ -60,11 +67,7 @@ void OpenGLRenderer::init_bucket_renderers() {
std::vector<tfrag3::TFragmentTreeKind> normal_tfrags = {tfrag3::TFragmentTreeKind::NORMAL,
tfrag3::TFragmentTreeKind::LOWRES};
std::vector<tfrag3::TFragmentTreeKind> dirt_tfrags = {tfrag3::TFragmentTreeKind::DIRT};
// TODO ice
// std::vector<tfrag3::TFragmentTreeKind> ice_tfrags = {tfrag3::TFragmentTreeKind::ICE};
// std::vector<tfrag3::TFragmentTreeKind> trans_tfrags = {tfrag3::TFragmentTreeKind::TRANS,
// tfrag3::TFragmentTreeKind::LOWRES_TRANS};
std::vector<tfrag3::TFragmentTreeKind> ice_tfrags = {tfrag3::TFragmentTreeKind::ICE};
init_bucket_renderer<EmptyBucketRenderer>("bucket0", BucketId::BUCKET0);
init_bucket_renderer<SkyRenderer>("sky", BucketId::SKY_DRAW);
@ -79,13 +82,18 @@ void OpenGLRenderer::init_bucket_renderers() {
init_bucket_renderer<TextureUploadHandler>("shrub-tex-1", BucketId::SHRUB_TEX_LEVEL1);
init_bucket_renderer<TextureUploadHandler>("alpha-tex-0", BucketId::ALPHA_TEX_LEVEL0);
init_bucket_renderer<TextureUploadHandler>("alpha-tex-1", BucketId::ALPHA_TEX_LEVEL1);
auto sky_blender = std::make_shared<SkyBlender>();
auto sky_gpu_blender = std::make_shared<SkyBlendGPU>();
auto sky_cpu_blender = std::make_shared<SkyBlendCPU>();
init_bucket_renderer<SkyBlendHandler>("sky-blend-and-tfrag-trans-0",
BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0, sky_blender);
BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0,
sky_gpu_blender, sky_cpu_blender);
init_bucket_renderer<TFragment>("tfrag-dirt-0", BucketId::TFRAG_DIRT_LEVEL0, dirt_tfrags, false);
init_bucket_renderer<TFragment>("tfrag-ice-0", BucketId::TFRAG_ICE_LEVEL0, ice_tfrags, false);
init_bucket_renderer<SkyBlendHandler>("sky-blend-and-tfrag-trans-1",
BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1, sky_blender);
BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1,
sky_gpu_blender, sky_cpu_blender);
init_bucket_renderer<TFragment>("tfrag-dirt-1", BucketId::TFRAG_DIRT_LEVEL1, dirt_tfrags, false);
init_bucket_renderer<TFragment>("tfrag-ice-1", BucketId::TFRAG_ICE_LEVEL1, ice_tfrags, false);
init_bucket_renderer<TextureUploadHandler>("pris-tex-0", BucketId::PRIS_TEX_LEVEL0);
init_bucket_renderer<TextureUploadHandler>("pris-tex-1", BucketId::PRIS_TEX_LEVEL1);
init_bucket_renderer<TextureUploadHandler>("water-tex-0", BucketId::WATER_TEX_LEVEL0);
@ -94,7 +102,7 @@ void OpenGLRenderer::init_bucket_renderers() {
init_bucket_renderer<SpriteRenderer>("sprite", BucketId::SPRITE);
init_bucket_renderer<DirectRenderer>("debug-draw-0", BucketId::DEBUG_DRAW_0, 1024,
DirectRenderer::Mode::NORMAL);
init_bucket_renderer<DirectRenderer>("debug-draw-1", BucketId::DEBUG_DRAW_1, 1024,
init_bucket_renderer<DirectRenderer>("debug-draw-1", BucketId::DEBUG_DRAW_1, 4096,
DirectRenderer::Mode::NORMAL);
// for now, for any unset renderers, just set them to an EmptyBucketRenderer.
@ -163,6 +171,9 @@ void OpenGLRenderer::serialize(Serializer& ser) {
*/
void OpenGLRenderer::draw_renderer_selection_window() {
ImGui::Begin("Renderer Debug");
ImGui::Checkbox("Sky CPU", &m_render_state.use_sky_cpu);
for (size_t i = 0; i < m_bucket_renderers.size(); i++) {
auto renderer = m_bucket_renderers[i].get();
if (renderer && !renderer->empty()) {
@ -232,8 +243,11 @@ void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof)
for (int bucket_id = 0; bucket_id < (int)BucketId::MAX_BUCKETS; bucket_id++) {
auto& renderer = m_bucket_renderers[bucket_id];
auto bucket_prof = prof.make_scoped_child(renderer->name_and_id());
// lg::info("Render: {} start\n", renderer->name_and_id());
g_current_render = renderer->name_and_id();
renderer->render(dma, &m_render_state, bucket_prof);
// should have ended at the start of the next chain
// lg::info("Render: {} end\n", renderer->name_and_id());
// should have ended at the start of the next chain
assert(dma.current_tag_offset() == m_render_state.next_bucket);
m_render_state.next_bucket += 16;
@ -241,6 +255,7 @@ void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof)
vif_interrupt_callback();
}
}
g_current_render = "";
// TODO ending data.
}

View file

@ -0,0 +1,166 @@
#include "SkyBlendCPU.h"
#include "game/graphics/opengl_renderer/AdgifHandler.h"
#include <immintrin.h>
SkyBlendCPU::SkyBlendCPU() {
glGenTextures(2, m_textures);
for (int i = 0; i < 2; i++) {
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
m_texture_data[i].resize(4 * m_sizes[i] * m_sizes[i]);
}
}
SkyBlendCPU::~SkyBlendCPU() {
glDeleteTextures(2, m_textures);
}
void blend_sky_initial_fast(u8 intensity, u8* out, const u8* in, u32 size) {
__m256i intensity_vec = _mm256_set1_epi16(intensity);
for (u32 i = 0; i < size / 16; i++) {
__m128i tex_data8 = _mm_loadu_si128((const __m128i*)(in + (i * 16)));
__m256i tex_data16 = _mm256_cvtepu8_epi16(tex_data8);
tex_data16 = _mm256_mullo_epi16(tex_data16, intensity_vec);
tex_data16 = _mm256_srli_epi16(tex_data16, 7);
auto hi = _mm256_extracti128_si256(tex_data16, 1);
auto result = _mm_packus_epi16(_mm256_castsi256_si128(tex_data16), hi);
_mm_storeu_si128((__m128i*)(out + (i * 16)), result);
}
}
void blend_sky_fast(u8 intensity, u8* out, const u8* in, u32 size) {
__m256i intensity_vec = _mm256_set1_epi16(intensity);
for (u32 i = 0; i < size / 16; i++) {
__m128i tex_data8 = _mm_loadu_si128((const __m128i*)(in + (i * 16)));
__m128i out_val = _mm_loadu_si128((const __m128i*)(out + (i * 16)));
__m256i tex_data16 = _mm256_cvtepu8_epi16(tex_data8);
tex_data16 = _mm256_mullo_epi16(tex_data16, intensity_vec);
tex_data16 = _mm256_srli_epi16(tex_data16, 7);
auto hi = _mm256_extracti128_si256(tex_data16, 1);
auto result = _mm_packus_epi16(_mm256_castsi256_si128(tex_data16), hi);
out_val = _mm_add_epi16(out_val, result);
_mm_storeu_si128((__m128i*)(out + (i * 16)), out_val);
}
}
SkyBlendStats SkyBlendCPU::do_sky_blends(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
SkyBlendStats stats;
Timer sky_timer;
while (dma.current_tag().qwc == 6) {
// assuming that the vif and gif-tag is correct
auto setup_data = dma.read_and_advance();
if (render_state->dump_playback) {
// continue;
}
// first is an adgif
AdgifHelper adgif(setup_data.data + 16);
assert(adgif.is_normal_adgif());
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
// next is the actual draw
auto draw_data = dma.read_and_advance();
assert(draw_data.size_bytes == 6 * 16);
GifTag draw_or_blend_tag(draw_data.data);
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
// here's we're relying on the format of the drawing to get the alpha/offset.
u32 coord;
u32 intensity;
memcpy(&coord, draw_data.data + (5 * 16), 4);
memcpy(&intensity, draw_data.data + 16, 4);
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
int buffer_idx = 0;
if (coord == 0x200) {
// sky
buffer_idx = 0;
} else if (coord == 0x400) {
buffer_idx = 1;
} else {
assert(false); // bad data
}
// look up the source texture
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
assert(tex);
assert(!tex->only_on_gpu); // we need the actual data!!
// slow version
/*
if (is_first_draw) {
memset(m_texture_data[buffer_idx].data(), 0, m_texture_data[buffer_idx].size());
}
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
// generated with the GOAL code.)
assert(intensity <= 128);
assert(m_texture_data[buffer_idx].size() == tex->data.size());
for (size_t i = 0; i < m_texture_data[buffer_idx].size(); i++) {
u32 val = tex->data[i] * intensity;
val >>= 7;
m_texture_data[buffer_idx][i] += val;
}
*/
if (is_first_draw) {
blend_sky_initial_fast(intensity, m_texture_data[buffer_idx].data(), tex->data.data(),
tex->data.size());
} else {
blend_sky_fast(intensity, m_texture_data[buffer_idx].data(), tex->data.data(),
tex->data.size());
}
if (buffer_idx == 0) {
if (is_first_draw) {
stats.sky_draws++;
} else {
stats.sky_blends++;
}
} else {
if (is_first_draw) {
stats.cloud_draws++;
} else {
stats.cloud_blends++;
}
}
}
// put in pool.
for (int i = 0; i < 2; i++) {
// todo - these are hardcoded and rely on the vram layout.
u32 tbp = i == 0 ? 8064 : 8096;
// lookup existing, or create a new entry
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
if (!tex) {
auto tsp = std::make_shared<TextureRecord>();
render_state->texture_pool->set_texture(tbp, tsp);
tex = tsp.get();
}
// update it
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8_REV, m_texture_data[i].data());
tex->gpu_texture = m_textures[i];
tex->on_gpu = true;
tex->only_on_gpu = true;
tex->do_gc = false;
tex->w = m_sizes[i];
tex->h = m_sizes[i];
tex->name = fmt::format("PC-SKY-{}", i);
}
// fmt::print("sky blend took {:.2f} ms\n", sky_timer.getMs());
return stats;
}

View file

@ -0,0 +1,21 @@
#pragma once
#include "common/dma/dma_chain_read.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/pipelines/opengl.h"
#include "game/graphics/opengl_renderer/SkyBlendCommon.h"
class SkyBlendCPU {
public:
SkyBlendCPU();
~SkyBlendCPU();
SkyBlendStats do_sky_blends(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof);
private:
GLuint m_textures[2]; // sky, clouds
static constexpr int m_sizes[2] = {32, 64};
std::vector<u8> m_texture_data[2];
};

View file

@ -0,0 +1,8 @@
#pragma once
struct SkyBlendStats {
int sky_draws = 0;
int cloud_draws = 0;
int sky_blends = 0;
int cloud_blends = 0;
};

View file

@ -0,0 +1,223 @@
#include "SkyBlendGPU.h"
#include "common/log/log.h"
#include "game/graphics/opengl_renderer/AdgifHandler.h"
SkyBlendGPU::SkyBlendGPU() {
// generate textures for sky blending
glGenFramebuffers(2, m_framebuffers);
glGenTextures(2, m_textures);
GLint old_framebuffer;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
// setup the framebuffers
for (int i = 0; i < 2; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[i]);
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[i], 0);
GLenum draw_buffers[1] = {GL_COLOR_ATTACHMENT0};
glDrawBuffers(1, draw_buffers);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
lg::error("SkyTextureHandler setup failed.");
}
}
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glGenBuffers(1, &m_gl_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 6, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, old_framebuffer);
// we only draw squares
m_vertex_data[0].x = 0;
m_vertex_data[0].y = 0;
m_vertex_data[1].x = 1;
m_vertex_data[1].y = 0;
m_vertex_data[2].x = 0;
m_vertex_data[2].y = 1;
m_vertex_data[3].x = 1;
m_vertex_data[3].y = 0;
m_vertex_data[4].x = 0;
m_vertex_data[4].y = 1;
m_vertex_data[5].x = 1;
m_vertex_data[5].y = 1;
}
SkyBlendGPU::~SkyBlendGPU() {
glDeleteFramebuffers(2, m_framebuffers);
glDeleteBuffers(1, &m_gl_vertex_buffer);
glDeleteTextures(2, m_textures);
}
SkyBlendStats SkyBlendGPU::do_sky_blends(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
SkyBlendStats stats;
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLint old_viewport[4];
glGetIntegerv(GL_VIEWPORT, old_viewport);
GLint old_framebuffer;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
while (dma.current_tag().qwc == 6) {
// assuming that the vif and gif-tag is correct
auto setup_data = dma.read_and_advance();
if (render_state->dump_playback) {
// continue;
}
// first is an adgif
AdgifHelper adgif(setup_data.data + 16);
assert(adgif.is_normal_adgif());
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
// next is the actual draw
auto draw_data = dma.read_and_advance();
assert(draw_data.size_bytes == 6 * 16);
GifTag draw_or_blend_tag(draw_data.data);
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
// here's we're relying on the format of the drawing to get the alpha/offset.
u32 coord;
u32 intensity;
memcpy(&coord, draw_data.data + (5 * 16), 4);
memcpy(&intensity, draw_data.data + 16, 4);
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
int buffer_idx = 0;
if (coord == 0x200) {
// sky
buffer_idx = 0;
} else if (coord == 0x400) {
buffer_idx = 1;
} else {
assert(false); // bad data
}
// look up the source texture
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
assert(tex);
if (!tex->on_gpu) {
render_state->texture_pool->upload_to_gpu(tex);
}
// setup for rendering!
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[buffer_idx]);
glViewport(0, 0, m_sizes[buffer_idx], m_sizes[buffer_idx]);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[buffer_idx], 0);
render_state->shaders[ShaderId::SKY_BLEND].activate();
// if the first is set, it disables alpha. we can just clear here, so it's easier to find
// in renderdoc.
if (is_first_draw) {
float clear[4] = {0, 0, 0, 0};
glClearBufferfv(GL_COLOR, 0, clear);
}
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
// generated with the GOAL code.)
assert(intensity <= 128);
// todo - could do this on the GPU, but probably not worth it for <20 triangles...
float intensity_float = intensity / 128.f;
for (auto& vert : m_vertex_data) {
vert.intensity = intensity_float;
}
glDisable(GL_DEPTH_TEST);
glEnable(GL_BLEND);
// will add.
glBlendFunc(GL_ONE, GL_ONE);
// setup draw data
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vertex) * 6, m_vertex_data);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, // location 0 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_TRUE, // normalized, ignored,
0, // tightly packed
0
);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, tex->gpu_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::SKY_BLEND].id(), "T0"), 0);
// Draw a sqaure
glDrawArrays(GL_TRIANGLES, 0, 6);
// 1 draw, 2 triangles
prof.add_draw_call(1);
prof.add_tri(2);
if (buffer_idx == 0) {
if (is_first_draw) {
stats.sky_draws++;
} else {
stats.sky_blends++;
}
} else {
if (is_first_draw) {
stats.cloud_draws++;
} else {
stats.cloud_blends++;
}
}
}
// put in pool.
for (int i = 0; i < 2; i++) {
// todo - these are hardcoded and rely on the vram layout.
u32 tbp = i == 0 ? 8064 : 8096;
// lookup existing, or create a new entry
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
if (!tex) {
auto tsp = std::make_shared<TextureRecord>();
render_state->texture_pool->set_texture(tbp, tsp);
tex = tsp.get();
}
// update it
tex->gpu_texture = m_textures[i];
tex->on_gpu = true;
tex->only_on_gpu = true;
tex->do_gc = false;
tex->w = m_sizes[i];
tex->h = m_sizes[i];
tex->name = fmt::format("PC-SKY-{}", i);
}
glViewport(old_viewport[0], old_viewport[1], old_viewport[2], old_viewport[3]);
glBindFramebuffer(GL_FRAMEBUFFER, old_framebuffer);
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return stats;
}

View file

@ -0,0 +1,29 @@
#include "common/dma/dma_chain_read.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/pipelines/opengl.h"
#include "game/graphics/opengl_renderer/SkyBlendCommon.h"
class SkyBlendGPU {
public:
SkyBlendGPU();
~SkyBlendGPU();
SkyBlendStats do_sky_blends(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof);
private:
GLuint m_framebuffers[2]; // sky, clouds
GLuint m_textures[2]; // sky, clouds
int m_sizes[2] = {32, 64};
GLuint m_gl_vertex_buffer;
struct Vertex {
float x = 0;
float y = 0;
float intensity = 0;
};
Vertex m_vertex_data[6];
};

View file

@ -19,230 +19,13 @@
// size of the sky texture is 64x96, but it's actually a 64x64 (clouds) and a 32x32 (sky)
SkyBlender::SkyBlender() {
// generate textures for sky blending
glGenFramebuffers(2, m_framebuffers);
glGenTextures(2, m_textures);
GLint old_framebuffer;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
// setup the framebuffers
for (int i = 0; i < 2; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[i]);
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[i], 0);
GLenum draw_buffers[1] = {GL_COLOR_ATTACHMENT0};
glDrawBuffers(1, draw_buffers);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
lg::error("SkyTextureHandler setup failed.");
}
}
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glGenBuffers(1, &m_gl_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 6, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, old_framebuffer);
// we only draw squares
m_vertex_data[0].x = 0;
m_vertex_data[0].y = 0;
m_vertex_data[1].x = 1;
m_vertex_data[1].y = 0;
m_vertex_data[2].x = 0;
m_vertex_data[2].y = 1;
m_vertex_data[3].x = 1;
m_vertex_data[3].y = 0;
m_vertex_data[4].x = 0;
m_vertex_data[4].y = 1;
m_vertex_data[5].x = 1;
m_vertex_data[5].y = 1;
}
SkyBlender::~SkyBlender() {
glDeleteFramebuffers(2, m_framebuffers);
glDeleteBuffers(1, &m_gl_vertex_buffer);
glDeleteTextures(2, m_textures);
}
SkyBlender::Stats SkyBlender::do_sky_blends(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
Stats stats;
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLint old_viewport[4];
glGetIntegerv(GL_VIEWPORT, old_viewport);
GLint old_framebuffer;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
while (dma.current_tag().qwc == 6) {
// assuming that the vif and gif-tag is correct
auto setup_data = dma.read_and_advance();
if (render_state->dump_playback) {
// continue;
}
// first is an adgif
AdgifHelper adgif(setup_data.data + 16);
assert(adgif.is_normal_adgif());
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
// next is the actual draw
auto draw_data = dma.read_and_advance();
assert(draw_data.size_bytes == 6 * 16);
GifTag draw_or_blend_tag(draw_data.data);
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
// here's we're relying on the format of the drawing to get the alpha/offset.
u32 coord;
u32 intensity;
memcpy(&coord, draw_data.data + (5 * 16), 4);
memcpy(&intensity, draw_data.data + 16, 4);
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
int buffer_idx = 0;
if (coord == 0x200) {
// sky
buffer_idx = 0;
} else if (coord == 0x400) {
buffer_idx = 1;
} else {
assert(false); // bad data
}
// look up the source texture
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
assert(tex);
if (!tex->on_gpu) {
render_state->texture_pool->upload_to_gpu(tex);
}
// setup for rendering!
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[buffer_idx]);
glViewport(0, 0, m_sizes[buffer_idx], m_sizes[buffer_idx]);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[buffer_idx], 0);
render_state->shaders[ShaderId::SKY_BLEND].activate();
// if the first is set, it disables alpha. we can just clear here, so it's easier to find
// in renderdoc.
if (is_first_draw) {
float clear[4] = {0, 0, 0, 0};
glClearBufferfv(GL_COLOR, 0, clear);
}
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
// generated with the GOAL code.)
assert(intensity <= 128);
// todo - could do this on the GPU, but probably not worth it for <20 triangles...
float intensity_float = intensity / 128.f;
for (auto& vert : m_vertex_data) {
vert.intensity = intensity_float;
}
glDisable(GL_DEPTH_TEST);
glEnable(GL_BLEND);
// will add.
glBlendFunc(GL_ONE, GL_ONE);
// setup draw data
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vertex) * 6, m_vertex_data);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, // location 0 in the shader
3, // 3 floats per vert
GL_FLOAT, // floats
GL_TRUE, // normalized, ignored,
0, // tightly packed
0
);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, tex->gpu_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::SKY_BLEND].id(), "T0"), 0);
// Draw a sqaure
glDrawArrays(GL_TRIANGLES, 0, 6);
// 1 draw, 2 triangles
prof.add_draw_call(1);
prof.add_tri(2);
if (buffer_idx == 0) {
if (is_first_draw) {
stats.sky_draws++;
} else {
stats.sky_blends++;
}
} else {
if (is_first_draw) {
stats.cloud_draws++;
} else {
stats.cloud_blends++;
}
}
}
// put in pool.
for (int i = 0; i < 2; i++) {
// todo - these are hardcoded and rely on the vram layout.
u32 tbp = i == 0 ? 8064 : 8096;
// lookup existing, or create a new entry
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
if (!tex) {
auto tsp = std::make_shared<TextureRecord>();
render_state->texture_pool->set_texture(tbp, tsp);
tex = tsp.get();
}
// update it
tex->gpu_texture = m_textures[i];
tex->on_gpu = true;
tex->only_on_gpu = true;
tex->do_gc = false;
tex->w = m_sizes[i];
tex->h = m_sizes[i];
tex->name = fmt::format("PC-SKY-{}", i);
}
glViewport(old_viewport[0], old_viewport[1], old_viewport[2], old_viewport[3]);
glBindFramebuffer(GL_FRAMEBUFFER, old_framebuffer);
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return stats;
}
SkyBlendHandler::SkyBlendHandler(const std::string& name,
BucketId my_id,
std::shared_ptr<SkyBlender> shared_blender)
std::shared_ptr<SkyBlendGPU> shared_blender,
std::shared_ptr<SkyBlendCPU> shared_blender_cpu)
: BucketRenderer(name, my_id),
m_shared_blender(shared_blender),
m_shared_gpu_blender(shared_blender),
m_shared_cpu_blender(shared_blender_cpu),
m_tfrag_renderer(fmt::format("tfrag-{}", name),
my_id,
{tfrag3::TFragmentTreeKind::TRANS, tfrag3::TFragmentTreeKind::LOWRES_TRANS},
@ -258,14 +41,19 @@ void SkyBlendHandler::handle_sky_copies(DmaFollower& dma,
}
return;
} else {
m_stats = m_shared_blender->do_sky_blends(dma, render_state, prof);
if (render_state->use_sky_cpu) {
m_gpu_stats = m_shared_cpu_blender->do_sky_blends(dma, render_state, prof);
} else {
m_gpu_stats = m_shared_gpu_blender->do_sky_blends(dma, render_state, prof);
}
}
}
void SkyBlendHandler::render(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
m_stats = {};
m_gpu_stats = {};
// First thing should be a NEXT with two nops. this is a jump from buckets to sprite data
auto data0 = dma.read_and_advance();
assert(data0.vif1() == 0);
@ -320,8 +108,8 @@ void SkyBlendHandler::render(DmaFollower& dma,
void SkyBlendHandler::draw_debug_window() {
ImGui::Separator();
ImGui::Text("Draw/Blend ( sky ): %d/%d", m_stats.sky_draws, m_stats.sky_blends);
ImGui::Text("Draw/Blend (cloud): %d/%d", m_stats.cloud_draws, m_stats.cloud_blends);
ImGui::Text("Draw/Blend ( sky ): %d/%d", m_gpu_stats.sky_draws, m_gpu_stats.sky_blends);
ImGui::Text("Draw/Blend (cloud): %d/%d", m_gpu_stats.cloud_draws, m_gpu_stats.cloud_blends);
if (ImGui::TreeNode("tfrag")) {
m_tfrag_renderer.draw_debug_window();

View file

@ -3,35 +3,8 @@
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/opengl_renderer/DirectRenderer.h"
#include "game/graphics/opengl_renderer/tfrag/TFragment.h"
class SkyBlender {
public:
SkyBlender();
~SkyBlender();
struct Stats {
int sky_draws = 0;
int cloud_draws = 0;
int sky_blends = 0;
int cloud_blends = 0;
};
Stats do_sky_blends(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof);
private:
GLuint m_framebuffers[2]; // sky, clouds
GLuint m_textures[2]; // sky, clouds
int m_sizes[2] = {32, 64};
GLuint m_gl_vertex_buffer;
struct Vertex {
float x = 0;
float y = 0;
float intensity = 0;
};
Vertex m_vertex_data[6];
};
#include "game/graphics//opengl_renderer/SkyBlendGPU.h"
#include "game/graphics//opengl_renderer/SkyBlendCPU.h"
/*!
* Handles texture blending for the sky.
@ -41,7 +14,8 @@ class SkyBlendHandler : public BucketRenderer {
public:
SkyBlendHandler(const std::string& name,
BucketId my_id,
std::shared_ptr<SkyBlender> shared_blender);
std::shared_ptr<SkyBlendGPU> shared_gpu_blender,
std::shared_ptr<SkyBlendCPU> shared_cpu_blender);
void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override;
void draw_debug_window() override;
@ -50,8 +24,9 @@ class SkyBlendHandler : public BucketRenderer {
SharedRenderState* render_state,
ScopedProfilerNode& prof);
std::shared_ptr<SkyBlender> m_shared_blender;
SkyBlender::Stats m_stats;
std::shared_ptr<SkyBlendGPU> m_shared_gpu_blender;
std::shared_ptr<SkyBlendCPU> m_shared_cpu_blender;
SkyBlendStats m_gpu_stats;
TFragment m_tfrag_renderer;
};

View file

@ -34,7 +34,7 @@ SpriteRenderer::SpriteRenderer(const std::string& name, BucketId my_id)
: BucketRenderer(name, my_id),
m_sprite_renderer(fmt::format("{}.sprites", name),
my_id,
100,
4000,
DirectRenderer::Mode::SPRITE_CPU),
m_direct_renderer(fmt::format("{}.direct", name), my_id, 100, DirectRenderer::Mode::NORMAL) {}

View file

@ -67,6 +67,8 @@ void FrameTimeRecorder::draw_window(const DmaStats& dma_stats) {
if (ImGui::Button("Single Frame Advance")) {
m_single_frame = true;
}
ImGui::SameLine();
ImGui::Checkbox("GLFinish", &do_gl_finish);
}
ImGui::End();
}

View file

@ -23,6 +23,8 @@ class FrameTimeRecorder {
return m_play;
}
bool do_gl_finish = false;
private:
float m_frame_times[SIZE] = {0};
int m_idx = 0;
@ -47,6 +49,7 @@ class OpenGlDebugGui {
const char* screenshot_name() const { return m_screenshot_save_name; }
bool should_advance_frame() { return m_frame_timer.should_advance_frame(); }
bool should_gl_finish() { return m_frame_timer.do_gl_finish; }
bool get_screenshot_flag() {
if (m_want_screenshot) {

View file

@ -3,7 +3,7 @@
out vec4 color;
in vec4 fragment_color;
in vec3 tex_coord;
noperspective in vec3 tex_coord;
uniform sampler2D tex_T0;
void main() {

View file

@ -11,7 +11,7 @@ uniform float alpha_max;
void main() {
//vec4 T0 = texture(tex_T0, tex_coord);
vec4 T0 = texture(tex_T0, tex_coord.xy / tex_coord.z);
vec4 T0 = texture(tex_T0, tex_coord.xy);
color = fragment_color * T0 * 2.0;
if (color.a < alpha_min) {

View file

@ -37,6 +37,11 @@ Tfrag3::~Tfrag3() {
void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_kinds,
const std::string& level,
SharedRenderState* render_state) {
// regardless of how many we use some fixed max
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
// indexing works properly.
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
// make sure we have the level data.
auto lev_data = render_state->loader.get_tfrag3_level(level);
if (m_level_name != level) {
@ -47,12 +52,13 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
fmt::print("level has {} trees\n", lev_data->tfrag_trees.size());
m_cached_trees.clear();
size_t idx_buffer_len = 0;
size_t time_of_day_count = 0;
size_t vis_temp_len = 0;
size_t max_draw = 0;
for (size_t tree_idx = 0; tree_idx < lev_data->tfrag_trees.size(); tree_idx++) {
size_t idx_buffer_len = 0;
const auto& tree = lev_data->tfrag_trees[tree_idx];
m_cached_trees.emplace_back();
auto& tree_cache = m_cached_trees.back();
@ -78,7 +84,7 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size());
glBindBuffer(GL_ARRAY_BUFFER, tree_cache.vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, verts * sizeof(tfrag3::PreloadedVertex), nullptr,
GL_DYNAMIC_DRAW);
GL_STREAM_DRAW);
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2);
@ -108,6 +114,20 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
sizeof(tfrag3::PreloadedVertex), // stride
(void*)offsetof(tfrag3::PreloadedVertex, color_index) // offset (0)
);
glGenBuffers(1, &tree_cache.index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree_cache.index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr,
GL_STREAM_DRAW);
tree_cache.index_list.resize(idx_buffer_len);
glGenTextures(1, &tree_cache.time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, tree_cache.time_of_day_texture);
// just fill with zeros. this lets use use the faster texsubimage later
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindVertexArray(0);
}
}
@ -136,28 +156,8 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
m_textures.push_back(gl_tex);
}
fmt::print("level max index stream: {}\n", idx_buffer_len);
m_cache.index_list.resize(idx_buffer_len);
m_has_index_buffer = true;
glGenBuffers(1, &m_index_buffer);
glActiveTexture(GL_TEXTURE1);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_DYNAMIC_DRAW);
fmt::print("level max time of day: {}\n", time_of_day_count);
assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
// regardless of how many we use some fixed max
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
// indexing works properly.
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
glGenTextures(1, &m_time_of_day_texture);
m_has_time_of_day_texture = true;
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
// just fill with zeros. this lets use use the faster texsubimage later
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
m_level_name = level;
fmt::print("TFRAG3 setup: {:.3f}\n", tfrag3_setup_timer.getSeconds());
@ -179,7 +179,7 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
interp_time_of_day_slow(settings.time_of_day_weights, *tree.colors, m_color_result.data());
}
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
m_color_result.data());
@ -187,7 +187,7 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
glBindVertexArray(tree.vao);
glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.index_buffer);
glActiveTexture(GL_TEXTURE0);
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(UINT32_MAX);
@ -195,10 +195,9 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
cull_check_all_slow(settings.planes, tree.vis->vis_nodes, m_cache.vis_temp.data());
int idx_buffer_ptr = make_index_list_from_vis_string(
m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp);
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32),
m_cache.index_list.data());
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data());
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
const auto& draw = tree.draws->operator[](draw_idx);
@ -332,22 +331,14 @@ void Tfrag3::discard_tree_cache() {
for (auto& tree : m_cached_trees) {
if (tree.kind != tfrag3::TFragmentTreeKind::INVALID) {
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
glDeleteTextures(1, &tree.time_of_day_texture);
glDeleteBuffers(1, &tree.vertex_buffer);
glDeleteBuffers(1, &tree.index_buffer);
glDeleteVertexArrays(1, &tree.vao);
}
}
if (m_has_index_buffer) {
glDeleteBuffers(1, &m_index_buffer);
m_has_index_buffer = false;
}
if (m_has_time_of_day_texture) {
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
glDeleteTextures(1, &m_time_of_day_texture);
m_has_time_of_day_texture = false;
}
// delete textures and stuff.
m_cached_trees.clear();
}

View file

@ -48,6 +48,9 @@ class Tfrag3 {
struct TreeCache {
tfrag3::TFragmentTreeKind kind;
GLuint vertex_buffer = -1;
GLuint index_buffer = -1;
std::vector<u32> index_list;
GLuint time_of_day_texture;
GLuint vao;
u32 vert_count = 0;
const std::vector<tfrag3::StripDraw>* draws = nullptr;
@ -71,21 +74,15 @@ class Tfrag3 {
struct Cache {
std::vector<u8> vis_temp;
std::vector<std::pair<int, int>> draw_idx_temp;
std::vector<u32> index_list;
} m_cache;
std::string m_level_name;
std::vector<GLuint> m_textures;
std::vector<TreeCache> m_cached_trees;
GLuint m_time_of_day_texture = -1;
bool m_has_time_of_day_texture = false;
std::vector<math::Vector<u8, 4>> m_color_result;
bool m_has_index_buffer = false;
GLuint m_index_buffer = -1;
GLuint m_debug_vao = -1;
GLuint m_debug_verts = -1;

View file

@ -17,6 +17,11 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
// TODO: right now this will wait to load from disk and unpack it.
auto lev_data = render_state->loader.get_tfrag3_level(level);
// regardless of how many we use some fixed max
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
// indexing works properly.
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
if (m_level_name != level) {
Timer tie_setup_timer;
// We changed level!
@ -26,7 +31,6 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
fmt::print(" New level has {} tie trees\n", lev_data->tie_trees.size());
m_trees.resize(lev_data->tie_trees.size());
size_t idx_buffer_len = 0;
size_t time_of_day_count = 0;
size_t vis_temp_len = 0;
size_t max_draw = 0;
@ -34,6 +38,7 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
// set up each tree
for (size_t tree_idx = 0; tree_idx < lev_data->tie_trees.size(); tree_idx++) {
size_t idx_buffer_len = 0;
const auto& tree = lev_data->tie_trees[tree_idx];
max_draw = std::max(tree.static_draws.size(), max_draw);
for (auto& draw : tree.static_draws) {
@ -85,6 +90,21 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
sizeof(tfrag3::PreloadedVertex), // stride
(void*)offsetof(tfrag3::PreloadedVertex, color_index) // offset (0)
);
glGenBuffers(1, &m_trees[tree_idx].index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_trees[tree_idx].index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
m_trees[tree_idx].index_list.resize(idx_buffer_len);
glActiveTexture(GL_TEXTURE1);
glGenTextures(1, &m_trees[tree_idx].time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, m_trees[tree_idx].time_of_day_texture);
// just fill with zeros. this lets use use the faster texsubimage later
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindVertexArray(0);
}
@ -113,28 +133,8 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
m_textures.push_back(gl_tex);
}
fmt::print("level TIE index stream: {}\n", idx_buffer_len);
m_cache.index_list.resize(idx_buffer_len);
m_has_index_buffer = true;
glGenBuffers(1, &m_index_buffer);
glActiveTexture(GL_TEXTURE1);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
fmt::print("level max time of day: {}\n", time_of_day_count);
assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
// regardless of how many we use some fixed max
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
// indexing works properly.
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
glGenTextures(1, &m_time_of_day_texture);
m_has_time_of_day_texture = true;
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
// just fill with zeros. this lets use use the faster texsubimage later
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
m_level_name = level;
fmt::print("TIE setup: {:.3f}\n", tie_setup_timer.getSeconds());
@ -149,21 +149,13 @@ void Tie3::discard_tree_cache() {
m_textures.clear();
for (auto& tree : m_trees) {
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
glDeleteTextures(1, &tree.time_of_day_texture);
glDeleteBuffers(1, &tree.vertex_buffer);
glDeleteBuffers(1, &tree.index_buffer);
glDeleteVertexArrays(1, &tree.vao);
}
if (m_has_index_buffer) {
glDeleteBuffers(1, &m_index_buffer);
m_has_index_buffer = false;
}
if (m_has_time_of_day_texture) {
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
glDeleteTextures(1, &m_time_of_day_texture);
m_has_time_of_day_texture = false;
}
m_trees.clear();
}
@ -284,7 +276,7 @@ void Tie3::render_tree(int idx,
Timer setup_timer;
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
m_color_result.data());
@ -292,7 +284,7 @@ void Tie3::render_tree(int idx,
glBindVertexArray(tree.vao);
glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.index_buffer);
glActiveTexture(GL_TEXTURE0);
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(UINT32_MAX);
@ -305,7 +297,7 @@ void Tie3::render_tree(int idx,
tree.perf.cull_time.add(0);
Timer index_timer;
idx_buffer_ptr = make_all_visible_index_list(m_cache.draw_idx_temp.data(),
m_cache.index_list.data(), *tree.draws);
tree.index_list.data(), *tree.draws);
tree.perf.index_time.add(index_timer.getSeconds());
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
} else {
@ -315,14 +307,13 @@ void Tie3::render_tree(int idx,
Timer index_timer;
idx_buffer_ptr = make_index_list_from_vis_string(
m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp);
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
tree.perf.index_time.add(index_timer.getSeconds());
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
}
Timer draw_timer;
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32),
m_cache.index_list.data());
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data());
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
const auto& draw = tree.draws->operator[](draw_idx);

View file

@ -27,6 +27,9 @@ class Tie3 : public BucketRenderer {
void discard_tree_cache();
struct Tree {
GLuint vertex_buffer;
GLuint index_buffer;
GLuint time_of_day_texture;
std::vector<u32> index_list;
GLuint vao;
u32 vert_count;
const std::vector<tfrag3::StripDraw>* draws = nullptr;
@ -55,17 +58,10 @@ class Tie3 : public BucketRenderer {
struct Cache {
std::vector<u8> vis_temp;
std::vector<std::pair<int, int>> draw_idx_temp;
std::vector<u32> index_list;
} m_cache;
GLuint m_time_of_day_texture = -1;
bool m_has_time_of_day_texture = false;
std::vector<math::Vector<u8, 4>> m_color_result;
bool m_has_index_buffer = false;
GLuint m_index_buffer = -1;
static constexpr int TIME_OF_DAY_COLOR_COUNT = 8192;
char m_user_level[255] = "vi1";

View file

@ -402,6 +402,10 @@ static void gl_render_display(GfxDisplay* display) {
render_game_frame(width, height, lbox_w, lbox_h);
}
if (g_gfx_data->debug_gui.should_gl_finish()) {
glFinish();
}
// render imgui
g_gfx_data->debug_gui.draw(g_gfx_data->dma_copier.get_last_result().stats);
ImGui::Render();

View file

@ -938,7 +938,10 @@ void CancelDGO(RPC_Dgo_Cmd* cmd) {
SendMbx(sync_mbx, nullptr);
// wait for it to abort.
WaitMbx(dgo_mbx);
assert(cmd); // bug
// this will cause a crash if we cancel because we try to load 2 dgos at the same time.
// this should succeed if it's an actual cancel because we changed which level we're trying to
// load.
assert(cmd);
cmd->result = DGO_RPC_RESULT_ABORTED;
scmd.cmd_id = 0;
}

View file

@ -221,7 +221,7 @@ void Deci2Server::run() {
auto& driver = d2_drivers[handler];
int sent_to_program = 0;
u32 sent_to_program = 0;
while (!want_exit() && (hdr->rsvd < hdr->len || sent_to_program < hdr->rsvd)) {
// send what we have to the program
if (sent_to_program < hdr->rsvd) {

View file

@ -284,6 +284,7 @@
"out/iso/MAI.VIS"
"out/iso/SNO.VIS"
"out/iso/BEA.VIS"
"out/iso/LAV.VIS"
"out/iso/CIT.VIS"
"out/iso/FIN.VIS"

View file

@ -191,8 +191,8 @@ bool Listener::connect_to_target(int n_tries, const std::string& ip, int port) {
void Listener::receive_func() {
while (m_connected) {
// attempt to receive a ListenerMessageHeader
int rcvd = 0;
int rcvd_desired = sizeof(ListenerMessageHeader);
u32 rcvd = 0;
u32 rcvd_desired = sizeof(ListenerMessageHeader);
char buff[sizeof(ListenerMessageHeader)];
while (rcvd < rcvd_desired) {
auto got = read_from_socket(listen_socket, buff + rcvd, rcvd_desired - rcvd);