mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
improve performance on intel graphics (#1041)
* improve performance on intel graphics * more tweaks, add a glfinish button * remove divide in fragment shader * temp * add cpu sky blend * use vao * change format * use floats in direct renderer * format * format again * tfrag ice
This commit is contained in:
parent
3f84bd85e7
commit
cd9e74819c
|
@ -5,18 +5,15 @@
|
|||
* Common types shared between the compiler and the runtime for the listener connection.
|
||||
*/
|
||||
|
||||
#ifndef JAK1_LISTENER_COMMON_H
|
||||
#define JAK1_LISTENER_COMMON_H
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
/*!
|
||||
* Header of a DECI2 protocol message
|
||||
* TODO - there are other copies of this somewhere
|
||||
* NOTE: we've changed this to use 32-bit integers for len/rsvd
|
||||
*/
|
||||
struct Deci2Header {
|
||||
u16 len; //! size of data following header
|
||||
u16 rsvd; //! zero, used internally by runtime.
|
||||
u32 len; //! size of data following header
|
||||
u32 rsvd; //! zero, used internally by runtime.
|
||||
u16 proto; //! protocol identification number
|
||||
u8 src; //! identification code of sender
|
||||
u8 dst; //! identification code of recipient
|
||||
|
@ -64,5 +61,3 @@ struct ListenerMessageHeader {
|
|||
constexpr int DECI2_PORT = 8112; // TODO - is this a good choice?
|
||||
|
||||
constexpr u16 DECI2_PROTOCOL = 0xe042;
|
||||
|
||||
#endif // JAK1_LISTENER_COMMON_H
|
||||
|
|
|
@ -2049,6 +2049,8 @@ void make_tfrag3_data(std::map<u32, std::vector<GroupedDraw>>& draws,
|
|||
vtx.s = vert.stq.x();
|
||||
vtx.t = vert.stq.y();
|
||||
vtx.q = vert.stq.z();
|
||||
// if this is true, we can remove a divide in the shader
|
||||
assert(vtx.q == 1.f);
|
||||
vtx.color_index = vert.rgba / 4;
|
||||
// assert((vert.rgba >> 2) < 1024); spider cave has 2048?
|
||||
assert((vert.rgba & 3) == 0);
|
||||
|
|
|
@ -1965,6 +1965,8 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
|
|||
vtx.s = vert.tex.x();
|
||||
vtx.t = vert.tex.y();
|
||||
vtx.q = vert.tex.z();
|
||||
// if this is true, we can remove a divide in the shader
|
||||
assert(vtx.q == 1.f);
|
||||
if (vert.color_index_index == UINT32_MAX) {
|
||||
vtx.color_index = 0;
|
||||
} else {
|
||||
|
|
|
@ -94,6 +94,8 @@ set(RUNTIME_SOURCE
|
|||
graphics/opengl_renderer/OpenGLRenderer.cpp
|
||||
graphics/opengl_renderer/Profiler.cpp
|
||||
graphics/opengl_renderer/Shader.cpp
|
||||
graphics/opengl_renderer/SkyBlendCPU.cpp
|
||||
graphics/opengl_renderer/SkyBlendGPU.cpp
|
||||
graphics/opengl_renderer/SkyRenderer.cpp
|
||||
graphics/opengl_renderer/SpriteRenderer.cpp
|
||||
graphics/opengl_renderer/TextureUploadHandler.cpp
|
||||
|
|
|
@ -26,9 +26,11 @@ enum class BucketId {
|
|||
ALPHA_TEX_LEVEL0 = 31,
|
||||
TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0 = 32,
|
||||
TFRAG_DIRT_LEVEL0 = 34,
|
||||
TFRAG_ICE_LEVEL0 = 36,
|
||||
ALPHA_TEX_LEVEL1 = 38,
|
||||
TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1 = 39,
|
||||
TFRAG_DIRT_LEVEL1 = 41,
|
||||
TFRAG_ICE_LEVEL1 = 43,
|
||||
PRIS_TEX_LEVEL0 = 48,
|
||||
PRIS_TEX_LEVEL1 = 51,
|
||||
WATER_TEX_LEVEL0 = 57,
|
||||
|
@ -59,6 +61,8 @@ struct SharedRenderState {
|
|||
void* ee_main_memory = nullptr;
|
||||
u32 offset_of_s7;
|
||||
bool dump_playback = false;
|
||||
|
||||
bool use_sky_cpu = true;
|
||||
};
|
||||
|
||||
/*!
|
||||
|
|
|
@ -8,28 +8,48 @@
|
|||
DirectRenderer::DirectRenderer(const std::string& name, BucketId my_id, int batch_size, Mode mode)
|
||||
: BucketRenderer(name, my_id), m_prim_buffer(batch_size), m_mode(mode) {
|
||||
glGenBuffers(1, &m_ogl.vertex_buffer);
|
||||
glGenBuffers(1, &m_ogl.color_buffer);
|
||||
glGenBuffers(1, &m_ogl.st_buffer);
|
||||
glGenVertexArrays(1, &m_ogl.vao);
|
||||
|
||||
glBindVertexArray(m_ogl.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
|
||||
m_ogl.vertex_buffer_bytes = batch_size * 3 * 3 * sizeof(u32);
|
||||
glBufferData(GL_ARRAY_BUFFER, m_ogl.vertex_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
|
||||
m_ogl.vertex_buffer_max_verts = batch_size * 3 * 2;
|
||||
m_ogl.vertex_buffer_bytes = m_ogl.vertex_buffer_max_verts * sizeof(Vertex);
|
||||
glBufferData(GL_ARRAY_BUFFER, m_ogl.vertex_buffer_bytes, nullptr,
|
||||
GL_STREAM_DRAW); // todo stream?
|
||||
glEnableVertexAttribArray(0);
|
||||
glVertexAttribPointer(
|
||||
0, // location 0 in the shader
|
||||
4, // 3 floats per vert
|
||||
GL_FLOAT, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
sizeof(Vertex), //
|
||||
(void*)offsetof(Vertex, xyz) // offset in array (why is is this a pointer...)
|
||||
);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
|
||||
m_ogl.color_buffer_bytes = batch_size * 3 * 4 * sizeof(u8);
|
||||
glBufferData(GL_ARRAY_BUFFER, m_ogl.color_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
|
||||
glEnableVertexAttribArray(1);
|
||||
glVertexAttribPointer(
|
||||
1, // location 0 in the shader
|
||||
4, // 4 floats per vert (w unused)
|
||||
GL_UNSIGNED_BYTE, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
sizeof(Vertex), //
|
||||
(void*)offsetof(Vertex, rgba) // offset in array (why is is this a pointer...)
|
||||
);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
|
||||
m_ogl.st_buffer_bytes = batch_size * 3 * 3 * sizeof(float);
|
||||
glBufferData(GL_ARRAY_BUFFER, m_ogl.st_buffer_bytes, nullptr, GL_DYNAMIC_DRAW);
|
||||
glEnableVertexAttribArray(2);
|
||||
glVertexAttribPointer(
|
||||
2, // location 0 in the shader
|
||||
3, // 3 floats per vert
|
||||
GL_FLOAT, // floats
|
||||
GL_FALSE, // normalized, ignored,
|
||||
sizeof(Vertex), //
|
||||
(void*)offsetof(Vertex, stq) // offset in array (why is is this a pointer...)
|
||||
);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glBindVertexArray(0);
|
||||
}
|
||||
|
||||
DirectRenderer::~DirectRenderer() {
|
||||
glDeleteBuffers(1, &m_ogl.color_buffer);
|
||||
glDeleteBuffers(1, &m_ogl.vertex_buffer);
|
||||
glDeleteBuffers(1, &m_ogl.st_buffer);
|
||||
glDeleteVertexArrays(1, &m_ogl.vao);
|
||||
}
|
||||
|
||||
|
@ -158,50 +178,16 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
|
|||
|
||||
// render!
|
||||
// update buffers:
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<u32, 3>),
|
||||
m_prim_buffer.verts.data());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<u8, 4>),
|
||||
m_prim_buffer.rgba_u8.data());
|
||||
if (m_prim_gl_state.texture_enable) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector<float, 3>),
|
||||
m_prim_buffer.stqs.data());
|
||||
u32 vertex_offset = m_ogl.last_vertex_offset;
|
||||
if (vertex_offset + m_prim_buffer.vert_count >= m_ogl.vertex_buffer_max_verts) {
|
||||
lg::warn("Buffer wrapped in {} (upcoming size is {}, {} bytes)\n", m_name,
|
||||
m_prim_buffer.vert_count, m_prim_buffer.vert_count * sizeof(Vertex));
|
||||
vertex_offset = 0;
|
||||
}
|
||||
|
||||
// setup attributes:
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer);
|
||||
glEnableVertexAttribArray(0);
|
||||
glVertexAttribPointer(0, // location 0 in the shader
|
||||
3, // 3 floats per vert
|
||||
GL_UNSIGNED_INT, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
0, // tightly packed
|
||||
0 // offset in array (why is is this a pointer...)
|
||||
);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer);
|
||||
glEnableVertexAttribArray(1);
|
||||
glVertexAttribPointer(1, // location 0 in the shader
|
||||
4, // 3 floats per vert
|
||||
GL_UNSIGNED_BYTE, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
0, // tightly packed
|
||||
0);
|
||||
|
||||
if (m_prim_gl_state.texture_enable) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer);
|
||||
glEnableVertexAttribArray(2);
|
||||
glVertexAttribPointer(2, // location 0 in the shader
|
||||
3, // 3 floats per vert
|
||||
GL_FLOAT, // floats
|
||||
GL_FALSE, // normalized, ignored,
|
||||
0, // tightly packed
|
||||
0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
// assert(false);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, vertex_offset * sizeof(Vertex),
|
||||
m_prim_buffer.vert_count * sizeof(Vertex), m_prim_buffer.vertices.data());
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
int draw_count = 0;
|
||||
if (m_mode == Mode::SPRITE_CPU) {
|
||||
|
@ -214,13 +200,13 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
|
|||
}
|
||||
|
||||
if (m_sprite_mode.do_first_draw) {
|
||||
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
|
||||
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
|
||||
draw_count++;
|
||||
}
|
||||
if (m_sprite_mode.do_second_draw) {
|
||||
render_state->shaders[ShaderId::SPRITE_CPU_AFAIL].activate();
|
||||
glDepthMask(GL_FALSE);
|
||||
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
|
||||
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
|
||||
if (m_test_state.depth_writes) {
|
||||
glDepthMask(GL_TRUE);
|
||||
}
|
||||
|
@ -230,7 +216,7 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
|
|||
draw_count++;
|
||||
}
|
||||
} else {
|
||||
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
|
||||
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
|
||||
draw_count++;
|
||||
}
|
||||
|
||||
|
@ -238,7 +224,7 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
|
|||
render_state->shaders[ShaderId::DEBUG_RED].activate();
|
||||
glDisable(GL_BLEND);
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
|
||||
glDrawArrays(GL_TRIANGLES, 0, m_prim_buffer.vert_count);
|
||||
glDrawArrays(GL_TRIANGLES, vertex_offset, m_prim_buffer.vert_count);
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
draw_count++;
|
||||
}
|
||||
|
@ -249,6 +235,8 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil
|
|||
prof.add_draw_call(draw_count);
|
||||
m_stats.triangles += n_tris;
|
||||
m_stats.draw_calls += draw_count;
|
||||
m_ogl.last_vertex_offset = vertex_offset + m_prim_buffer.vert_count;
|
||||
m_ogl.last_vertex_offset = (m_ogl.last_vertex_offset + 3) & ~3;
|
||||
m_prim_buffer.vert_count = 0;
|
||||
}
|
||||
|
||||
|
@ -1033,6 +1021,8 @@ void DirectRenderer::reset_state() {
|
|||
|
||||
m_prim_building = PrimBuildState();
|
||||
|
||||
m_ogl.last_vertex_offset = 0;
|
||||
|
||||
m_stats = {};
|
||||
}
|
||||
|
||||
|
@ -1075,17 +1065,18 @@ void DirectRenderer::PrimGlState::from_register(GsPrim reg) {
|
|||
}
|
||||
|
||||
DirectRenderer::PrimitiveBuffer::PrimitiveBuffer(int max_triangles) {
|
||||
rgba_u8.resize(max_triangles * 3);
|
||||
verts.resize(max_triangles * 3);
|
||||
stqs.resize(max_triangles * 3);
|
||||
vertices.resize(max_triangles * 3);
|
||||
max_verts = max_triangles * 3;
|
||||
}
|
||||
|
||||
void DirectRenderer::PrimitiveBuffer::push(const math::Vector<u8, 4>& rgba,
|
||||
const math::Vector<u32, 3>& vert,
|
||||
const math::Vector<float, 3>& st) {
|
||||
rgba_u8[vert_count] = rgba;
|
||||
verts[vert_count] = vert;
|
||||
stqs[vert_count] = st;
|
||||
auto& v = vertices[vert_count];
|
||||
v.rgba = rgba;
|
||||
v.xyz[0] = (float)vert[0] / UINT32_MAX;
|
||||
v.xyz[1] = (float)vert[1] / UINT32_MAX;
|
||||
v.xyz[2] = (float)vert[2] / UINT32_MAX;
|
||||
v.stq = st;
|
||||
vert_count++;
|
||||
}
|
||||
|
|
|
@ -179,11 +179,16 @@ class DirectRenderer : public BucketRenderer {
|
|||
|
||||
} m_prim_building;
|
||||
|
||||
struct Vertex {
|
||||
math::Vector<float, 4> xyz;
|
||||
math::Vector<float, 3> stq;
|
||||
math::Vector<u8, 4> rgba;
|
||||
};
|
||||
static_assert(sizeof(Vertex) == 32);
|
||||
|
||||
struct PrimitiveBuffer {
|
||||
PrimitiveBuffer(int max_triangles);
|
||||
std::vector<math::Vector<u8, 4>> rgba_u8;
|
||||
std::vector<math::Vector<u32, 3>> verts;
|
||||
std::vector<math::Vector<float, 3>> stqs;
|
||||
std::vector<Vertex> vertices;
|
||||
int vert_count = 0;
|
||||
int max_verts = 0;
|
||||
|
||||
|
@ -195,11 +200,11 @@ class DirectRenderer : public BucketRenderer {
|
|||
} m_prim_buffer;
|
||||
|
||||
struct {
|
||||
GLuint vertex_buffer, color_buffer, st_buffer;
|
||||
GLuint vertex_buffer;
|
||||
GLuint vao;
|
||||
u32 vertex_buffer_bytes = 0;
|
||||
u32 color_buffer_bytes = 0;
|
||||
u32 st_buffer_bytes = 0;
|
||||
u32 vertex_buffer_max_verts = 0;
|
||||
u32 last_vertex_offset = 0;
|
||||
} m_ogl;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
|
||||
// for the vif callback
|
||||
#include "game/kernel/kmachine.h"
|
||||
namespace {
|
||||
std::string g_current_render;
|
||||
|
||||
}
|
||||
|
||||
/*!
|
||||
* OpenGL Error callback. If we do something invalid, this will be called.
|
||||
|
@ -28,11 +32,14 @@ void GLAPIENTRY opengl_error_callback(GLenum source,
|
|||
// On some drivers this prints on every single texture upload, which is too much spam
|
||||
lg::debug("OpenGL notification 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
|
||||
} else if (severity == GL_DEBUG_SEVERITY_LOW) {
|
||||
lg::info("OpenGL message 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
|
||||
lg::info("[{}] OpenGL message 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
|
||||
message);
|
||||
} else if (severity == GL_DEBUG_SEVERITY_MEDIUM) {
|
||||
lg::warn("OpenGL warn 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
|
||||
lg::warn("[{}] OpenGL warn 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
|
||||
message);
|
||||
} else if (severity == GL_DEBUG_SEVERITY_HIGH) {
|
||||
lg::error("OpenGL error 0x{:X} S{:X} T{:X}: {}", id, source, type, message);
|
||||
lg::error("[{}] OpenGL error 0x{:X} S{:X} T{:X}: {}", g_current_render, id, source, type,
|
||||
message);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,11 +67,7 @@ void OpenGLRenderer::init_bucket_renderers() {
|
|||
std::vector<tfrag3::TFragmentTreeKind> normal_tfrags = {tfrag3::TFragmentTreeKind::NORMAL,
|
||||
tfrag3::TFragmentTreeKind::LOWRES};
|
||||
std::vector<tfrag3::TFragmentTreeKind> dirt_tfrags = {tfrag3::TFragmentTreeKind::DIRT};
|
||||
// TODO ice
|
||||
// std::vector<tfrag3::TFragmentTreeKind> ice_tfrags = {tfrag3::TFragmentTreeKind::ICE};
|
||||
|
||||
// std::vector<tfrag3::TFragmentTreeKind> trans_tfrags = {tfrag3::TFragmentTreeKind::TRANS,
|
||||
// tfrag3::TFragmentTreeKind::LOWRES_TRANS};
|
||||
std::vector<tfrag3::TFragmentTreeKind> ice_tfrags = {tfrag3::TFragmentTreeKind::ICE};
|
||||
|
||||
init_bucket_renderer<EmptyBucketRenderer>("bucket0", BucketId::BUCKET0);
|
||||
init_bucket_renderer<SkyRenderer>("sky", BucketId::SKY_DRAW);
|
||||
|
@ -79,13 +82,18 @@ void OpenGLRenderer::init_bucket_renderers() {
|
|||
init_bucket_renderer<TextureUploadHandler>("shrub-tex-1", BucketId::SHRUB_TEX_LEVEL1);
|
||||
init_bucket_renderer<TextureUploadHandler>("alpha-tex-0", BucketId::ALPHA_TEX_LEVEL0);
|
||||
init_bucket_renderer<TextureUploadHandler>("alpha-tex-1", BucketId::ALPHA_TEX_LEVEL1);
|
||||
auto sky_blender = std::make_shared<SkyBlender>();
|
||||
auto sky_gpu_blender = std::make_shared<SkyBlendGPU>();
|
||||
auto sky_cpu_blender = std::make_shared<SkyBlendCPU>();
|
||||
init_bucket_renderer<SkyBlendHandler>("sky-blend-and-tfrag-trans-0",
|
||||
BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0, sky_blender);
|
||||
BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0,
|
||||
sky_gpu_blender, sky_cpu_blender);
|
||||
init_bucket_renderer<TFragment>("tfrag-dirt-0", BucketId::TFRAG_DIRT_LEVEL0, dirt_tfrags, false);
|
||||
init_bucket_renderer<TFragment>("tfrag-ice-0", BucketId::TFRAG_ICE_LEVEL0, ice_tfrags, false);
|
||||
init_bucket_renderer<SkyBlendHandler>("sky-blend-and-tfrag-trans-1",
|
||||
BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1, sky_blender);
|
||||
BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1,
|
||||
sky_gpu_blender, sky_cpu_blender);
|
||||
init_bucket_renderer<TFragment>("tfrag-dirt-1", BucketId::TFRAG_DIRT_LEVEL1, dirt_tfrags, false);
|
||||
init_bucket_renderer<TFragment>("tfrag-ice-1", BucketId::TFRAG_ICE_LEVEL1, ice_tfrags, false);
|
||||
init_bucket_renderer<TextureUploadHandler>("pris-tex-0", BucketId::PRIS_TEX_LEVEL0);
|
||||
init_bucket_renderer<TextureUploadHandler>("pris-tex-1", BucketId::PRIS_TEX_LEVEL1);
|
||||
init_bucket_renderer<TextureUploadHandler>("water-tex-0", BucketId::WATER_TEX_LEVEL0);
|
||||
|
@ -94,7 +102,7 @@ void OpenGLRenderer::init_bucket_renderers() {
|
|||
init_bucket_renderer<SpriteRenderer>("sprite", BucketId::SPRITE);
|
||||
init_bucket_renderer<DirectRenderer>("debug-draw-0", BucketId::DEBUG_DRAW_0, 1024,
|
||||
DirectRenderer::Mode::NORMAL);
|
||||
init_bucket_renderer<DirectRenderer>("debug-draw-1", BucketId::DEBUG_DRAW_1, 1024,
|
||||
init_bucket_renderer<DirectRenderer>("debug-draw-1", BucketId::DEBUG_DRAW_1, 4096,
|
||||
DirectRenderer::Mode::NORMAL);
|
||||
|
||||
// for now, for any unset renderers, just set them to an EmptyBucketRenderer.
|
||||
|
@ -163,6 +171,9 @@ void OpenGLRenderer::serialize(Serializer& ser) {
|
|||
*/
|
||||
void OpenGLRenderer::draw_renderer_selection_window() {
|
||||
ImGui::Begin("Renderer Debug");
|
||||
|
||||
ImGui::Checkbox("Sky CPU", &m_render_state.use_sky_cpu);
|
||||
|
||||
for (size_t i = 0; i < m_bucket_renderers.size(); i++) {
|
||||
auto renderer = m_bucket_renderers[i].get();
|
||||
if (renderer && !renderer->empty()) {
|
||||
|
@ -232,8 +243,11 @@ void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof)
|
|||
for (int bucket_id = 0; bucket_id < (int)BucketId::MAX_BUCKETS; bucket_id++) {
|
||||
auto& renderer = m_bucket_renderers[bucket_id];
|
||||
auto bucket_prof = prof.make_scoped_child(renderer->name_and_id());
|
||||
// lg::info("Render: {} start\n", renderer->name_and_id());
|
||||
g_current_render = renderer->name_and_id();
|
||||
renderer->render(dma, &m_render_state, bucket_prof);
|
||||
// should have ended at the start of the next chain
|
||||
// lg::info("Render: {} end\n", renderer->name_and_id());
|
||||
// should have ended at the start of the next chain
|
||||
assert(dma.current_tag_offset() == m_render_state.next_bucket);
|
||||
m_render_state.next_bucket += 16;
|
||||
|
||||
|
@ -241,6 +255,7 @@ void OpenGLRenderer::dispatch_buckets(DmaFollower dma, ScopedProfilerNode& prof)
|
|||
vif_interrupt_callback();
|
||||
}
|
||||
}
|
||||
g_current_render = "";
|
||||
|
||||
// TODO ending data.
|
||||
}
|
||||
|
|
166
game/graphics/opengl_renderer/SkyBlendCPU.cpp
Normal file
166
game/graphics/opengl_renderer/SkyBlendCPU.cpp
Normal file
|
@ -0,0 +1,166 @@
|
|||
#include "SkyBlendCPU.h"
|
||||
#include "game/graphics/opengl_renderer/AdgifHandler.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
SkyBlendCPU::SkyBlendCPU() {
|
||||
glGenTextures(2, m_textures);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
|
||||
m_texture_data[i].resize(4 * m_sizes[i] * m_sizes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
SkyBlendCPU::~SkyBlendCPU() {
|
||||
glDeleteTextures(2, m_textures);
|
||||
}
|
||||
|
||||
void blend_sky_initial_fast(u8 intensity, u8* out, const u8* in, u32 size) {
|
||||
__m256i intensity_vec = _mm256_set1_epi16(intensity);
|
||||
for (u32 i = 0; i < size / 16; i++) {
|
||||
__m128i tex_data8 = _mm_loadu_si128((const __m128i*)(in + (i * 16)));
|
||||
__m256i tex_data16 = _mm256_cvtepu8_epi16(tex_data8);
|
||||
tex_data16 = _mm256_mullo_epi16(tex_data16, intensity_vec);
|
||||
tex_data16 = _mm256_srli_epi16(tex_data16, 7);
|
||||
auto hi = _mm256_extracti128_si256(tex_data16, 1);
|
||||
auto result = _mm_packus_epi16(_mm256_castsi256_si128(tex_data16), hi);
|
||||
_mm_storeu_si128((__m128i*)(out + (i * 16)), result);
|
||||
}
|
||||
}
|
||||
|
||||
void blend_sky_fast(u8 intensity, u8* out, const u8* in, u32 size) {
|
||||
__m256i intensity_vec = _mm256_set1_epi16(intensity);
|
||||
for (u32 i = 0; i < size / 16; i++) {
|
||||
__m128i tex_data8 = _mm_loadu_si128((const __m128i*)(in + (i * 16)));
|
||||
__m128i out_val = _mm_loadu_si128((const __m128i*)(out + (i * 16)));
|
||||
__m256i tex_data16 = _mm256_cvtepu8_epi16(tex_data8);
|
||||
tex_data16 = _mm256_mullo_epi16(tex_data16, intensity_vec);
|
||||
tex_data16 = _mm256_srli_epi16(tex_data16, 7);
|
||||
auto hi = _mm256_extracti128_si256(tex_data16, 1);
|
||||
auto result = _mm_packus_epi16(_mm256_castsi256_si128(tex_data16), hi);
|
||||
out_val = _mm_add_epi16(out_val, result);
|
||||
_mm_storeu_si128((__m128i*)(out + (i * 16)), out_val);
|
||||
}
|
||||
}
|
||||
|
||||
SkyBlendStats SkyBlendCPU::do_sky_blends(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof) {
|
||||
SkyBlendStats stats;
|
||||
|
||||
Timer sky_timer;
|
||||
while (dma.current_tag().qwc == 6) {
|
||||
// assuming that the vif and gif-tag is correct
|
||||
auto setup_data = dma.read_and_advance();
|
||||
if (render_state->dump_playback) {
|
||||
// continue;
|
||||
}
|
||||
|
||||
// first is an adgif
|
||||
AdgifHelper adgif(setup_data.data + 16);
|
||||
assert(adgif.is_normal_adgif());
|
||||
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
|
||||
|
||||
// next is the actual draw
|
||||
auto draw_data = dma.read_and_advance();
|
||||
assert(draw_data.size_bytes == 6 * 16);
|
||||
|
||||
GifTag draw_or_blend_tag(draw_data.data);
|
||||
|
||||
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
|
||||
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
|
||||
|
||||
// here's we're relying on the format of the drawing to get the alpha/offset.
|
||||
u32 coord;
|
||||
u32 intensity;
|
||||
memcpy(&coord, draw_data.data + (5 * 16), 4);
|
||||
memcpy(&intensity, draw_data.data + 16, 4);
|
||||
|
||||
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
|
||||
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
|
||||
int buffer_idx = 0;
|
||||
if (coord == 0x200) {
|
||||
// sky
|
||||
buffer_idx = 0;
|
||||
} else if (coord == 0x400) {
|
||||
buffer_idx = 1;
|
||||
} else {
|
||||
assert(false); // bad data
|
||||
}
|
||||
|
||||
// look up the source texture
|
||||
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
|
||||
assert(tex);
|
||||
assert(!tex->only_on_gpu); // we need the actual data!!
|
||||
|
||||
// slow version
|
||||
/*
|
||||
if (is_first_draw) {
|
||||
memset(m_texture_data[buffer_idx].data(), 0, m_texture_data[buffer_idx].size());
|
||||
}
|
||||
|
||||
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
|
||||
// generated with the GOAL code.)
|
||||
assert(intensity <= 128);
|
||||
assert(m_texture_data[buffer_idx].size() == tex->data.size());
|
||||
for (size_t i = 0; i < m_texture_data[buffer_idx].size(); i++) {
|
||||
u32 val = tex->data[i] * intensity;
|
||||
val >>= 7;
|
||||
m_texture_data[buffer_idx][i] += val;
|
||||
}
|
||||
*/
|
||||
if (is_first_draw) {
|
||||
blend_sky_initial_fast(intensity, m_texture_data[buffer_idx].data(), tex->data.data(),
|
||||
tex->data.size());
|
||||
} else {
|
||||
blend_sky_fast(intensity, m_texture_data[buffer_idx].data(), tex->data.data(),
|
||||
tex->data.size());
|
||||
}
|
||||
|
||||
if (buffer_idx == 0) {
|
||||
if (is_first_draw) {
|
||||
stats.sky_draws++;
|
||||
} else {
|
||||
stats.sky_blends++;
|
||||
}
|
||||
} else {
|
||||
if (is_first_draw) {
|
||||
stats.cloud_draws++;
|
||||
} else {
|
||||
stats.cloud_blends++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// put in pool.
|
||||
for (int i = 0; i < 2; i++) {
|
||||
// todo - these are hardcoded and rely on the vram layout.
|
||||
u32 tbp = i == 0 ? 8064 : 8096;
|
||||
|
||||
// lookup existing, or create a new entry
|
||||
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
|
||||
if (!tex) {
|
||||
auto tsp = std::make_shared<TextureRecord>();
|
||||
render_state->texture_pool->set_texture(tbp, tsp);
|
||||
tex = tsp.get();
|
||||
}
|
||||
|
||||
// update it
|
||||
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8_REV, m_texture_data[i].data());
|
||||
|
||||
tex->gpu_texture = m_textures[i];
|
||||
tex->on_gpu = true;
|
||||
tex->only_on_gpu = true;
|
||||
tex->do_gc = false;
|
||||
tex->w = m_sizes[i];
|
||||
tex->h = m_sizes[i];
|
||||
tex->name = fmt::format("PC-SKY-{}", i);
|
||||
}
|
||||
// fmt::print("sky blend took {:.2f} ms\n", sky_timer.getMs());
|
||||
|
||||
return stats;
|
||||
}
|
21
game/graphics/opengl_renderer/SkyBlendCPU.h
Normal file
21
game/graphics/opengl_renderer/SkyBlendCPU.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/dma/dma_chain_read.h"
|
||||
#include "game/graphics/opengl_renderer/BucketRenderer.h"
|
||||
#include "game/graphics/pipelines/opengl.h"
|
||||
#include "game/graphics/opengl_renderer/SkyBlendCommon.h"
|
||||
|
||||
class SkyBlendCPU {
|
||||
public:
|
||||
SkyBlendCPU();
|
||||
~SkyBlendCPU();
|
||||
|
||||
SkyBlendStats do_sky_blends(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof);
|
||||
|
||||
private:
|
||||
GLuint m_textures[2]; // sky, clouds
|
||||
static constexpr int m_sizes[2] = {32, 64};
|
||||
std::vector<u8> m_texture_data[2];
|
||||
};
|
8
game/graphics/opengl_renderer/SkyBlendCommon.h
Normal file
8
game/graphics/opengl_renderer/SkyBlendCommon.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
struct SkyBlendStats {
|
||||
int sky_draws = 0;
|
||||
int cloud_draws = 0;
|
||||
int sky_blends = 0;
|
||||
int cloud_blends = 0;
|
||||
};
|
223
game/graphics/opengl_renderer/SkyBlendGPU.cpp
Normal file
223
game/graphics/opengl_renderer/SkyBlendGPU.cpp
Normal file
|
@ -0,0 +1,223 @@
|
|||
#include "SkyBlendGPU.h"
|
||||
|
||||
#include "common/log/log.h"
|
||||
#include "game/graphics/opengl_renderer/AdgifHandler.h"
|
||||
|
||||
SkyBlendGPU::SkyBlendGPU() {
|
||||
// generate textures for sky blending
|
||||
glGenFramebuffers(2, m_framebuffers);
|
||||
glGenTextures(2, m_textures);
|
||||
|
||||
GLint old_framebuffer;
|
||||
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
|
||||
|
||||
// setup the framebuffers
|
||||
for (int i = 0; i < 2; i++) {
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[i]);
|
||||
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[i], 0);
|
||||
GLenum draw_buffers[1] = {GL_COLOR_ATTACHMENT0};
|
||||
glDrawBuffers(1, draw_buffers);
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
|
||||
lg::error("SkyTextureHandler setup failed.");
|
||||
}
|
||||
}
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
|
||||
glGenBuffers(1, &m_gl_vertex_buffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 6, nullptr, GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, old_framebuffer);
|
||||
|
||||
// we only draw squares
|
||||
m_vertex_data[0].x = 0;
|
||||
m_vertex_data[0].y = 0;
|
||||
|
||||
m_vertex_data[1].x = 1;
|
||||
m_vertex_data[1].y = 0;
|
||||
|
||||
m_vertex_data[2].x = 0;
|
||||
m_vertex_data[2].y = 1;
|
||||
|
||||
m_vertex_data[3].x = 1;
|
||||
m_vertex_data[3].y = 0;
|
||||
|
||||
m_vertex_data[4].x = 0;
|
||||
m_vertex_data[4].y = 1;
|
||||
|
||||
m_vertex_data[5].x = 1;
|
||||
m_vertex_data[5].y = 1;
|
||||
}
|
||||
|
||||
SkyBlendGPU::~SkyBlendGPU() {
|
||||
glDeleteFramebuffers(2, m_framebuffers);
|
||||
glDeleteBuffers(1, &m_gl_vertex_buffer);
|
||||
glDeleteTextures(2, m_textures);
|
||||
}
|
||||
|
||||
SkyBlendStats SkyBlendGPU::do_sky_blends(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof) {
|
||||
SkyBlendStats stats;
|
||||
GLuint vao;
|
||||
glGenVertexArrays(1, &vao);
|
||||
glBindVertexArray(vao);
|
||||
|
||||
GLint old_viewport[4];
|
||||
glGetIntegerv(GL_VIEWPORT, old_viewport);
|
||||
|
||||
GLint old_framebuffer;
|
||||
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
|
||||
|
||||
while (dma.current_tag().qwc == 6) {
|
||||
// assuming that the vif and gif-tag is correct
|
||||
auto setup_data = dma.read_and_advance();
|
||||
if (render_state->dump_playback) {
|
||||
// continue;
|
||||
}
|
||||
|
||||
// first is an adgif
|
||||
AdgifHelper adgif(setup_data.data + 16);
|
||||
assert(adgif.is_normal_adgif());
|
||||
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
|
||||
|
||||
// next is the actual draw
|
||||
auto draw_data = dma.read_and_advance();
|
||||
assert(draw_data.size_bytes == 6 * 16);
|
||||
|
||||
GifTag draw_or_blend_tag(draw_data.data);
|
||||
|
||||
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
|
||||
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
|
||||
|
||||
// here's we're relying on the format of the drawing to get the alpha/offset.
|
||||
u32 coord;
|
||||
u32 intensity;
|
||||
memcpy(&coord, draw_data.data + (5 * 16), 4);
|
||||
memcpy(&intensity, draw_data.data + 16, 4);
|
||||
|
||||
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
|
||||
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
|
||||
int buffer_idx = 0;
|
||||
if (coord == 0x200) {
|
||||
// sky
|
||||
buffer_idx = 0;
|
||||
} else if (coord == 0x400) {
|
||||
buffer_idx = 1;
|
||||
} else {
|
||||
assert(false); // bad data
|
||||
}
|
||||
|
||||
// look up the source texture
|
||||
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
|
||||
assert(tex);
|
||||
|
||||
if (!tex->on_gpu) {
|
||||
render_state->texture_pool->upload_to_gpu(tex);
|
||||
}
|
||||
|
||||
// setup for rendering!
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[buffer_idx]);
|
||||
glViewport(0, 0, m_sizes[buffer_idx], m_sizes[buffer_idx]);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[buffer_idx], 0);
|
||||
render_state->shaders[ShaderId::SKY_BLEND].activate();
|
||||
|
||||
// if the first is set, it disables alpha. we can just clear here, so it's easier to find
|
||||
// in renderdoc.
|
||||
if (is_first_draw) {
|
||||
float clear[4] = {0, 0, 0, 0};
|
||||
glClearBufferfv(GL_COLOR, 0, clear);
|
||||
}
|
||||
|
||||
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
|
||||
// generated with the GOAL code.)
|
||||
assert(intensity <= 128);
|
||||
|
||||
// todo - could do this on the GPU, but probably not worth it for <20 triangles...
|
||||
float intensity_float = intensity / 128.f;
|
||||
for (auto& vert : m_vertex_data) {
|
||||
vert.intensity = intensity_float;
|
||||
}
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
// will add.
|
||||
glBlendFunc(GL_ONE, GL_ONE);
|
||||
|
||||
// setup draw data
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vertex) * 6, m_vertex_data);
|
||||
glEnableVertexAttribArray(0);
|
||||
glVertexAttribPointer(0, // location 0 in the shader
|
||||
3, // 3 floats per vert
|
||||
GL_FLOAT, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
0, // tightly packed
|
||||
0
|
||||
|
||||
);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, tex->gpu_texture);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::SKY_BLEND].id(), "T0"), 0);
|
||||
|
||||
// Draw a sqaure
|
||||
glDrawArrays(GL_TRIANGLES, 0, 6);
|
||||
|
||||
// 1 draw, 2 triangles
|
||||
prof.add_draw_call(1);
|
||||
prof.add_tri(2);
|
||||
|
||||
if (buffer_idx == 0) {
|
||||
if (is_first_draw) {
|
||||
stats.sky_draws++;
|
||||
} else {
|
||||
stats.sky_blends++;
|
||||
}
|
||||
} else {
|
||||
if (is_first_draw) {
|
||||
stats.cloud_draws++;
|
||||
} else {
|
||||
stats.cloud_blends++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// put in pool.
|
||||
for (int i = 0; i < 2; i++) {
|
||||
// todo - these are hardcoded and rely on the vram layout.
|
||||
u32 tbp = i == 0 ? 8064 : 8096;
|
||||
|
||||
// lookup existing, or create a new entry
|
||||
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
|
||||
if (!tex) {
|
||||
auto tsp = std::make_shared<TextureRecord>();
|
||||
render_state->texture_pool->set_texture(tbp, tsp);
|
||||
tex = tsp.get();
|
||||
}
|
||||
|
||||
// update it
|
||||
tex->gpu_texture = m_textures[i];
|
||||
tex->on_gpu = true;
|
||||
tex->only_on_gpu = true;
|
||||
tex->do_gc = false;
|
||||
tex->w = m_sizes[i];
|
||||
tex->h = m_sizes[i];
|
||||
tex->name = fmt::format("PC-SKY-{}", i);
|
||||
}
|
||||
|
||||
glViewport(old_viewport[0], old_viewport[1], old_viewport[2], old_viewport[3]);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, old_framebuffer);
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &vao);
|
||||
|
||||
return stats;
|
||||
}
|
29
game/graphics/opengl_renderer/SkyBlendGPU.h
Normal file
29
game/graphics/opengl_renderer/SkyBlendGPU.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
|
||||
#include "common/dma/dma_chain_read.h"
|
||||
#include "game/graphics/opengl_renderer/BucketRenderer.h"
|
||||
#include "game/graphics/pipelines/opengl.h"
|
||||
#include "game/graphics/opengl_renderer/SkyBlendCommon.h"
|
||||
|
||||
class SkyBlendGPU {
|
||||
public:
|
||||
SkyBlendGPU();
|
||||
~SkyBlendGPU();
|
||||
|
||||
SkyBlendStats do_sky_blends(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof);
|
||||
|
||||
private:
|
||||
GLuint m_framebuffers[2]; // sky, clouds
|
||||
GLuint m_textures[2]; // sky, clouds
|
||||
int m_sizes[2] = {32, 64};
|
||||
GLuint m_gl_vertex_buffer;
|
||||
|
||||
struct Vertex {
|
||||
float x = 0;
|
||||
float y = 0;
|
||||
float intensity = 0;
|
||||
};
|
||||
|
||||
Vertex m_vertex_data[6];
|
||||
};
|
|
@ -19,230 +19,13 @@
|
|||
|
||||
// size of the sky texture is 64x96, but it's actually a 64x64 (clouds) and a 32x32 (sky)
|
||||
|
||||
SkyBlender::SkyBlender() {
|
||||
// generate textures for sky blending
|
||||
glGenFramebuffers(2, m_framebuffers);
|
||||
glGenTextures(2, m_textures);
|
||||
|
||||
GLint old_framebuffer;
|
||||
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
|
||||
|
||||
// setup the framebuffers
|
||||
for (int i = 0; i < 2; i++) {
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[i]);
|
||||
glBindTexture(GL_TEXTURE_2D, m_textures[i]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_sizes[i], m_sizes[i], 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8_REV, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[i], 0);
|
||||
GLenum draw_buffers[1] = {GL_COLOR_ATTACHMENT0};
|
||||
glDrawBuffers(1, draw_buffers);
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
|
||||
lg::error("SkyTextureHandler setup failed.");
|
||||
}
|
||||
}
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
|
||||
glGenBuffers(1, &m_gl_vertex_buffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 6, nullptr, GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, old_framebuffer);
|
||||
|
||||
// we only draw squares
|
||||
m_vertex_data[0].x = 0;
|
||||
m_vertex_data[0].y = 0;
|
||||
|
||||
m_vertex_data[1].x = 1;
|
||||
m_vertex_data[1].y = 0;
|
||||
|
||||
m_vertex_data[2].x = 0;
|
||||
m_vertex_data[2].y = 1;
|
||||
|
||||
m_vertex_data[3].x = 1;
|
||||
m_vertex_data[3].y = 0;
|
||||
|
||||
m_vertex_data[4].x = 0;
|
||||
m_vertex_data[4].y = 1;
|
||||
|
||||
m_vertex_data[5].x = 1;
|
||||
m_vertex_data[5].y = 1;
|
||||
}
|
||||
|
||||
SkyBlender::~SkyBlender() {
|
||||
glDeleteFramebuffers(2, m_framebuffers);
|
||||
glDeleteBuffers(1, &m_gl_vertex_buffer);
|
||||
glDeleteTextures(2, m_textures);
|
||||
}
|
||||
|
||||
SkyBlender::Stats SkyBlender::do_sky_blends(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof) {
|
||||
Stats stats;
|
||||
GLuint vao;
|
||||
glGenVertexArrays(1, &vao);
|
||||
glBindVertexArray(vao);
|
||||
|
||||
GLint old_viewport[4];
|
||||
glGetIntegerv(GL_VIEWPORT, old_viewport);
|
||||
|
||||
GLint old_framebuffer;
|
||||
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_framebuffer);
|
||||
|
||||
while (dma.current_tag().qwc == 6) {
|
||||
// assuming that the vif and gif-tag is correct
|
||||
auto setup_data = dma.read_and_advance();
|
||||
if (render_state->dump_playback) {
|
||||
// continue;
|
||||
}
|
||||
|
||||
// first is an adgif
|
||||
AdgifHelper adgif(setup_data.data + 16);
|
||||
assert(adgif.is_normal_adgif());
|
||||
assert(adgif.alpha().data == 0x8000000068); // Cs + Cd
|
||||
|
||||
// next is the actual draw
|
||||
auto draw_data = dma.read_and_advance();
|
||||
assert(draw_data.size_bytes == 6 * 16);
|
||||
|
||||
GifTag draw_or_blend_tag(draw_data.data);
|
||||
|
||||
// the first draw overwrites the previous frame's draw by disabling alpha blend (ABE = 0)
|
||||
bool is_first_draw = !GsPrim(draw_or_blend_tag.prim()).abe();
|
||||
|
||||
// here's we're relying on the format of the drawing to get the alpha/offset.
|
||||
u32 coord;
|
||||
u32 intensity;
|
||||
memcpy(&coord, draw_data.data + (5 * 16), 4);
|
||||
memcpy(&intensity, draw_data.data + 16, 4);
|
||||
|
||||
// we didn't parse the render-to-texture setup earlier, so we need a way to tell sky from
|
||||
// clouds. we can look at the drawing coordinates to tell - the sky is smaller than the clouds.
|
||||
int buffer_idx = 0;
|
||||
if (coord == 0x200) {
|
||||
// sky
|
||||
buffer_idx = 0;
|
||||
} else if (coord == 0x400) {
|
||||
buffer_idx = 1;
|
||||
} else {
|
||||
assert(false); // bad data
|
||||
}
|
||||
|
||||
// look up the source texture
|
||||
auto tex = render_state->texture_pool->lookup(adgif.tex0().tbp0());
|
||||
assert(tex);
|
||||
|
||||
if (!tex->on_gpu) {
|
||||
render_state->texture_pool->upload_to_gpu(tex);
|
||||
}
|
||||
|
||||
// setup for rendering!
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffers[buffer_idx]);
|
||||
glViewport(0, 0, m_sizes[buffer_idx], m_sizes[buffer_idx]);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textures[buffer_idx], 0);
|
||||
render_state->shaders[ShaderId::SKY_BLEND].activate();
|
||||
|
||||
// if the first is set, it disables alpha. we can just clear here, so it's easier to find
|
||||
// in renderdoc.
|
||||
if (is_first_draw) {
|
||||
float clear[4] = {0, 0, 0, 0};
|
||||
glClearBufferfv(GL_COLOR, 0, clear);
|
||||
}
|
||||
|
||||
// intensities should be 0-128 (maybe higher is okay, but I don't see how this could be
|
||||
// generated with the GOAL code.)
|
||||
assert(intensity <= 128);
|
||||
|
||||
// todo - could do this on the GPU, but probably not worth it for <20 triangles...
|
||||
float intensity_float = intensity / 128.f;
|
||||
for (auto& vert : m_vertex_data) {
|
||||
vert.intensity = intensity_float;
|
||||
}
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
// will add.
|
||||
glBlendFunc(GL_ONE, GL_ONE);
|
||||
|
||||
// setup draw data
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vertex) * 6, m_vertex_data);
|
||||
glEnableVertexAttribArray(0);
|
||||
glVertexAttribPointer(0, // location 0 in the shader
|
||||
3, // 3 floats per vert
|
||||
GL_FLOAT, // floats
|
||||
GL_TRUE, // normalized, ignored,
|
||||
0, // tightly packed
|
||||
0
|
||||
|
||||
);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, tex->gpu_texture);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::SKY_BLEND].id(), "T0"), 0);
|
||||
|
||||
// Draw a sqaure
|
||||
glDrawArrays(GL_TRIANGLES, 0, 6);
|
||||
|
||||
// 1 draw, 2 triangles
|
||||
prof.add_draw_call(1);
|
||||
prof.add_tri(2);
|
||||
|
||||
if (buffer_idx == 0) {
|
||||
if (is_first_draw) {
|
||||
stats.sky_draws++;
|
||||
} else {
|
||||
stats.sky_blends++;
|
||||
}
|
||||
} else {
|
||||
if (is_first_draw) {
|
||||
stats.cloud_draws++;
|
||||
} else {
|
||||
stats.cloud_blends++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// put in pool.
|
||||
for (int i = 0; i < 2; i++) {
|
||||
// todo - these are hardcoded and rely on the vram layout.
|
||||
u32 tbp = i == 0 ? 8064 : 8096;
|
||||
|
||||
// lookup existing, or create a new entry
|
||||
TextureRecord* tex = render_state->texture_pool->lookup(tbp);
|
||||
if (!tex) {
|
||||
auto tsp = std::make_shared<TextureRecord>();
|
||||
render_state->texture_pool->set_texture(tbp, tsp);
|
||||
tex = tsp.get();
|
||||
}
|
||||
|
||||
// update it
|
||||
tex->gpu_texture = m_textures[i];
|
||||
tex->on_gpu = true;
|
||||
tex->only_on_gpu = true;
|
||||
tex->do_gc = false;
|
||||
tex->w = m_sizes[i];
|
||||
tex->h = m_sizes[i];
|
||||
tex->name = fmt::format("PC-SKY-{}", i);
|
||||
}
|
||||
|
||||
glViewport(old_viewport[0], old_viewport[1], old_viewport[2], old_viewport[3]);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, old_framebuffer);
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &vao);
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
SkyBlendHandler::SkyBlendHandler(const std::string& name,
|
||||
BucketId my_id,
|
||||
std::shared_ptr<SkyBlender> shared_blender)
|
||||
std::shared_ptr<SkyBlendGPU> shared_blender,
|
||||
std::shared_ptr<SkyBlendCPU> shared_blender_cpu)
|
||||
: BucketRenderer(name, my_id),
|
||||
m_shared_blender(shared_blender),
|
||||
m_shared_gpu_blender(shared_blender),
|
||||
m_shared_cpu_blender(shared_blender_cpu),
|
||||
m_tfrag_renderer(fmt::format("tfrag-{}", name),
|
||||
my_id,
|
||||
{tfrag3::TFragmentTreeKind::TRANS, tfrag3::TFragmentTreeKind::LOWRES_TRANS},
|
||||
|
@ -258,14 +41,19 @@ void SkyBlendHandler::handle_sky_copies(DmaFollower& dma,
|
|||
}
|
||||
return;
|
||||
} else {
|
||||
m_stats = m_shared_blender->do_sky_blends(dma, render_state, prof);
|
||||
if (render_state->use_sky_cpu) {
|
||||
m_gpu_stats = m_shared_cpu_blender->do_sky_blends(dma, render_state, prof);
|
||||
|
||||
} else {
|
||||
m_gpu_stats = m_shared_gpu_blender->do_sky_blends(dma, render_state, prof);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SkyBlendHandler::render(DmaFollower& dma,
|
||||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof) {
|
||||
m_stats = {};
|
||||
m_gpu_stats = {};
|
||||
// First thing should be a NEXT with two nops. this is a jump from buckets to sprite data
|
||||
auto data0 = dma.read_and_advance();
|
||||
assert(data0.vif1() == 0);
|
||||
|
@ -320,8 +108,8 @@ void SkyBlendHandler::render(DmaFollower& dma,
|
|||
|
||||
void SkyBlendHandler::draw_debug_window() {
|
||||
ImGui::Separator();
|
||||
ImGui::Text("Draw/Blend ( sky ): %d/%d", m_stats.sky_draws, m_stats.sky_blends);
|
||||
ImGui::Text("Draw/Blend (cloud): %d/%d", m_stats.cloud_draws, m_stats.cloud_blends);
|
||||
ImGui::Text("Draw/Blend ( sky ): %d/%d", m_gpu_stats.sky_draws, m_gpu_stats.sky_blends);
|
||||
ImGui::Text("Draw/Blend (cloud): %d/%d", m_gpu_stats.cloud_draws, m_gpu_stats.cloud_blends);
|
||||
|
||||
if (ImGui::TreeNode("tfrag")) {
|
||||
m_tfrag_renderer.draw_debug_window();
|
||||
|
|
|
@ -3,35 +3,8 @@
|
|||
#include "game/graphics/opengl_renderer/BucketRenderer.h"
|
||||
#include "game/graphics/opengl_renderer/DirectRenderer.h"
|
||||
#include "game/graphics/opengl_renderer/tfrag/TFragment.h"
|
||||
|
||||
class SkyBlender {
|
||||
public:
|
||||
SkyBlender();
|
||||
~SkyBlender();
|
||||
|
||||
struct Stats {
|
||||
int sky_draws = 0;
|
||||
int cloud_draws = 0;
|
||||
int sky_blends = 0;
|
||||
int cloud_blends = 0;
|
||||
};
|
||||
|
||||
Stats do_sky_blends(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof);
|
||||
|
||||
private:
|
||||
GLuint m_framebuffers[2]; // sky, clouds
|
||||
GLuint m_textures[2]; // sky, clouds
|
||||
int m_sizes[2] = {32, 64};
|
||||
GLuint m_gl_vertex_buffer;
|
||||
|
||||
struct Vertex {
|
||||
float x = 0;
|
||||
float y = 0;
|
||||
float intensity = 0;
|
||||
};
|
||||
|
||||
Vertex m_vertex_data[6];
|
||||
};
|
||||
#include "game/graphics//opengl_renderer/SkyBlendGPU.h"
|
||||
#include "game/graphics//opengl_renderer/SkyBlendCPU.h"
|
||||
|
||||
/*!
|
||||
* Handles texture blending for the sky.
|
||||
|
@ -41,7 +14,8 @@ class SkyBlendHandler : public BucketRenderer {
|
|||
public:
|
||||
SkyBlendHandler(const std::string& name,
|
||||
BucketId my_id,
|
||||
std::shared_ptr<SkyBlender> shared_blender);
|
||||
std::shared_ptr<SkyBlendGPU> shared_gpu_blender,
|
||||
std::shared_ptr<SkyBlendCPU> shared_cpu_blender);
|
||||
void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override;
|
||||
void draw_debug_window() override;
|
||||
|
||||
|
@ -50,8 +24,9 @@ class SkyBlendHandler : public BucketRenderer {
|
|||
SharedRenderState* render_state,
|
||||
ScopedProfilerNode& prof);
|
||||
|
||||
std::shared_ptr<SkyBlender> m_shared_blender;
|
||||
SkyBlender::Stats m_stats;
|
||||
std::shared_ptr<SkyBlendGPU> m_shared_gpu_blender;
|
||||
std::shared_ptr<SkyBlendCPU> m_shared_cpu_blender;
|
||||
SkyBlendStats m_gpu_stats;
|
||||
TFragment m_tfrag_renderer;
|
||||
};
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ SpriteRenderer::SpriteRenderer(const std::string& name, BucketId my_id)
|
|||
: BucketRenderer(name, my_id),
|
||||
m_sprite_renderer(fmt::format("{}.sprites", name),
|
||||
my_id,
|
||||
100,
|
||||
4000,
|
||||
DirectRenderer::Mode::SPRITE_CPU),
|
||||
m_direct_renderer(fmt::format("{}.direct", name), my_id, 100, DirectRenderer::Mode::NORMAL) {}
|
||||
|
||||
|
|
|
@ -67,6 +67,8 @@ void FrameTimeRecorder::draw_window(const DmaStats& dma_stats) {
|
|||
if (ImGui::Button("Single Frame Advance")) {
|
||||
m_single_frame = true;
|
||||
}
|
||||
ImGui::SameLine();
|
||||
ImGui::Checkbox("GLFinish", &do_gl_finish);
|
||||
}
|
||||
ImGui::End();
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ class FrameTimeRecorder {
|
|||
return m_play;
|
||||
}
|
||||
|
||||
bool do_gl_finish = false;
|
||||
|
||||
private:
|
||||
float m_frame_times[SIZE] = {0};
|
||||
int m_idx = 0;
|
||||
|
@ -47,6 +49,7 @@ class OpenGlDebugGui {
|
|||
const char* screenshot_name() const { return m_screenshot_save_name; }
|
||||
|
||||
bool should_advance_frame() { return m_frame_timer.should_advance_frame(); }
|
||||
bool should_gl_finish() { return m_frame_timer.do_gl_finish; }
|
||||
|
||||
bool get_screenshot_flag() {
|
||||
if (m_want_screenshot) {
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
out vec4 color;
|
||||
|
||||
in vec4 fragment_color;
|
||||
in vec3 tex_coord;
|
||||
noperspective in vec3 tex_coord;
|
||||
uniform sampler2D tex_T0;
|
||||
|
||||
void main() {
|
||||
|
|
|
@ -11,7 +11,7 @@ uniform float alpha_max;
|
|||
|
||||
void main() {
|
||||
//vec4 T0 = texture(tex_T0, tex_coord);
|
||||
vec4 T0 = texture(tex_T0, tex_coord.xy / tex_coord.z);
|
||||
vec4 T0 = texture(tex_T0, tex_coord.xy);
|
||||
color = fragment_color * T0 * 2.0;
|
||||
|
||||
if (color.a < alpha_min) {
|
||||
|
|
|
@ -37,6 +37,11 @@ Tfrag3::~Tfrag3() {
|
|||
void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_kinds,
|
||||
const std::string& level,
|
||||
SharedRenderState* render_state) {
|
||||
// regardless of how many we use some fixed max
|
||||
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
|
||||
// indexing works properly.
|
||||
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
|
||||
|
||||
// make sure we have the level data.
|
||||
auto lev_data = render_state->loader.get_tfrag3_level(level);
|
||||
if (m_level_name != level) {
|
||||
|
@ -47,12 +52,13 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
|
|||
fmt::print("level has {} trees\n", lev_data->tfrag_trees.size());
|
||||
m_cached_trees.clear();
|
||||
|
||||
size_t idx_buffer_len = 0;
|
||||
size_t time_of_day_count = 0;
|
||||
size_t vis_temp_len = 0;
|
||||
size_t max_draw = 0;
|
||||
|
||||
for (size_t tree_idx = 0; tree_idx < lev_data->tfrag_trees.size(); tree_idx++) {
|
||||
size_t idx_buffer_len = 0;
|
||||
|
||||
const auto& tree = lev_data->tfrag_trees[tree_idx];
|
||||
m_cached_trees.emplace_back();
|
||||
auto& tree_cache = m_cached_trees.back();
|
||||
|
@ -78,7 +84,7 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
|
|||
vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, tree_cache.vertex_buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, verts * sizeof(tfrag3::PreloadedVertex), nullptr,
|
||||
GL_DYNAMIC_DRAW);
|
||||
GL_STREAM_DRAW);
|
||||
glEnableVertexAttribArray(0);
|
||||
glEnableVertexAttribArray(1);
|
||||
glEnableVertexAttribArray(2);
|
||||
|
@ -108,6 +114,20 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
|
|||
sizeof(tfrag3::PreloadedVertex), // stride
|
||||
(void*)offsetof(tfrag3::PreloadedVertex, color_index) // offset (0)
|
||||
);
|
||||
|
||||
glGenBuffers(1, &tree_cache.index_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree_cache.index_buffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr,
|
||||
GL_STREAM_DRAW);
|
||||
tree_cache.index_list.resize(idx_buffer_len);
|
||||
|
||||
glGenTextures(1, &tree_cache.time_of_day_texture);
|
||||
glBindTexture(GL_TEXTURE_1D, tree_cache.time_of_day_texture);
|
||||
// just fill with zeros. this lets use use the faster texsubimage later
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glBindVertexArray(0);
|
||||
}
|
||||
}
|
||||
|
@ -136,28 +156,8 @@ void Tfrag3::setup_for_level(const std::vector<tfrag3::TFragmentTreeKind>& tree_
|
|||
m_textures.push_back(gl_tex);
|
||||
}
|
||||
|
||||
fmt::print("level max index stream: {}\n", idx_buffer_len);
|
||||
m_cache.index_list.resize(idx_buffer_len);
|
||||
m_has_index_buffer = true;
|
||||
glGenBuffers(1, &m_index_buffer);
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
fmt::print("level max time of day: {}\n", time_of_day_count);
|
||||
assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
|
||||
// regardless of how many we use some fixed max
|
||||
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
|
||||
// indexing works properly.
|
||||
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
|
||||
glGenTextures(1, &m_time_of_day_texture);
|
||||
m_has_time_of_day_texture = true;
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
// just fill with zeros. this lets use use the faster texsubimage later
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
m_level_name = level;
|
||||
fmt::print("TFRAG3 setup: {:.3f}\n", tfrag3_setup_timer.getSeconds());
|
||||
|
@ -179,7 +179,7 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
|
|||
interp_time_of_day_slow(settings.time_of_day_weights, *tree.colors, m_color_result.data());
|
||||
}
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
m_color_result.data());
|
||||
|
||||
|
@ -187,7 +187,7 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
|
|||
|
||||
glBindVertexArray(tree.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.index_buffer);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glEnable(GL_PRIMITIVE_RESTART);
|
||||
glPrimitiveRestartIndex(UINT32_MAX);
|
||||
|
@ -195,10 +195,9 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings,
|
|||
cull_check_all_slow(settings.planes, tree.vis->vis_nodes, m_cache.vis_temp.data());
|
||||
|
||||
int idx_buffer_ptr = make_index_list_from_vis_string(
|
||||
m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp);
|
||||
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
|
||||
|
||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32),
|
||||
m_cache.index_list.data());
|
||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data());
|
||||
|
||||
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
|
||||
const auto& draw = tree.draws->operator[](draw_idx);
|
||||
|
@ -332,22 +331,14 @@ void Tfrag3::discard_tree_cache() {
|
|||
|
||||
for (auto& tree : m_cached_trees) {
|
||||
if (tree.kind != tfrag3::TFragmentTreeKind::INVALID) {
|
||||
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
|
||||
glDeleteTextures(1, &tree.time_of_day_texture);
|
||||
glDeleteBuffers(1, &tree.vertex_buffer);
|
||||
glDeleteBuffers(1, &tree.index_buffer);
|
||||
glDeleteVertexArrays(1, &tree.vao);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_has_index_buffer) {
|
||||
glDeleteBuffers(1, &m_index_buffer);
|
||||
m_has_index_buffer = false;
|
||||
}
|
||||
|
||||
if (m_has_time_of_day_texture) {
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
glDeleteTextures(1, &m_time_of_day_texture);
|
||||
m_has_time_of_day_texture = false;
|
||||
}
|
||||
|
||||
// delete textures and stuff.
|
||||
m_cached_trees.clear();
|
||||
}
|
||||
|
|
|
@ -48,6 +48,9 @@ class Tfrag3 {
|
|||
struct TreeCache {
|
||||
tfrag3::TFragmentTreeKind kind;
|
||||
GLuint vertex_buffer = -1;
|
||||
GLuint index_buffer = -1;
|
||||
std::vector<u32> index_list;
|
||||
GLuint time_of_day_texture;
|
||||
GLuint vao;
|
||||
u32 vert_count = 0;
|
||||
const std::vector<tfrag3::StripDraw>* draws = nullptr;
|
||||
|
@ -71,21 +74,15 @@ class Tfrag3 {
|
|||
struct Cache {
|
||||
std::vector<u8> vis_temp;
|
||||
std::vector<std::pair<int, int>> draw_idx_temp;
|
||||
std::vector<u32> index_list;
|
||||
} m_cache;
|
||||
|
||||
std::string m_level_name;
|
||||
|
||||
std::vector<GLuint> m_textures;
|
||||
std::vector<TreeCache> m_cached_trees;
|
||||
GLuint m_time_of_day_texture = -1;
|
||||
bool m_has_time_of_day_texture = false;
|
||||
|
||||
std::vector<math::Vector<u8, 4>> m_color_result;
|
||||
|
||||
bool m_has_index_buffer = false;
|
||||
GLuint m_index_buffer = -1;
|
||||
|
||||
GLuint m_debug_vao = -1;
|
||||
GLuint m_debug_verts = -1;
|
||||
|
||||
|
|
|
@ -17,6 +17,11 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
|
|||
// TODO: right now this will wait to load from disk and unpack it.
|
||||
auto lev_data = render_state->loader.get_tfrag3_level(level);
|
||||
|
||||
// regardless of how many we use some fixed max
|
||||
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
|
||||
// indexing works properly.
|
||||
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
|
||||
|
||||
if (m_level_name != level) {
|
||||
Timer tie_setup_timer;
|
||||
// We changed level!
|
||||
|
@ -26,7 +31,6 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
|
|||
fmt::print(" New level has {} tie trees\n", lev_data->tie_trees.size());
|
||||
m_trees.resize(lev_data->tie_trees.size());
|
||||
|
||||
size_t idx_buffer_len = 0;
|
||||
size_t time_of_day_count = 0;
|
||||
size_t vis_temp_len = 0;
|
||||
size_t max_draw = 0;
|
||||
|
@ -34,6 +38,7 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
|
|||
|
||||
// set up each tree
|
||||
for (size_t tree_idx = 0; tree_idx < lev_data->tie_trees.size(); tree_idx++) {
|
||||
size_t idx_buffer_len = 0;
|
||||
const auto& tree = lev_data->tie_trees[tree_idx];
|
||||
max_draw = std::max(tree.static_draws.size(), max_draw);
|
||||
for (auto& draw : tree.static_draws) {
|
||||
|
@ -85,6 +90,21 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
|
|||
sizeof(tfrag3::PreloadedVertex), // stride
|
||||
(void*)offsetof(tfrag3::PreloadedVertex, color_index) // offset (0)
|
||||
);
|
||||
|
||||
glGenBuffers(1, &m_trees[tree_idx].index_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_trees[tree_idx].index_buffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
|
||||
m_trees[tree_idx].index_list.resize(idx_buffer_len);
|
||||
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glGenTextures(1, &m_trees[tree_idx].time_of_day_texture);
|
||||
glBindTexture(GL_TEXTURE_1D, m_trees[tree_idx].time_of_day_texture);
|
||||
// just fill with zeros. this lets use use the faster texsubimage later
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
glBindVertexArray(0);
|
||||
}
|
||||
|
||||
|
@ -113,28 +133,8 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
|
|||
m_textures.push_back(gl_tex);
|
||||
}
|
||||
|
||||
fmt::print("level TIE index stream: {}\n", idx_buffer_len);
|
||||
m_cache.index_list.resize(idx_buffer_len);
|
||||
m_has_index_buffer = true;
|
||||
glGenBuffers(1, &m_index_buffer);
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
|
||||
|
||||
fmt::print("level max time of day: {}\n", time_of_day_count);
|
||||
assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
|
||||
// regardless of how many we use some fixed max
|
||||
// we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so
|
||||
// indexing works properly.
|
||||
m_color_result.resize(TIME_OF_DAY_COLOR_COUNT);
|
||||
glGenTextures(1, &m_time_of_day_texture);
|
||||
m_has_time_of_day_texture = true;
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
// just fill with zeros. this lets use use the faster texsubimage later
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8, m_color_result.data());
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
m_level_name = level;
|
||||
fmt::print("TIE setup: {:.3f}\n", tie_setup_timer.getSeconds());
|
||||
|
@ -149,21 +149,13 @@ void Tie3::discard_tree_cache() {
|
|||
m_textures.clear();
|
||||
|
||||
for (auto& tree : m_trees) {
|
||||
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
|
||||
glDeleteTextures(1, &tree.time_of_day_texture);
|
||||
glDeleteBuffers(1, &tree.vertex_buffer);
|
||||
glDeleteBuffers(1, &tree.index_buffer);
|
||||
glDeleteVertexArrays(1, &tree.vao);
|
||||
}
|
||||
|
||||
if (m_has_index_buffer) {
|
||||
glDeleteBuffers(1, &m_index_buffer);
|
||||
m_has_index_buffer = false;
|
||||
}
|
||||
|
||||
if (m_has_time_of_day_texture) {
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
glDeleteTextures(1, &m_time_of_day_texture);
|
||||
m_has_time_of_day_texture = false;
|
||||
}
|
||||
|
||||
m_trees.clear();
|
||||
}
|
||||
|
||||
|
@ -284,7 +276,7 @@ void Tie3::render_tree(int idx,
|
|||
|
||||
Timer setup_timer;
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture);
|
||||
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
m_color_result.data());
|
||||
|
||||
|
@ -292,7 +284,7 @@ void Tie3::render_tree(int idx,
|
|||
|
||||
glBindVertexArray(tree.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.index_buffer);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glEnable(GL_PRIMITIVE_RESTART);
|
||||
glPrimitiveRestartIndex(UINT32_MAX);
|
||||
|
@ -305,7 +297,7 @@ void Tie3::render_tree(int idx,
|
|||
tree.perf.cull_time.add(0);
|
||||
Timer index_timer;
|
||||
idx_buffer_ptr = make_all_visible_index_list(m_cache.draw_idx_temp.data(),
|
||||
m_cache.index_list.data(), *tree.draws);
|
||||
tree.index_list.data(), *tree.draws);
|
||||
tree.perf.index_time.add(index_timer.getSeconds());
|
||||
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
|
||||
} else {
|
||||
|
@ -315,14 +307,13 @@ void Tie3::render_tree(int idx,
|
|||
|
||||
Timer index_timer;
|
||||
idx_buffer_ptr = make_index_list_from_vis_string(
|
||||
m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp);
|
||||
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
|
||||
tree.perf.index_time.add(index_timer.getSeconds());
|
||||
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
|
||||
}
|
||||
|
||||
Timer draw_timer;
|
||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32),
|
||||
m_cache.index_list.data());
|
||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data());
|
||||
|
||||
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
|
||||
const auto& draw = tree.draws->operator[](draw_idx);
|
||||
|
|
|
@ -27,6 +27,9 @@ class Tie3 : public BucketRenderer {
|
|||
void discard_tree_cache();
|
||||
struct Tree {
|
||||
GLuint vertex_buffer;
|
||||
GLuint index_buffer;
|
||||
GLuint time_of_day_texture;
|
||||
std::vector<u32> index_list;
|
||||
GLuint vao;
|
||||
u32 vert_count;
|
||||
const std::vector<tfrag3::StripDraw>* draws = nullptr;
|
||||
|
@ -55,17 +58,10 @@ class Tie3 : public BucketRenderer {
|
|||
struct Cache {
|
||||
std::vector<u8> vis_temp;
|
||||
std::vector<std::pair<int, int>> draw_idx_temp;
|
||||
std::vector<u32> index_list;
|
||||
} m_cache;
|
||||
|
||||
GLuint m_time_of_day_texture = -1;
|
||||
bool m_has_time_of_day_texture = false;
|
||||
|
||||
std::vector<math::Vector<u8, 4>> m_color_result;
|
||||
|
||||
bool m_has_index_buffer = false;
|
||||
GLuint m_index_buffer = -1;
|
||||
|
||||
static constexpr int TIME_OF_DAY_COLOR_COUNT = 8192;
|
||||
|
||||
char m_user_level[255] = "vi1";
|
||||
|
|
|
@ -402,6 +402,10 @@ static void gl_render_display(GfxDisplay* display) {
|
|||
render_game_frame(width, height, lbox_w, lbox_h);
|
||||
}
|
||||
|
||||
if (g_gfx_data->debug_gui.should_gl_finish()) {
|
||||
glFinish();
|
||||
}
|
||||
|
||||
// render imgui
|
||||
g_gfx_data->debug_gui.draw(g_gfx_data->dma_copier.get_last_result().stats);
|
||||
ImGui::Render();
|
||||
|
|
|
@ -938,7 +938,10 @@ void CancelDGO(RPC_Dgo_Cmd* cmd) {
|
|||
SendMbx(sync_mbx, nullptr);
|
||||
// wait for it to abort.
|
||||
WaitMbx(dgo_mbx);
|
||||
assert(cmd); // bug
|
||||
// this will cause a crash if we cancel because we try to load 2 dgos at the same time.
|
||||
// this should succeed if it's an actual cancel because we changed which level we're trying to
|
||||
// load.
|
||||
assert(cmd);
|
||||
cmd->result = DGO_RPC_RESULT_ABORTED;
|
||||
scmd.cmd_id = 0;
|
||||
}
|
||||
|
|
|
@ -221,7 +221,7 @@ void Deci2Server::run() {
|
|||
|
||||
auto& driver = d2_drivers[handler];
|
||||
|
||||
int sent_to_program = 0;
|
||||
u32 sent_to_program = 0;
|
||||
while (!want_exit() && (hdr->rsvd < hdr->len || sent_to_program < hdr->rsvd)) {
|
||||
// send what we have to the program
|
||||
if (sent_to_program < hdr->rsvd) {
|
||||
|
|
|
@ -284,6 +284,7 @@
|
|||
"out/iso/MAI.VIS"
|
||||
"out/iso/SNO.VIS"
|
||||
"out/iso/BEA.VIS"
|
||||
"out/iso/LAV.VIS"
|
||||
"out/iso/CIT.VIS"
|
||||
"out/iso/FIN.VIS"
|
||||
|
||||
|
|
|
@ -191,8 +191,8 @@ bool Listener::connect_to_target(int n_tries, const std::string& ip, int port) {
|
|||
void Listener::receive_func() {
|
||||
while (m_connected) {
|
||||
// attempt to receive a ListenerMessageHeader
|
||||
int rcvd = 0;
|
||||
int rcvd_desired = sizeof(ListenerMessageHeader);
|
||||
u32 rcvd = 0;
|
||||
u32 rcvd_desired = sizeof(ListenerMessageHeader);
|
||||
char buff[sizeof(ListenerMessageHeader)];
|
||||
while (rcvd < rcvd_desired) {
|
||||
auto got = read_from_socket(listen_socket, buff + rcvd, rcvd_desired - rcvd);
|
||||
|
|
Loading…
Reference in a new issue