jak-project/game/graphics/opengl_renderer/background/background_common.cpp
water111 a918e2d9de
[gfx] Clean up background renderer matrices, fix "hole covers" (#2866)
The way we got/stored background matrices is a bit weird and full of
leftovers from the first attempts at porting renderers. This doesn't
work well with the Jak 2 "other camera" system where some stuff is
rendered with a different camera matrix.

This cleans most of it up. The exception is that the collide mesh
renderer and the additional sprite culling I added still need to peek at
some cached camera matrices.

This fixes the problem where etie uses the wrong matrices for "other
camera" levels. Now the "hole covers" go in the holes in the background
of the throne room.

![image](https://github.com/open-goal/jak-project/assets/48171810/73a88f7b-05d4-4e9c-bb34-5b45efffcb69)
2023-07-29 20:34:42 -04:00


#include "background_common.h"
#ifdef __aarch64__
#include "third-party/sse2neon/sse2neon.h"
#else
#include <immintrin.h>
#endif
#include "common/util/os.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/pipelines/opengl.h"
DoubleDraw setup_opengl_from_draw_mode(DrawMode mode, u32 tex_unit, bool mipmap) {
  glActiveTexture(tex_unit);

  if (mode.get_zt_enable()) {
    glEnable(GL_DEPTH_TEST);
    switch (mode.get_depth_test()) {
      case GsTest::ZTest::NEVER:
        glDepthFunc(GL_NEVER);
        break;
      case GsTest::ZTest::ALWAYS:
        glDepthFunc(GL_ALWAYS);
        break;
      case GsTest::ZTest::GEQUAL:
        glDepthFunc(GL_GEQUAL);
        break;
      case GsTest::ZTest::GREATER:
        glDepthFunc(GL_GREATER);
        break;
      default:
        ASSERT(false);
    }
  } else {
    glDisable(GL_DEPTH_TEST);
  }

  DoubleDraw double_draw;

  bool should_enable_blend = false;
  if (mode.get_ab_enable() && mode.get_alpha_blend() != DrawMode::AlphaBlend::DISABLED) {
    should_enable_blend = true;
    switch (mode.get_alpha_blend()) {
      case DrawMode::AlphaBlend::SRC_SRC_SRC_SRC:
        should_enable_blend = false;
        // (SRC - SRC) * alpha + SRC = SRC, no blend.
        break;
      case DrawMode::AlphaBlend::SRC_DST_SRC_DST:
        glBlendEquation(GL_FUNC_ADD);
        glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
        break;
      case DrawMode::AlphaBlend::SRC_0_SRC_DST:
        glBlendEquation(GL_FUNC_ADD);
        glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ONE, GL_ZERO);
        break;
      case DrawMode::AlphaBlend::SRC_0_FIX_DST:
        glBlendEquation(GL_FUNC_ADD);
        glBlendFuncSeparate(GL_ONE, GL_ONE, GL_ONE, GL_ZERO);
        break;
      case DrawMode::AlphaBlend::SRC_DST_FIX_DST:
        // Cv = (Cs - Cd) * FIX + Cd
        // Cs * FIX * 0.5
        // Cd * FIX * 0.5
        glBlendEquation(GL_FUNC_ADD);
        glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_CONSTANT_COLOR, GL_ONE, GL_ZERO);
        glBlendColor(0.5, 0.5, 0.5, 0.5);
        break;
      case DrawMode::AlphaBlend::ZERO_SRC_SRC_DST:
        glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ONE, GL_ZERO);
        glBlendEquation(GL_FUNC_REVERSE_SUBTRACT);
        break;
      case DrawMode::AlphaBlend::SRC_0_DST_DST:
        glBlendFunc(GL_DST_ALPHA, GL_ONE);
        glBlendEquation(GL_FUNC_ADD);
        double_draw.color_mult = 0.5f;
        break;
      default:
        ASSERT(false);
    }
  } else {
    should_enable_blend = false;
  }

  if (should_enable_blend) {
    glEnable(GL_BLEND);
  } else {
    glDisable(GL_BLEND);
  }

  if (mode.get_clamp_s_enable()) {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  } else {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
  }

  if (mode.get_clamp_t_enable()) {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  } else {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
  }

  if (mode.get_filt_enable()) {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
                    mipmap ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  } else {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  }

  // for some reason, they set atest NEVER + FB_ONLY to disable depth writes
  bool alpha_hack_to_disable_z_write = false;
  float alpha_min = 0.;
  if (mode.get_at_enable()) {
    switch (mode.get_alpha_test()) {
      case DrawMode::AlphaTest::ALWAYS:
        break;
      case DrawMode::AlphaTest::GEQUAL:
        alpha_min = mode.get_aref() / 127.f;
        switch (mode.get_alpha_fail()) {
          case GsTest::AlphaFail::KEEP:
            // ok, no need for double draw
            break;
          case GsTest::AlphaFail::FB_ONLY:
            if (mode.get_depth_write_enable()) {
              // darn, we need to draw twice
              double_draw.kind = DoubleDrawKind::AFAIL_NO_DEPTH_WRITE;
              double_draw.aref_second = alpha_min;
            } else {
              alpha_min = 0.f;
            }
            break;
          default:
            ASSERT(false);
        }
        break;
      case DrawMode::AlphaTest::NEVER:
        if (mode.get_alpha_fail() == GsTest::AlphaFail::FB_ONLY) {
          alpha_hack_to_disable_z_write = true;
        } else {
          ASSERT(false);
        }
        break;
      default:
        ASSERT(false);
    }
  }

  if (mode.get_depth_write_enable() && !alpha_hack_to_disable_z_write) {
    glDepthMask(GL_TRUE);
  } else {
    glDepthMask(GL_FALSE);
  }

  double_draw.aref_first = alpha_min;
  return double_draw;
}
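
// Common setup for background shaders: applies the DrawMode state, then sets the alpha_min /
// alpha_max uniforms used for alpha testing, if the shader has them.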
DoubleDraw setup_tfrag_shader(SharedRenderState* render_state, DrawMode mode, ShaderId shader) {
  auto draw_settings = setup_opengl_from_draw_mode(mode, GL_TEXTURE0, true);
  auto sh_id = render_state->shaders[shader].id();
  if (auto u_id = glGetUniformLocation(sh_id, "alpha_min"); u_id != -1) {
    glUniform1f(u_id, draw_settings.aref_first);
  }
  if (auto u_id = glGetUniformLocation(sh_id, "alpha_max"); u_id != -1) {
    glUniform1f(u_id, 10.f);
  }
  return draw_settings;
}
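
// Upload the camera matrix, hvdf offset, and fog uniforms shared by the background renderers
// before drawing.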
void first_tfrag_draw_setup(const TfragRenderSettings& settings,
                            SharedRenderState* render_state,
                            ShaderId shader) {
  const auto& sh = render_state->shaders[shader];
  sh.activate();
  auto id = sh.id();
  glUniform1i(glGetUniformLocation(id, "gfx_hack_no_tex"), Gfx::g_global_settings.hack_no_tex);
  glUniform1i(glGetUniformLocation(id, "decal"), false);
  glUniform1i(glGetUniformLocation(id, "tex_T0"), 0);
  glUniformMatrix4fv(glGetUniformLocation(id, "camera"), 1, GL_FALSE,
                     settings.camera.camera[0].data());
  glUniform4f(glGetUniformLocation(id, "hvdf_offset"), settings.camera.hvdf_off[0],
              settings.camera.hvdf_off[1], settings.camera.hvdf_off[2],
              settings.camera.hvdf_off[3]);
  glUniform1f(glGetUniformLocation(id, "fog_constant"), settings.camera.fog.x());
  glUniform1f(glGetUniformLocation(id, "fog_min"), settings.camera.fog.y());
  glUniform1f(glGetUniformLocation(id, "fog_max"), settings.camera.fog.z());
  glUniform4f(glGetUniformLocation(id, "fog_color"), render_state->fog_color[0] / 255.f,
              render_state->fog_color[1] / 255.f, render_state->fog_color[2] / 255.f,
              render_state->fog_intensity / 255);
  glUniform1f(glGetUniformLocation(id, "fog_hack_threshold"),
              render_state->version == GameVersion::Jak1 ? 0.005f : 0);
}
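
// Reference implementation of the time-of-day color interpolation: each output color is a
// weighted sum of its 8 palette entries, with the weights unpacked from itimes.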
void interp_time_of_day_slow(const math::Vector<s32, 4> itimes[4],
                             const std::vector<tfrag3::TimeOfDayColor>& in,
                             math::Vector<u8, 4>* out) {
  // Timer interp_timer;
  math::Vector4f weights[8];
  for (int component = 0; component < 8; component++) {
    int quad_idx = component / 2;
    int word_off = (component % 2 * 2);
    for (int channel = 0; channel < 4; channel++) {
      int word = word_off + (channel / 2);
      int hw_off = channel % 2;

      u32 word_val = itimes[quad_idx][word];
      u32 hw_val = hw_off ? (word_val >> 16) : word_val;
      hw_val = hw_val & 0xff;
      weights[component][channel] = hw_val / 64.f;
    }
  }

  for (size_t color = 0; color < in.size(); color++) {
    math::Vector4f result = math::Vector4f::zero();
    for (int component = 0; component < 8; component++) {
      for (int channel = 0; channel < 4; channel++) {
        result[channel] += in[color].rgba[component][channel] * weights[component][channel];
      }
      // result += in[color].rgba[component].cast<float>() * weights[component];
    }
    result[0] = std::min(result[0], 255.f);
    result[1] = std::min(result[1], 255.f);
    result[2] = std::min(result[2], 255.f);
    result[3] = std::min(result[3], 128.f);  // note: different for alpha!
    out[color] = result.cast<u8>();
  }
}

// we want to absolutely minimize the number of times we have to "cross lanes" in SIMD (meaning the
// X component of one vector interacts with the Y component of another). We can make this a lot
// better by taking groups of 4 time of day colors (each containing 8x RGBAs) and rearranging them
// with this pattern. We want to compute:
// [rgba][0][0] * weights[0] + [rgba][0][1] * weights[1] + ... + [rgba][0][7] * weights[7]
// RGBA is already a vector of 4 components, and a 16-byte chunk of the swizzled data holds 4 such
// colors. This makes each vector have:
// colors0 = [rgba][0][0], [rgba][1][0], [rgba][2][0], [rgba][3][0]
// colors1 = [rgba][0][1], [rgba][1][1], [rgba][2][1], [rgba][3][1]
// ...
// so we can basically add up the columns (multiplying by weights in between)
// and we'll end up with [final0, final1, final2, final3]
// the swizzle function below rearranges to get this pattern.
// it's not the most efficient way to do it, but it just runs during loading and not on every frame.
SwizzledTimeOfDay swizzle_time_of_day(const std::vector<tfrag3::TimeOfDayColor>& in) {
  SwizzledTimeOfDay out;
  out.data.resize((in.size() + 3) * 8 * 4);

  // we're rearranging per 4 colors (groups of 32 * 4 = 128)
  // color (lots of these)
  // component (8 of these)
  // channel (4 of these, rgba)
  for (u32 color_quad = 0; color_quad < (in.size() + 3) / 4; color_quad++) {
    u8* quad_out = out.data.data() + color_quad * 128;
    for (u32 component = 0; component < 8; component++) {
      for (u32 color = 0; color < 4; color++) {
        for (u32 channel = 0; channel < 4; channel++) {
          size_t in_idx = color_quad * 4 + color;
          if (in_idx < in.size()) {
            *quad_out = in.at(color_quad * 4 + color).rgba[component][channel];
          } else {
            *quad_out = 0;
          }
          quad_out++;
        }
      }
    }
  }
  out.color_count = (in.size() + 3) & (~3);
  return out;
}
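
// SSE version of the interpolation above, operating on colors pre-arranged by
// swizzle_time_of_day. It is compiled out on aarch64 (see the guard below).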
#ifndef __aarch64__
void interp_time_of_day_fast(const math::Vector<s32, 4> itimes[4],
                             const SwizzledTimeOfDay& swizzled_colors,
                             math::Vector<u8, 4>* out) {
  math::Vector<u16, 4> weights[8];
  for (int component = 0; component < 8; component++) {
    int quad_idx = component / 2;
    int word_off = (component % 2 * 2);
    for (int channel = 0; channel < 4; channel++) {
      int word = word_off + (channel / 2);
      int hw_off = channel % 2;

      u32 word_val = itimes[quad_idx][word];
      u32 hw_val = hw_off ? (word_val >> 16) : word_val;
      hw_val = hw_val & 0xff;
      weights[component][channel] = hw_val;
    }
  }

  // weight multipliers
  __m128i weights0 = _mm_setr_epi16(weights[0][0], weights[0][1], weights[0][2], weights[0][3],
                                    weights[0][0], weights[0][1], weights[0][2], weights[0][3]);
  __m128i weights1 = _mm_setr_epi16(weights[1][0], weights[1][1], weights[1][2], weights[1][3],
                                    weights[1][0], weights[1][1], weights[1][2], weights[1][3]);
  __m128i weights2 = _mm_setr_epi16(weights[2][0], weights[2][1], weights[2][2], weights[2][3],
                                    weights[2][0], weights[2][1], weights[2][2], weights[2][3]);
  __m128i weights3 = _mm_setr_epi16(weights[3][0], weights[3][1], weights[3][2], weights[3][3],
                                    weights[3][0], weights[3][1], weights[3][2], weights[3][3]);
  __m128i weights4 = _mm_setr_epi16(weights[4][0], weights[4][1], weights[4][2], weights[4][3],
                                    weights[4][0], weights[4][1], weights[4][2], weights[4][3]);
  __m128i weights5 = _mm_setr_epi16(weights[5][0], weights[5][1], weights[5][2], weights[5][3],
                                    weights[5][0], weights[5][1], weights[5][2], weights[5][3]);
  __m128i weights6 = _mm_setr_epi16(weights[6][0], weights[6][1], weights[6][2], weights[6][3],
                                    weights[6][0], weights[6][1], weights[6][2], weights[6][3]);
  __m128i weights7 = _mm_setr_epi16(weights[7][0], weights[7][1], weights[7][2], weights[7][3],
                                    weights[7][0], weights[7][1], weights[7][2], weights[7][3]);

  // saturation: note that alpha is saturated to 128 but the rest are 255.
  // TODO: maybe we should saturate to 255 for everybody (can do this using a single packus) and
  // change the shader to deal with this.
  __m128i sat = _mm_set_epi16(128, 255, 255, 255, 128, 255, 255, 255);

  for (u32 color_quad = 0; color_quad < swizzled_colors.color_count / 4; color_quad++) {
    // first, load colors. We put 8 bytes (2 RGBA colors) in the low half of each register and
    // leave the upper half free because we're about to widen the u8s to u16s.
    {
      const u8* base = swizzled_colors.data.data() + color_quad * 128;
      __m128i color0_p = _mm_loadu_si64((const __m128i*)(base + 0));
      __m128i color1_p = _mm_loadu_si64((const __m128i*)(base + 16));
      __m128i color2_p = _mm_loadu_si64((const __m128i*)(base + 32));
      __m128i color3_p = _mm_loadu_si64((const __m128i*)(base + 48));
      __m128i color4_p = _mm_loadu_si64((const __m128i*)(base + 64));
      __m128i color5_p = _mm_loadu_si64((const __m128i*)(base + 80));
      __m128i color6_p = _mm_loadu_si64((const __m128i*)(base + 96));
      __m128i color7_p = _mm_loadu_si64((const __m128i*)(base + 112));

      // unpack to 16-bits. each register now holds 8x 16-bit values (2 RGBA colors).
      __m128i color0 = _mm_cvtepu8_epi16(color0_p);
      __m128i color1 = _mm_cvtepu8_epi16(color1_p);
      __m128i color2 = _mm_cvtepu8_epi16(color2_p);
      __m128i color3 = _mm_cvtepu8_epi16(color3_p);
      __m128i color4 = _mm_cvtepu8_epi16(color4_p);
      __m128i color5 = _mm_cvtepu8_epi16(color5_p);
      __m128i color6 = _mm_cvtepu8_epi16(color6_p);
      __m128i color7 = _mm_cvtepu8_epi16(color7_p);

      // multiply by weights
      color0 = _mm_mullo_epi16(color0, weights0);
      color1 = _mm_mullo_epi16(color1, weights1);
      color2 = _mm_mullo_epi16(color2, weights2);
      color3 = _mm_mullo_epi16(color3, weights3);
      color4 = _mm_mullo_epi16(color4, weights4);
      color5 = _mm_mullo_epi16(color5, weights5);
      color6 = _mm_mullo_epi16(color6, weights6);
      color7 = _mm_mullo_epi16(color7, weights7);

      // add. This order minimizes dependencies.
      color0 = _mm_adds_epi16(color0, color1);
      color2 = _mm_adds_epi16(color2, color3);
      color4 = _mm_adds_epi16(color4, color5);
      color6 = _mm_adds_epi16(color6, color7);
      color0 = _mm_adds_epi16(color0, color2);
      color4 = _mm_adds_epi16(color4, color6);
      color0 = _mm_adds_epi16(color0, color4);

      // divide by 64, because the weights are in units of 1/64 (see the slow version above).
      color0 = _mm_srli_epi16(color0, 6);

      // saturate
      color0 = _mm_min_epu16(sat, color0);

      // back to u8s.
      auto result = _mm_packus_epi16(color0, color0);

      // store result
      _mm_storel_epi64((__m128i*)(&out[color_quad * 4]), result);
    }

    // same thing again for the other two colors of this quad (offset by 8 bytes).
    {
      const u8* base = swizzled_colors.data.data() + color_quad * 128 + 8;
      __m128i color0_p = _mm_loadu_si64((const __m128i*)(base + 0));
      __m128i color1_p = _mm_loadu_si64((const __m128i*)(base + 16));
      __m128i color2_p = _mm_loadu_si64((const __m128i*)(base + 32));
      __m128i color3_p = _mm_loadu_si64((const __m128i*)(base + 48));
      __m128i color4_p = _mm_loadu_si64((const __m128i*)(base + 64));
      __m128i color5_p = _mm_loadu_si64((const __m128i*)(base + 80));
      __m128i color6_p = _mm_loadu_si64((const __m128i*)(base + 96));
      __m128i color7_p = _mm_loadu_si64((const __m128i*)(base + 112));

      // unpack to 16-bits. each register now holds 8x 16-bit values (2 RGBA colors).
      __m128i color0 = _mm_cvtepu8_epi16(color0_p);
      __m128i color1 = _mm_cvtepu8_epi16(color1_p);
      __m128i color2 = _mm_cvtepu8_epi16(color2_p);
      __m128i color3 = _mm_cvtepu8_epi16(color3_p);
      __m128i color4 = _mm_cvtepu8_epi16(color4_p);
      __m128i color5 = _mm_cvtepu8_epi16(color5_p);
      __m128i color6 = _mm_cvtepu8_epi16(color6_p);
      __m128i color7 = _mm_cvtepu8_epi16(color7_p);

      // multiply by weights
      color0 = _mm_mullo_epi16(color0, weights0);
      color1 = _mm_mullo_epi16(color1, weights1);
      color2 = _mm_mullo_epi16(color2, weights2);
      color3 = _mm_mullo_epi16(color3, weights3);
      color4 = _mm_mullo_epi16(color4, weights4);
      color5 = _mm_mullo_epi16(color5, weights5);
      color6 = _mm_mullo_epi16(color6, weights6);
      color7 = _mm_mullo_epi16(color7, weights7);

      // add. This order minimizes dependencies.
      color0 = _mm_adds_epi16(color0, color1);
      color2 = _mm_adds_epi16(color2, color3);
      color4 = _mm_adds_epi16(color4, color5);
      color6 = _mm_adds_epi16(color6, color7);
      color0 = _mm_adds_epi16(color0, color2);
      color4 = _mm_adds_epi16(color4, color6);
      color0 = _mm_adds_epi16(color0, color4);

      // divide by 64, because the weights are in units of 1/64 (see the slow version above).
      color0 = _mm_srli_epi16(color0, 6);

      // saturate
      color0 = _mm_min_epu16(sat, color0);

      // back to u8s.
      auto result = _mm_packus_epi16(color0, color0);

      // store result
      _mm_storel_epi64((__m128i*)(&out[color_quad * 4 + 2]), result);
    }
  }
}
#endif
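
// Reference check of a bounding sphere against the four camera planes; used by the slow culling
// path below.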
bool sphere_in_view_ref(const math::Vector4f& sphere, const math::Vector4f* planes) {
  math::Vector4f acc =
      planes[0] * sphere.x() + planes[1] * sphere.y() + planes[2] * sphere.z() - planes[3];

  return acc.x() > -sphere.w() && acc.y() > -sphere.w() && acc.z() > -sphere.w() &&
         acc.w() > -sphere.w();
}

// this isn't super efficient, but we spend so little time here it's not worth it to go faster.
void cull_check_all_slow(const math::Vector4f* planes,
                         const std::vector<tfrag3::VisNode>& nodes,
                         const u8* level_occlusion_string,
                         u8* out) {
  if (level_occlusion_string) {
    for (size_t i = 0; i < nodes.size(); i++) {
      u16 my_id = nodes[i].my_id;
      bool not_occluded =
          my_id != 0xffff && level_occlusion_string[my_id / 8] & (1 << (7 - (my_id & 7)));
      out[i] = not_occluded && sphere_in_view_ref(nodes[i].bsphere, planes);
    }
  } else {
    for (size_t i = 0; i < nodes.size(); i++) {
      out[i] = sphere_in_view_ref(nodes[i].bsphere, planes);
    }
  }
}
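
// The functions below build the draw lists consumed by the background renderers. The "multidraw"
// variants fill count/offset arrays for a glMultiDrawElements-style call, and the "index list"
// variants copy indices into a packed buffer. The two overloads here treat every shrub/strip draw
// as fully visible.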
void make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
                                 GLsizei* counts_out,
                                 void** index_offsets_out,
                                 const std::vector<tfrag3::ShrubDraw>& draws) {
  u64 md_idx = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    u64 iidx = draw.first_index_index;
    std::pair<int, int> ds;
    ds.first = md_idx;
    ds.second = 1;
    counts_out[md_idx] = draw.num_indices;
    index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
    md_idx++;
    draw_ptrs_out[i] = ds;
  }
}

u32 make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
                                GLsizei* counts_out,
                                void** index_offsets_out,
                                const std::vector<tfrag3::StripDraw>& draws) {
  u64 md_idx = 0;
  u32 num_tris = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
    std::pair<int, int> ds;
    ds.first = md_idx;
    ds.second = 1;
    int num_inds = 0;
    for (auto& grp : draw.vis_groups) {
      num_tris += grp.num_tris;
      num_inds += grp.num_inds;
    }
    counts_out[md_idx] = num_inds;
    index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
    draw_ptrs_out[i] = ds;
    md_idx++;
  }
  return num_tris;
}
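
// Copies every shrub draw's indices into the output buffer, recording a (start, count) pair per
// draw.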
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
                                u32* idx_out,
                                const std::vector<tfrag3::ShrubDraw>& draws,
                                const u32* idx_in) {
  int idx_buffer_ptr = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    std::pair<int, int> ds;
    ds.first = idx_buffer_ptr;
    memcpy(&idx_out[idx_buffer_ptr], idx_in + draw.first_index_index,
           draw.num_indices * sizeof(u32));
    idx_buffer_ptr += draw.num_indices;
    ds.second = idx_buffer_ptr - ds.first;
    group_out[i] = ds;
  }
  return idx_buffer_ptr;
}
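
// Builds multidraw count/offset arrays from the level visibility data: consecutive visible
// vis-groups are merged into runs so each draw submits as few ranges as possible. The second
// variant additionally requires the TIE prototype of each group to be visible.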
u32 make_multidraws_from_vis_string(std::pair<int, int>* draw_ptrs_out,
                                    GLsizei* counts_out,
                                    void** index_offsets_out,
                                    const std::vector<tfrag3::StripDraw>& draws,
                                    const std::vector<u8>& vis_data) {
  u64 md_idx = 0;
  u32 num_tris = 0;
  u32 sanity_check = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
    ASSERT(sanity_check == iidx);
    std::pair<int, int> ds;
    ds.first = md_idx;
    ds.second = 0;
    bool building_run = false;
    u64 run_start = 0;
    for (auto& grp : draw.vis_groups) {
      sanity_check += grp.num_inds;
      bool vis = grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh];
      if (vis) {
        num_tris += grp.num_tris;
      }
      if (building_run) {
        if (!vis) {
          building_run = false;
          counts_out[md_idx] = iidx - run_start;
          index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
          ds.second++;
          md_idx++;
        }
      } else {
        if (vis) {
          building_run = true;
          run_start = iidx;
        }
      }
      iidx += grp.num_inds;
    }
    if (building_run) {
      building_run = false;
      counts_out[md_idx] = iidx - run_start;
      index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
      ds.second++;
      md_idx++;
    }
    draw_ptrs_out[i] = ds;
  }
  return num_tris;
}

u32 make_multidraws_from_vis_and_proto_string(std::pair<int, int>* draw_ptrs_out,
                                              GLsizei* counts_out,
                                              void** index_offsets_out,
                                              const std::vector<tfrag3::StripDraw>& draws,
                                              const std::vector<u8>& vis_data,
                                              const std::vector<u8>& proto_vis_data) {
  u64 md_idx = 0;
  u32 num_tris = 0;
  u32 sanity_check = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
    ASSERT(sanity_check == iidx);
    std::pair<int, int> ds;
    ds.first = md_idx;
    ds.second = 0;
    bool building_run = false;
    u64 run_start = 0;
    for (auto& grp : draw.vis_groups) {
      sanity_check += grp.num_inds;
      bool vis = (grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh]) &&
                 proto_vis_data[grp.tie_proto_idx];
      if (vis) {
        num_tris += grp.num_tris;
      }
      if (building_run) {
        if (!vis) {
          building_run = false;
          counts_out[md_idx] = iidx - run_start;
          index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
          ds.second++;
          md_idx++;
        }
      } else {
        if (vis) {
          building_run = true;
          run_start = iidx;
        }
      }
      iidx += grp.num_inds;
    }
    if (building_run) {
      building_run = false;
      counts_out[md_idx] = iidx - run_start;
      index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
      ds.second++;
      md_idx++;
    }
    draw_ptrs_out[i] = ds;
  }
  return num_tris;
}
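
// Same culling logic as above, but instead of multidraw ranges it copies the visible index runs
// into a packed index buffer and reports the resulting triangle count.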
u32 make_index_list_from_vis_string(std::pair<int, int>* group_out,
                                    u32* idx_out,
                                    const std::vector<tfrag3::StripDraw>& draws,
                                    const std::vector<u8>& vis_data,
                                    const u32* idx_in,
                                    u32* num_tris_out) {
  int idx_buffer_ptr = 0;
  u32 num_tris = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    int vtx_idx = 0;
    std::pair<int, int> ds;
    ds.first = idx_buffer_ptr;
    bool building_run = false;
    int run_start_out = 0;
    int run_start_in = 0;
    for (auto& grp : draw.vis_groups) {
      bool vis = grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh];
      if (vis) {
        num_tris += grp.num_tris;
      }
      if (building_run) {
        if (vis) {
          idx_buffer_ptr += grp.num_inds;
        } else {
          building_run = false;
          memcpy(&idx_out[run_start_out],
                 idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
                 (idx_buffer_ptr - run_start_out) * sizeof(u32));
        }
      } else {
        if (vis) {
          building_run = true;
          run_start_out = idx_buffer_ptr;
          run_start_in = vtx_idx;
          idx_buffer_ptr += grp.num_inds;
        }
      }
      vtx_idx += grp.num_inds;
    }
    if (building_run) {
      memcpy(&idx_out[run_start_out],
             idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
             (idx_buffer_ptr - run_start_out) * sizeof(u32));
    }
    ds.second = idx_buffer_ptr - ds.first;
    group_out[i] = ds;
  }
  *num_tris_out = num_tris;
  return idx_buffer_ptr;
}

u32 make_index_list_from_vis_and_proto_string(std::pair<int, int>* group_out,
                                              u32* idx_out,
                                              const std::vector<tfrag3::StripDraw>& draws,
                                              const std::vector<u8>& vis_data,
                                              const std::vector<u8>& proto_vis_data,
                                              const u32* idx_in,
                                              u32* num_tris_out) {
  int idx_buffer_ptr = 0;
  u32 num_tris = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    int vtx_idx = 0;
    std::pair<int, int> ds;
    ds.first = idx_buffer_ptr;
    bool building_run = false;
    int run_start_out = 0;
    int run_start_in = 0;
    for (auto& grp : draw.vis_groups) {
      bool vis = (grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh]) &&
                 proto_vis_data[grp.tie_proto_idx];
      if (vis) {
        num_tris += grp.num_tris;
      }
      if (building_run) {
        if (vis) {
          idx_buffer_ptr += grp.num_inds;
        } else {
          building_run = false;
          memcpy(&idx_out[run_start_out],
                 idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
                 (idx_buffer_ptr - run_start_out) * sizeof(u32));
        }
      } else {
        if (vis) {
          building_run = true;
          run_start_out = idx_buffer_ptr;
          run_start_in = vtx_idx;
          idx_buffer_ptr += grp.num_inds;
        }
      }
      vtx_idx += grp.num_inds;
    }
    if (building_run) {
      memcpy(&idx_out[run_start_out],
             idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
             (idx_buffer_ptr - run_start_out) * sizeof(u32));
    }
    ds.second = idx_buffer_ptr - ds.first;
    group_out[i] = ds;
  }
  *num_tris_out = num_tris;
  return idx_buffer_ptr;
}
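
// All-visible version for strip draws: copies each draw's full index range and counts triangles.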
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
                                u32* idx_out,
                                const std::vector<tfrag3::StripDraw>& draws,
                                const u32* idx_in,
                                u32* num_tris_out) {
  int idx_buffer_ptr = 0;
  u32 num_tris = 0;
  for (size_t i = 0; i < draws.size(); i++) {
    const auto& draw = draws[i];
    std::pair<int, int> ds;
    ds.first = idx_buffer_ptr;
    u32 num_inds = 0;
    for (auto& grp : draw.vis_groups) {
      num_inds += grp.num_inds;
      num_tris += grp.num_tris;
    }
    memcpy(&idx_out[idx_buffer_ptr], idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer,
           num_inds * sizeof(u32));
    idx_buffer_ptr += num_inds;
    ds.second = idx_buffer_ptr - ds.first;
    group_out[i] = ds;
  }
  *num_tris_out = num_tris;
  return idx_buffer_ptr;
}
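
// Cache the camera planes, matrix, position, hvdf offset, and fog values on the shared render
// state if they haven't been stored yet; the collide mesh renderer and the additional sprite
// culling mentioned in the commit message above still read these cached values.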
void update_render_state_from_pc_settings(SharedRenderState* state, const TfragPcPortData& data) {
  if (!state->has_pc_data) {
    for (int i = 0; i < 4; i++) {
      state->camera_planes[i] = data.camera.planes[i];
      state->camera_matrix[i] = data.camera.camera[i];
    }
    state->camera_pos = data.camera.trans;
    state->camera_hvdf_off = data.camera.hvdf_off;
    state->camera_fog = data.camera.fog;
    state->has_pc_data = true;
  }
}