[graphics] use multidraws in tie/tfrag/shrub (#1269)

This commit is contained in:
water111 2022-04-01 19:35:23 -04:00 committed by GitHub
parent 6f28633bc4
commit f8b00ea358
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 230 additions and 235 deletions

View file

@ -25,21 +25,12 @@ void StripDraw::serialize(Serializer& ser) {
ser.from_ptr(&num_triangles);
}
void StripDraw::unpack() {
ASSERT(unpacked.vertex_index_stream.empty());
for (auto& r : runs) {
for (int i = 0; i < r.length; i++) {
unpacked.vertex_index_stream.push_back(r.vertex0 + i);
}
unpacked.vertex_index_stream.push_back(UINT32_MAX);
}
}
// Serializes this draw by handing each field to `ser`, one call per field.
// NOTE(review): presumably the call order defines the serialized layout, so it has to be the
// same when saving and when loading - confirm against Serializer before reordering anything.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so
// `vertex_index_stream` and `first_index_index`/`num_indices` may belong to different
// revisions of the struct - confirm against the ShrubDraw declaration.
void ShrubDraw::serialize(Serializer& ser) {
ser.from_ptr(&mode);
ser.from_ptr(&tree_tex_id);
// variable-length index data goes through the pod-vector path, unlike the scalar fields
ser.from_pod_vector(&vertex_index_stream);
ser.from_ptr(&num_triangles);
ser.from_ptr(&first_index_index);
ser.from_ptr(&num_indices);
}
void InstancedStripDraw::serialize(Serializer& ser) {
@ -109,6 +100,16 @@ void TieTree::unpack() {
}
}
}
for (auto& draw : static_draws) {
draw.unpacked.idx_of_first_idx_in_full_buffer = unpacked.indices.size();
for (auto& run : draw.runs) {
for (u32 ri = 0; ri < run.length; ri++) {
unpacked.indices.push_back(run.vertex0 + ri);
}
unpacked.indices.push_back(UINT32_MAX);
}
}
}
void ShrubTree::unpack() {
@ -154,6 +155,16 @@ void TfragTree::unpack() {
o.q = 1.f;
o.color_index = in.color_index;
}
for (auto& draw : draws) {
draw.unpacked.idx_of_first_idx_in_full_buffer = unpacked.indices.size();
for (auto& run : draw.runs) {
for (u32 ri = 0; ri < run.length; ri++) {
unpacked.indices.push_back(run.vertex0 + ri);
}
unpacked.indices.push_back(UINT32_MAX);
}
}
}
void TieTree::serialize(Serializer& ser) {
@ -191,6 +202,7 @@ void TieTree::serialize(Serializer& ser) {
void ShrubTree::serialize(Serializer& ser) {
ser.from_pod_vector(&time_of_day_colors);
ser.from_pod_vector(&indices);
packed_vertices.serialize(ser);
if (ser.is_saving()) {
ser.save<size_t>(static_draws.size());
@ -338,10 +350,7 @@ std::array<int, MemoryUsageCategory::NUM_CATEGORIES> Level::get_memory_usage() c
shrub_tree.packed_vertices.vertices.size() * sizeof(PackedShrubVertices::Vertex);
result[SHRUB_VERT] += shrub_tree.packed_vertices.instance_groups.size() *
sizeof(PackedShrubVertices::InstanceGroup);
for (const auto& draw : shrub_tree.static_draws) {
result[SHRUB_IND] += sizeof(u32) * draw.vertex_index_stream.size();
}
result[SHRUB_IND] += sizeof(u32) * shrub_tree.indices.size();
}
return result;

View file

@ -47,7 +47,7 @@ enum MemoryUsageCategory {
NUM_CATEGORIES
};
constexpr int TFRAG3_VERSION = 13;
constexpr int TFRAG3_VERSION = 14;
// These vertices should be uploaded to the GPU at load time and don't change
struct PreloadedVertex {
@ -135,13 +135,9 @@ struct StripDraw {
u32 tree_tex_id = 0; // the texture that should be bound for the draw
struct {
// the list of vertices in the draw. This includes the restart code of UINT32_MAX that OpenGL
// will use to start a new strip.
std::vector<u32> vertex_index_stream;
u32 idx_of_first_idx_in_full_buffer = 0;
} unpacked;
void unpack();
struct VertexRun {
u32 vertex0;
u16 length;
@ -152,7 +148,8 @@ struct StripDraw {
// to do culling, the above vertex stream is grouped.
// by following the visgroups and checking the visibility, you can leave out invisible vertices.
struct VisGroup {
u32 num = 0; // number of vertex indices in this group
u32 num_inds = 0; // number of vertex indices in this group
u32 num_tris = 0; // number of triangles
u32 vis_idx_in_pc_bvh = 0; // the visibility group they belong to (in BVH)
};
std::vector<VisGroup> vis_groups;
@ -166,9 +163,8 @@ struct ShrubDraw {
DrawMode mode; // the OpenGL draw settings.
u32 tree_tex_id = 0; // the texture that should be bound for the draw
// the list of vertices in the draw. This includes the restart code of UINT32_MAX that OpenGL
// will use to start a new strip.
std::vector<u32> vertex_index_stream;
u32 first_index_index;
u32 num_indices;
// for debug counting.
u32 num_triangles = 0;
@ -261,6 +257,7 @@ struct TfragTree {
struct {
std::vector<PreloadedVertex> vertices; // mesh vertices
std::vector<u32> indices;
} unpacked;
void unpack();
void serialize(Serializer& ser);
@ -286,6 +283,7 @@ struct TieTree {
struct {
std::vector<PreloadedVertex> vertices; // mesh vertices
std::vector<u32> indices;
} unpacked;
void serialize(Serializer& ser);
@ -298,6 +296,7 @@ struct ShrubTree {
PackedShrubVertices packed_vertices;
std::vector<ShrubDraw> static_draws; // the actual topology and settings
std::vector<u32> indices;
struct {
std::vector<ShrubGpuVertex> vertices; // mesh vertices

View file

@ -443,6 +443,7 @@ void make_draws(tfrag3::Level& lev,
tfrag3::ShrubTree& tree_out,
const std::vector<ShrubProtoInfo>& protos,
const TextureDB& tdb) {
std::vector<std::vector<u32>> indices_regrouped_by_draw;
std::unordered_map<u32, std::vector<u32>> static_draws_by_tex;
size_t global_vert_counter = 0;
for (auto& proto : protos) {
@ -528,10 +529,12 @@ void make_draws(tfrag3::Level& lev,
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
auto existing_draws_in_tex = static_draws_by_tex.find(idx_in_lev_data);
tfrag3::ShrubDraw* draw_to_add_to = nullptr;
std::vector<u32>* verts_to_add_to = nullptr;
if (existing_draws_in_tex != static_draws_by_tex.end()) {
for (auto idx : existing_draws_in_tex->second) {
if (tree_out.static_draws.at(idx).mode == mode) {
draw_to_add_to = &tree_out.static_draws[idx];
verts_to_add_to = &indices_regrouped_by_draw[idx];
}
}
}
@ -543,6 +546,7 @@ void make_draws(tfrag3::Level& lev,
draw_to_add_to = &tree_out.static_draws.back();
draw_to_add_to->mode = mode;
draw_to_add_to->tree_tex_id = idx_in_lev_data;
verts_to_add_to = &indices_regrouped_by_draw.emplace_back();
}
// now we have a draw, time to add vertices
@ -556,25 +560,30 @@ void make_draws(tfrag3::Level& lev,
for (size_t vidx = 0; vidx < draw.vertices.size(); vidx++) {
if (draw.vertices[vidx].adc) {
draw_to_add_to->vertex_index_stream.push_back(vidx + global_vert_counter);
verts_to_add_to->push_back(vidx + global_vert_counter);
draw_to_add_to->num_triangles++;
} else {
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
draw_to_add_to->vertex_index_stream.push_back(vidx + global_vert_counter - 1);
draw_to_add_to->vertex_index_stream.push_back(vidx + global_vert_counter);
verts_to_add_to->push_back(UINT32_MAX);
verts_to_add_to->push_back(vidx + global_vert_counter - 1);
verts_to_add_to->push_back(vidx + global_vert_counter);
}
}
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
verts_to_add_to->push_back(UINT32_MAX);
global_vert_counter += draw.vertices.size();
}
}
}
}
for (auto& draw : tree_out.static_draws) {
draw.num_triangles = clean_up_vertex_indices(draw.vertex_index_stream);
for (size_t didx = 0; didx < tree_out.static_draws.size(); didx++) {
auto& draw = tree_out.static_draws[didx];
auto& inds = indices_regrouped_by_draw[didx];
draw.num_triangles = clean_up_vertex_indices(inds);
draw.num_indices = inds.size();
draw.first_index_index = tree_out.indices.size();
tree_out.indices.insert(tree_out.indices.end(), inds.begin(), inds.end());
}
tree_out.packed_vertices.total_vertex_count = global_vert_counter;
}

View file

@ -2043,7 +2043,8 @@ void make_tfrag3_data(std::map<u32, std::vector<GroupedDraw>>& draws,
for (auto& strip : draw.strips) {
tfrag3::StripDraw::VisGroup vgroup;
vgroup.vis_idx_in_pc_bvh = strip.tfrag_id; // associate with the tfrag for culling
vgroup.num = strip.verts.size() + 1; // one for the primitive restart!
vgroup.num_inds = strip.verts.size() + 1; // one for the primitive restart!
vgroup.num_tris = strip.verts.size() - 2;
tdraw.num_triangles += strip.verts.size() - 2;
tfrag3::StripDraw::VertexRun run;
@ -2127,7 +2128,8 @@ void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
result.push_back(grps.at(0));
for (size_t i = 1; i < grps.size(); i++) {
if (grps[i].vis_idx_in_pc_bvh == result.back().vis_idx_in_pc_bvh) {
result.back().num += grps[i].num;
result.back().num_inds += grps[i].num_inds;
result.back().num_tris += grps[i].num_tris;
} else {
result.push_back(grps[i]);
}

View file

@ -2211,8 +2211,9 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
// now we have a draw, time to add vertices
tfrag3::StripDraw::VisGroup vgroup;
vgroup.vis_idx_in_pc_bvh = inst.vis_id; // associate with the instance for culling
vgroup.num = strip.verts.size() + 1; // one for the primitive restart!
vgroup.vis_idx_in_pc_bvh = inst.vis_id; // associate with the instance for culling
vgroup.num_inds = strip.verts.size() + 1; // one for the primitive restart!
vgroup.num_tris = strip.verts.size() - 2;
draw_to_add_to->num_triangles += strip.verts.size() - 2;
tfrag3::PackedTieVertices::MatrixGroup grp;
grp.matrix_idx = matrix_idx;
@ -2275,7 +2276,8 @@ void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
result.push_back(grps.at(0));
for (size_t i = 1; i < grps.size(); i++) {
if (grps[i].vis_idx_in_pc_bvh == result.back().vis_idx_in_pc_bvh) {
result.back().num += grps[i].num;
result.back().num_tris += grps[i].num_tris;
result.back().num_inds += grps[i].num_inds;
} else {
result.push_back(grps[i]);
}

View file

@ -118,17 +118,11 @@ void Loader::loader_thread() {
for (auto& tie_tree : result->tie_trees) {
for (auto& tree : tie_tree) {
tree.unpack();
for (auto& d : tree.static_draws) {
d.unpack();
}
}
}
for (auto& t_tree : result->tfrag_trees) {
for (auto& tree : t_tree) {
tree.unpack();
for (auto& d : tree.draws) {
d.unpack();
}
}
}

View file

@ -69,13 +69,20 @@ void Shrub::update_load(const Loader::LevelData* loader_data) {
size_t max_draws = 0;
size_t time_of_day_count = 0;
size_t max_num_grps = 0;
for (u32 l_tree = 0; l_tree < lev_data->shrub_trees.size(); l_tree++) {
size_t idx_buffer_len = 0;
size_t num_grps = 0;
const auto& tree = lev_data->shrub_trees[l_tree];
max_draws = std::max(tree.static_draws.size(), max_draws);
for (auto& draw : tree.static_draws) {
idx_buffer_len += draw.vertex_index_stream.size();
(void)draw;
// num_grps += draw.vis_groups.size(); TODO
max_num_grps += 1;
}
max_num_grps = std::max(max_num_grps, num_grps);
time_of_day_count = std::max(tree.time_of_day_colors.size(), time_of_day_count);
u32 verts = tree.unpacked.vertices.size();
glGenVertexArrays(1, &m_trees[l_tree].vao);
@ -124,8 +131,8 @@ void Shrub::update_load(const Loader::LevelData* loader_data) {
glGenBuffers(1, &m_trees[l_tree].index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_trees[l_tree].index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
m_trees[l_tree].index_list.resize(idx_buffer_len);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, tree.indices.size() * sizeof(u32), tree.indices.data(),
GL_STATIC_DRAW);
glActiveTexture(GL_TEXTURE10);
glGenTextures(1, &m_trees[l_tree].time_of_day_texture);
@ -138,7 +145,9 @@ void Shrub::update_load(const Loader::LevelData* loader_data) {
glBindVertexArray(0);
}
m_cache.draw_idx_temp.resize(max_draws);
m_cache.multidraw_offset_per_stripdraw.resize(max_draws);
m_cache.multidraw_count_buffer.resize(max_num_grps);
m_cache.multidraw_index_offset_buffer.resize(max_num_grps);
ASSERT(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
}
@ -198,7 +207,6 @@ void Shrub::render_tree(int idx,
auto& tree = m_trees.at(idx);
tree.perf.draws = 0;
tree.perf.verts = 0;
tree.perf.full_draws = 0;
tree.perf.wind_draws = 0;
if (!m_has_level) {
return;
@ -229,24 +237,22 @@ void Shrub::render_tree(int idx,
tree.perf.tod_time.add(setup_timer.getSeconds());
int last_texture = -1;
u32 idx_buffer_ptr = 0;
tree.perf.cull_time.add(0);
Timer index_timer;
idx_buffer_ptr = make_all_visible_index_list(m_cache.draw_idx_temp.data(), tree.index_list.data(),
*tree.draws);
make_all_visible_multidraws(m_cache.multidraw_offset_per_stripdraw.data(),
m_cache.multidraw_count_buffer.data(),
m_cache.multidraw_index_offset_buffer.data(), *tree.draws);
tree.perf.index_time.add(index_timer.getSeconds());
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
Timer draw_timer;
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_ptr * sizeof(u32), tree.index_list.data(),
GL_STREAM_DRAW);
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
const auto& draw = tree.draws->operator[](draw_idx);
const auto& indices = m_cache.draw_idx_temp[draw_idx];
const auto& indices = m_cache.multidraw_offset_per_stripdraw[draw_idx];
if (indices.second <= indices.first) {
if (indices.second == 0) {
continue;
}
@ -257,20 +263,16 @@ void Shrub::render_tree(int idx,
auto double_draw = setup_tfrag_shader(render_state, draw.mode, ShaderId::SHRUB);
int draw_size = indices.second - indices.first;
void* offset = (void*)(indices.first * sizeof(u32));
prof.add_draw_call();
prof.add_tri(draw.num_triangles * (float)draw_size / draw.vertex_index_stream.size());
bool is_full = draw_size == (int)draw.vertex_index_stream.size();
prof.add_tri(draw.num_triangles);
tree.perf.draws++;
if (is_full) {
tree.perf.full_draws++;
}
tree.perf.verts += draw_size;
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
switch (double_draw.kind) {
case DoubleDrawKind::NONE:
@ -278,9 +280,6 @@ void Shrub::render_tree(int idx,
case DoubleDrawKind::AFAIL_NO_DEPTH_WRITE:
tree.perf.draws++;
tree.perf.verts += draw_size;
if (is_full) {
tree.perf.full_draws++;
}
prof.add_draw_call();
prof.add_tri(draw_size);
glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SHRUB].id(), "alpha_min"),
@ -288,7 +287,9 @@ void Shrub::render_tree(int idx,
glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SHRUB].id(), "alpha_max"),
double_draw.aref_second);
glDepthMask(GL_FALSE);
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
break;
default:
ASSERT(false);

View file

@ -31,7 +31,6 @@ class Shrub : public BucketRenderer {
GLuint vertex_buffer;
GLuint index_buffer;
GLuint time_of_day_texture;
std::vector<u32> index_list;
GLuint vao;
u32 vert_count;
const std::vector<tfrag3::ShrubDraw>* draws = nullptr;
@ -39,17 +38,9 @@ class Shrub : public BucketRenderer {
const std::vector<tfrag3::TimeOfDayColor>* colors = nullptr;
SwizzledTimeOfDay tod_cache;
std::vector<std::array<math::Vector4f, 4>> wind_matrix_cache;
bool has_wind = false;
GLuint wind_vertex_index_buffer;
std::vector<u32> wind_vertex_index_offsets;
struct {
u32 index_upload = 0;
u32 verts = 0;
u32 draws = 0;
u32 full_draws = 0; // ones that have all visible
u32 wind_draws = 0;
Filtered<float> cull_time;
Filtered<float> index_time;
@ -71,7 +62,9 @@ class Shrub : public BucketRenderer {
bool m_has_level = false;
struct Cache {
std::vector<std::pair<int, int>> draw_idx_temp;
std::vector<std::pair<int, int>> multidraw_offset_per_stripdraw;
std::vector<GLsizei> multidraw_count_buffer;
std::vector<void*> multidraw_index_offset_buffer;
} m_cache;
TfragPcPortData m_pc_port_data;
};

View file

@ -38,8 +38,6 @@ constexpr const char* level_names[] = {"bea", "cit", "dar", "fin", "int", "jub",
void TFragment::render(DmaFollower& dma,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
m_debug_string.clear();
if (!m_enabled) {
while (dma.current_tag_offset() != render_state->next_bucket) {
dma.read_and_advance();
@ -148,13 +146,9 @@ void TFragment::render(DmaFollower& dma,
m_tfrag3.render_matching_trees(m_tfrag3.lod(), m_tree_kinds, settings, render_state, t3prof);
}
m_debug_string += fmt::format("fail: {}\n", dma.current_tag().print());
while (dma.current_tag_offset() != render_state->next_bucket) {
auto tag = dma.current_tag().print();
auto data = dma.read_and_advance();
m_debug_string +=
fmt::format("DMA {} {} bytes, {}\n", tag, data.size_bytes, data.vifcode0().print());
dma.read_and_advance();
}
if (m_hack_test_many_levels) {
@ -211,8 +205,6 @@ void TFragment::draw_debug_window() {
}
m_tfrag3.draw_debug_window();
ImGui::TextUnformatted(m_debug_string.data());
}
void TFragment::handle_initialization(DmaFollower& dma) {
@ -238,7 +230,6 @@ void TFragment::handle_initialization(DmaFollower& dma) {
auto data_upload = dma.read_and_advance();
unpack_to_stcycl(&m_tfrag_data, data_upload, VifCode::Kind::UNPACK_V4_32, 4, 4, sizeof(TFragData),
TFragDataMem::TFragFrameData, false, false);
m_debug_string += fmt::format("Frame Data:\n {}\n", m_tfrag_data.print());
// call the setup program
auto mscal_setup = dma.read_and_advance();
@ -249,19 +240,6 @@ void TFragment::handle_initialization(DmaFollower& dma) {
memcpy(&m_pc_port_data, pc_port_data.data, sizeof(TfragPcPortData));
m_pc_port_data.level_name[11] = '\0';
for (int i = 0; i < 4; i++) {
m_debug_string += fmt::format("p[{}]: {}\n", i, m_pc_port_data.planes[i].to_string_aligned());
}
for (int i = 0; i < 4; i++) {
m_debug_string += fmt::format("t[{}]: {:x} {:x} {:x} {:x}\n", i, m_pc_port_data.itimes[i].x(),
m_pc_port_data.itimes[i].y(), m_pc_port_data.itimes[i].z(),
m_pc_port_data.itimes[i].w());
}
m_debug_string +=
fmt::format("level: {}, tree: {}\n", m_pc_port_data.level_name, m_pc_port_data.tree_idx);
// setup double buffering.
auto db_setup = dma.read_and_advance();
ASSERT(db_setup.size_bytes == 0);

View file

@ -46,7 +46,6 @@ class TFragment : public BucketRenderer {
private:
void handle_initialization(DmaFollower& dma);
std::string m_debug_string;
bool m_child_mode = false;
bool m_hack_test_many_levels = false;
bool m_override_time_of_day = false;

View file

@ -48,22 +48,24 @@ void Tfrag3::update_load(const std::vector<tfrag3::TFragmentTreeKind>& tree_kind
size_t time_of_day_count = 0;
size_t vis_temp_len = 0;
size_t max_draw = 0;
size_t max_draws = 0;
size_t max_num_grps = 0;
for (int geom = 0; geom < GEOM_MAX; ++geom) {
for (size_t tree_idx = 0; tree_idx < lev_data->tfrag_trees[geom].size(); tree_idx++) {
size_t idx_buffer_len = 0;
const auto& tree = lev_data->tfrag_trees[geom][tree_idx];
auto& tree_cache = m_cached_trees[geom].emplace_back();
tree_cache.kind = tree.kind;
if (std::find(tree_kinds.begin(), tree_kinds.end(), tree.kind) != tree_kinds.end()) {
max_draw = std::max(tree.draws.size(), max_draw);
max_draws = std::max(tree.draws.size(), max_draws);
size_t num_grps = 0;
for (auto& draw : tree.draws) {
idx_buffer_len += draw.unpacked.vertex_index_stream.size();
num_grps += draw.vis_groups.size();
}
max_num_grps = std::max(max_num_grps, num_grps);
time_of_day_count = std::max(tree.colors.size(), time_of_day_count);
u32 verts = tree.packed_vertices.vertices.size();
glGenVertexArrays(1, &tree_cache.vao);
@ -109,9 +111,8 @@ void Tfrag3::update_load(const std::vector<tfrag3::TFragmentTreeKind>& tree_kind
glGenBuffers(1, &tree_cache.index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree_cache.index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr,
GL_STREAM_DRAW);
tree_cache.index_list.resize(idx_buffer_len);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, tree.unpacked.indices.size() * sizeof(u32),
tree.unpacked.indices.data(), GL_STREAM_DRAW);
glGenTextures(1, &tree_cache.time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, tree_cache.time_of_day_texture);
@ -125,7 +126,9 @@ void Tfrag3::update_load(const std::vector<tfrag3::TFragmentTreeKind>& tree_kind
}
m_cache.vis_temp.resize(vis_temp_len);
m_cache.draw_idx_temp.resize(max_draw);
m_cache.multidraw_offset_per_stripdraw.resize(max_draws);
m_cache.multidraw_count_buffer.resize(max_num_grps);
m_cache.multidraw_index_offset_buffer.resize(max_num_grps);
ASSERT(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
}
@ -196,17 +199,17 @@ void Tfrag3::render_tree(int geom,
cull_check_all_slow(settings.planes, tree.vis->vis_nodes, settings.occlusion_culling,
m_cache.vis_temp.data());
int idx_buffer_ptr = make_index_list_from_vis_string(
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
u32 total_tris = make_multidraws_from_vis_string(
m_cache.multidraw_offset_per_stripdraw.data(), m_cache.multidraw_count_buffer.data(),
m_cache.multidraw_index_offset_buffer.data(), *tree.draws, m_cache.vis_temp);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_ptr * sizeof(u32), tree.index_list.data(),
GL_STREAM_DRAW);
prof.add_tri(total_tris);
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
const auto& draw = tree.draws->operator[](draw_idx);
const auto& indices = m_cache.draw_idx_temp[draw_idx];
const auto& indices = m_cache.multidraw_offset_per_stripdraw[draw_idx];
if (indices.second <= indices.first) {
if (indices.second == 0) {
continue;
}
@ -215,13 +218,16 @@ void Tfrag3::render_tree(int geom,
auto double_draw = setup_tfrag_shader(render_state, draw.mode, ShaderId::TFRAG3);
tree.tris_this_frame += draw.num_triangles;
tree.draws_this_frame++;
int draw_size = indices.second - indices.first;
void* offset = (void*)(indices.first * sizeof(u32));
int draw_size = 0;
for (int i = 0; i < indices.second; i++) {
draw_size += m_cache.multidraw_count_buffer[indices.first + i];
}
prof.add_draw_call();
prof.add_tri(draw.num_triangles * (float)draw_size / draw.unpacked.vertex_index_stream.size());
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
switch (double_draw.kind) {
case DoubleDrawKind::NONE:
@ -234,7 +240,9 @@ void Tfrag3::render_tree(int geom,
glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"),
double_draw.aref_second);
glDepthMask(GL_FALSE);
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
break;
default:
ASSERT(false);

View file

@ -60,7 +60,6 @@ class Tfrag3 {
tfrag3::TFragmentTreeKind kind;
GLuint vertex_buffer = -1;
GLuint index_buffer = -1;
std::vector<u32> index_list;
GLuint time_of_day_texture;
GLuint vao;
u32 vert_count = 0;
@ -84,7 +83,9 @@ class Tfrag3 {
struct Cache {
std::vector<u8> vis_temp;
std::vector<std::pair<int, int>> draw_idx_temp;
std::vector<std::pair<int, int>> multidraw_offset_per_stripdraw;
std::vector<GLsizei> multidraw_count_buffer;
std::vector<void*> multidraw_index_offset_buffer;
} m_cache;
std::string m_level_name;

View file

@ -25,17 +25,19 @@ void Tie3::update_load(const Loader::LevelData* loader_data) {
size_t vis_temp_len = 0;
size_t max_draws = 0;
size_t max_num_grps = 0;
u16 max_wind_idx = 0;
size_t time_of_day_count = 0;
for (u32 l_geo = 0; l_geo < tfrag3::TIE_GEOS; l_geo++) {
for (u32 l_tree = 0; l_tree < lev_data->tie_trees[l_geo].size(); l_tree++) {
size_t idx_buffer_len = 0;
size_t wind_idx_buffer_len = 0;
size_t num_grps = 0;
const auto& tree = lev_data->tie_trees[l_geo][l_tree];
max_draws = std::max(tree.static_draws.size(), max_draws);
for (auto& draw : tree.static_draws) {
idx_buffer_len += draw.unpacked.vertex_index_stream.size();
num_grps += draw.vis_groups.size();
}
max_num_grps = std::max(max_num_grps, num_grps);
for (auto& draw : tree.instanced_wind_draws) {
wind_idx_buffer_len += draw.vertex_index_stream.size();
}
@ -86,8 +88,9 @@ void Tie3::update_load(const Loader::LevelData* loader_data) {
glGenBuffers(1, &lod_tree[l_tree].index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lod_tree[l_tree].index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
lod_tree[l_tree].index_list.resize(idx_buffer_len);
// todo: move to loader, this will probably be quite slow.
glBufferData(GL_ELEMENT_ARRAY_BUFFER, tree.unpacked.indices.size() * sizeof(u32),
tree.unpacked.indices.data(), GL_STATIC_DRAW);
if (wind_idx_buffer_len > 0) {
lod_tree[l_tree].wind_matrix_cache.resize(tree.wind_instance_info.size());
@ -114,7 +117,9 @@ void Tie3::update_load(const Loader::LevelData* loader_data) {
}
m_cache.vis_temp.resize(vis_temp_len);
m_cache.draw_idx_temp.resize(max_draws);
m_cache.multidraw_offset_per_stripdraw.resize(max_draws);
m_cache.multidraw_count_buffer.resize(max_num_grps);
m_cache.multidraw_index_offset_buffer.resize(max_num_grps);
m_wind_vectors.resize(4 * max_wind_idx + 4); // 4x u32's per wind.
ASSERT(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
}
@ -501,7 +506,6 @@ void Tie3::render_tree(int idx,
auto& tree = m_trees.at(geom).at(idx);
tree.perf.draws = 0;
tree.perf.verts = 0;
tree.perf.full_draws = 0;
tree.perf.wind_draws = 0;
if (!m_has_level) {
return;
@ -536,15 +540,15 @@ void Tie3::render_tree(int idx,
tree.perf.tod_time.add(setup_timer.getSeconds());
int last_texture = -1;
u32 idx_buffer_ptr = 0;
u32 num_tris;
if (m_debug_all_visible) {
tree.perf.cull_time.add(0);
Timer index_timer;
idx_buffer_ptr = make_all_visible_index_list(m_cache.draw_idx_temp.data(),
tree.index_list.data(), *tree.draws);
num_tris = make_all_visible_multidraws(
m_cache.multidraw_offset_per_stripdraw.data(), m_cache.multidraw_count_buffer.data(),
m_cache.multidraw_index_offset_buffer.data(), *tree.draws);
tree.perf.index_time.add(index_timer.getSeconds());
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
} else {
Timer cull_timer;
cull_check_all_slow(settings.planes, tree.vis->vis_nodes, settings.occlusion_culling,
@ -552,21 +556,20 @@ void Tie3::render_tree(int idx,
tree.perf.cull_time.add(cull_timer.getSeconds());
Timer index_timer;
idx_buffer_ptr = make_index_list_from_vis_string(
m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp);
num_tris = make_multidraws_from_vis_string(
m_cache.multidraw_offset_per_stripdraw.data(), m_cache.multidraw_count_buffer.data(),
m_cache.multidraw_index_offset_buffer.data(), *tree.draws, m_cache.vis_temp);
tree.perf.index_time.add(index_timer.getSeconds());
tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr;
}
Timer draw_timer;
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_ptr * sizeof(u32), tree.index_list.data(),
GL_STREAM_DRAW);
prof.add_tri(num_tris);
for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) {
const auto& draw = tree.draws->operator[](draw_idx);
const auto& indices = m_cache.draw_idx_temp[draw_idx];
const auto& indices = m_cache.multidraw_offset_per_stripdraw[draw_idx];
if (indices.second <= indices.first) {
if (indices.second == 0) {
continue;
}
@ -576,21 +579,19 @@ void Tie3::render_tree(int idx,
}
auto double_draw = setup_tfrag_shader(render_state, draw.mode, ShaderId::TFRAG3);
int draw_size = indices.second - indices.first;
void* offset = (void*)(indices.first * sizeof(u32));
int draw_size = 0;
for (int i = 0; i < indices.second; i++) {
draw_size += m_cache.multidraw_count_buffer[indices.first + i];
}
prof.add_draw_call();
prof.add_tri(draw.num_triangles * (float)draw_size / draw.unpacked.vertex_index_stream.size());
bool is_full = draw_size == (int)draw.unpacked.vertex_index_stream.size();
tree.perf.draws++;
if (is_full) {
tree.perf.full_draws++;
}
tree.perf.verts += draw_size;
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
switch (double_draw.kind) {
case DoubleDrawKind::NONE:
@ -598,9 +599,6 @@ void Tie3::render_tree(int idx,
case DoubleDrawKind::AFAIL_NO_DEPTH_WRITE:
tree.perf.draws++;
tree.perf.verts += draw_size;
if (is_full) {
tree.perf.full_draws++;
}
prof.add_draw_call();
prof.add_tri(draw_size);
glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"),
@ -608,7 +606,9 @@ void Tie3::render_tree(int idx,
glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"),
double_draw.aref_second);
glDepthMask(GL_FALSE);
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
break;
default:
ASSERT(false);
@ -628,7 +628,9 @@ void Tie3::render_tree(int idx,
settings.fog.x());
glDisable(GL_BLEND);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset);
glMultiDrawElements(GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[indices.first],
GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[indices.first],
indices.second);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
prof.add_draw_call();
prof.add_tri(draw_size);
@ -662,9 +664,8 @@ void Tie3::draw_debug_window() {
for (u32 i = 0; i < m_trees[lod()].size(); i++) {
auto& perf = m_trees[lod()][i].perf;
ImGui::Text("Tree: %d", i);
ImGui::Text("index data bytes: %d", perf.index_upload);
ImGui::Text("time of days: %d", (int)m_trees[lod()][i].colors->size());
ImGui::Text("draw: %d, full: %d, verts: %d", perf.draws, perf.full_draws, perf.verts);
ImGui::Text("draw: %d, verts: %d", perf.draws, perf.verts);
ImGui::Text("wind draw: %d", perf.wind_draws);
ImGui::Text("total: %.2f", perf.tree_time.get());
ImGui::Text("cull: %.2f index: %.2f tod: %.2f setup: %.2f draw: %.2f",

View file

@ -52,7 +52,6 @@ class Tie3 : public BucketRenderer {
GLuint vertex_buffer;
GLuint index_buffer;
GLuint time_of_day_texture;
std::vector<u32> index_list;
GLuint vao;
u32 vert_count;
const std::vector<tfrag3::StripDraw>* draws = nullptr;
@ -69,10 +68,8 @@ class Tie3 : public BucketRenderer {
std::vector<u32> wind_vertex_index_offsets;
struct {
u32 index_upload = 0;
u32 verts = 0;
u32 draws = 0;
u32 full_draws = 0; // ones that have all visible
u32 wind_draws = 0;
Filtered<float> cull_time;
Filtered<float> index_time;
@ -90,7 +87,9 @@ class Tie3 : public BucketRenderer {
struct Cache {
std::vector<u8> vis_temp;
std::vector<std::pair<int, int>> draw_idx_temp;
std::vector<std::pair<int, int>> multidraw_offset_per_stripdraw;
std::vector<GLsizei> multidraw_count_buffer;
std::vector<void*> multidraw_index_offset_buffer;
} m_cache;
std::vector<math::Vector<u8, 4>> m_color_result;

View file

@ -483,82 +483,77 @@ void cull_check_all_slow(const math::Vector4f* planes,
}
}
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::StripDraw>& draws) {
int idx_buffer_ptr = 0;
void make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::ShrubDraw>& draws) {
u64 md_idx = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
u64 iidx = draw.first_index_index;
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
memcpy(&idx_out[idx_buffer_ptr], draw.unpacked.vertex_index_stream.data(),
draw.unpacked.vertex_index_stream.size() * sizeof(u32));
idx_buffer_ptr += draw.unpacked.vertex_index_stream.size();
ds.second = idx_buffer_ptr;
group_out[i] = ds;
ds.first = md_idx;
ds.second = 1;
counts_out[md_idx] = draw.num_indices;
index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
md_idx++;
draw_ptrs_out[i] = ds;
}
return idx_buffer_ptr;
}
// Concatenate the full index stream of every shrub draw into idx_out, with no culling.
//  group_out: receives one [begin, end) pair per draw, giving the range of idx_out
//             (in indices, not bytes) that holds that draw's stream.
//  idx_out:   destination buffer; must be able to hold the sum of all stream sizes.
//  draws:     the draws to copy from, processed in order.
// Returns the total number of indices written to idx_out.
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
                                u32* idx_out,
                                const std::vector<tfrag3::ShrubDraw>& draws) {
  int write_pos = 0;
  std::pair<int, int>* range_out = group_out;
  for (const auto& shrub_draw : draws) {
    const auto& stream = shrub_draw.vertex_index_stream;
    const int range_begin = write_pos;
    memcpy(idx_out + range_begin, stream.data(), stream.size() * sizeof(u32));
    write_pos += stream.size();
    *range_out++ = {range_begin, write_pos};
  }
  return write_pos;
}
u32 make_index_list_from_vis_string(std::pair<int, int>* group_out,
u32* idx_out,
u32 make_multidraws_from_vis_string(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data) {
int idx_buffer_ptr = 0;
u64 md_idx = 0;
u32 num_tris = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
int vtx_idx = 0;
u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
bool building_run = false;
int run_start_out = 0;
int run_start_in = 0;
ds.first = md_idx;
ds.second = 0;
for (auto& grp : draw.vis_groups) {
bool vis = grp.vis_idx_in_pc_bvh == 0xffffffff || vis_data[grp.vis_idx_in_pc_bvh];
if (building_run) {
if (vis) {
idx_buffer_ptr += grp.num;
} else {
building_run = false;
idx_buffer_ptr += grp.num;
memcpy(&idx_out[run_start_out], &draw.unpacked.vertex_index_stream[run_start_in],
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
} else {
if (vis) {
building_run = true;
run_start_out = idx_buffer_ptr;
run_start_in = vtx_idx;
idx_buffer_ptr += grp.num;
} else {
}
if (grp.vis_idx_in_pc_bvh == 0xffffffff || vis_data[grp.vis_idx_in_pc_bvh]) {
// visible!
// let's use a multidraw
counts_out[md_idx] = grp.num_inds;
index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
ds.second++;
md_idx++;
num_tris += grp.num_tris;
}
vtx_idx += grp.num;
iidx += grp.num_inds;
}
if (building_run) {
memcpy(&idx_out[run_start_out], &draw.unpacked.vertex_index_stream[run_start_in],
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
ds.second = idx_buffer_ptr;
group_out[i] = ds;
draw_ptrs_out[i] = ds;
}
return idx_buffer_ptr;
return num_tris;
}
// Fill the multidraw argument arrays so that every vis group of every draw is drawn
// (no visibility test is applied).
//  draw_ptrs_out:     one pair per draw: first = index of the draw's first slot in the
//                     count/offset arrays, second = number of slots used by that draw.
//  counts_out:        one entry per vis group: the number of indices in that group.
//  index_offsets_out: one entry per vis group: the group's start within the full index
//                     buffer, as a byte offset (index * sizeof(u32)) stored in a pointer.
//  draws:             the strip draws to process, in order.
// Nothing is bounds-checked: the count/offset arrays need one slot per vis group over all
// draws, and draw_ptrs_out needs one slot per draw.
// Returns the sum of num_tris over all vis groups.
u32 make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
                                GLsizei* counts_out,
                                void** index_offsets_out,
                                const std::vector<tfrag3::StripDraw>& draws) {
  u64 next_slot = 0;
  u32 tri_total = 0;
  std::pair<int, int>* range_out = draw_ptrs_out;

  for (const auto& strip_draw : draws) {
    // groups of one draw are laid out back to back, starting at the draw's first index
    u64 index_cursor = strip_draw.unpacked.idx_of_first_idx_in_full_buffer;
    const u64 first_slot = next_slot;

    for (const auto& group : strip_draw.vis_groups) {
      counts_out[next_slot] = group.num_inds;
      index_offsets_out[next_slot] = (void*)(index_cursor * sizeof(u32));
      tri_total += group.num_tris;
      index_cursor += group.num_inds;
      ++next_slot;
    }

    range_out->first = static_cast<int>(first_slot);
    range_out->second = static_cast<int>(next_slot - first_slot);
    ++range_out;
  }

  return tri_total;
}

View file

@ -60,13 +60,18 @@ struct TfragPcPortData {
u32 tree_idx;
};
u32 make_index_list_from_vis_string(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data);
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
u32* idx_out,
void make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::ShrubDraw>& draws);
u32 make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws);
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::ShrubDraw>& draws);
u32 make_multidraws_from_vis_string(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data);