mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
284fb09997
fixes #1983 First functioning enemy! https://user-images.githubusercontent.com/13153231/210184845-f1d93491-11f5-4a64-b69c-41e535992ebc.mp4
2400 lines
90 KiB
C++
2400 lines
90 KiB
C++
#include "extract_tie.h"
|
|
|
|
#include <array>
|
|
|
|
#include "common/log/log.h"
|
|
#include "common/util/FileUtil.h"
|
|
|
|
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
|
|
|
// Jak 2 notes:
|
|
// - proto flags are currently ignored, but stored.
|
|
|
|
namespace decompiler {
|
|
|
|
/// <summary>
|
|
/// Get the index of the first draw node in an array. Works for node or tfrag.
|
|
/// </summary>
|
|
/// <param name="array"></param>
|
|
/// <returns></returns>
|
|
u16 get_first_idx(const level_tools::DrawableInlineArray* array) {
|
|
auto as_tie_instances = dynamic_cast<const level_tools::DrawableInlineArrayInstanceTie*>(array);
|
|
auto as_nodes = dynamic_cast<const level_tools::DrawableInlineArrayNode*>(array);
|
|
if (as_tie_instances) {
|
|
return as_tie_instances->instances.at(0).id;
|
|
} else if (as_nodes) {
|
|
return as_nodes->draw_nodes.at(0).id;
|
|
} else {
|
|
ASSERT(false);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Verify node indices follow the patterns we expect. Takes start as the expected first, writes the
|
|
/// end.
|
|
/// </summary>
|
|
/// <param name="array"></param>
|
|
/// <param name="start"></param>
|
|
/// <param name="end"></param>
|
|
/// <returns></returns>
|
|
bool verify_node_indices_from_array(const level_tools::DrawableInlineArray* array,
|
|
u16 start,
|
|
u16* end) {
|
|
auto as_tie_instances = dynamic_cast<const level_tools::DrawableInlineArrayInstanceTie*>(array);
|
|
auto as_nodes = dynamic_cast<const level_tools::DrawableInlineArrayNode*>(array);
|
|
|
|
if (as_tie_instances) {
|
|
for (auto& elt : as_tie_instances->instances) {
|
|
if (elt.id != start) {
|
|
lg::error("bad inst: exp {} got {}", start, elt.id);
|
|
return false;
|
|
}
|
|
start++;
|
|
}
|
|
*end = start;
|
|
return true;
|
|
} else if (as_nodes) {
|
|
for (auto& elt : as_nodes->draw_nodes) {
|
|
if (elt.id != start) {
|
|
lg::error("bad node: exp {} got {}", start, elt.id);
|
|
return false;
|
|
}
|
|
start++;
|
|
}
|
|
*end = start;
|
|
return true;
|
|
} else {
|
|
lg::error("bad node array type: {}", array->my_type());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Verify all node indices in a tree.
|
|
*/
|
|
bool verify_node_indices(const level_tools::DrawableTreeInstanceTie* tree) {
|
|
u16 start = get_first_idx(tree->arrays.at(0).get());
|
|
for (auto& array : tree->arrays) {
|
|
if (!verify_node_indices_from_array(array.get(), start, &start)) {
|
|
return false;
|
|
}
|
|
start = (start + 31) & ~(31);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*!
|
|
* Extract the visibility tree.
|
|
* This does not insert nodes for the bottom level.
|
|
*/
|
|
void extract_vis_data(const level_tools::DrawableTreeInstanceTie* tree,
|
|
u16 first_child,
|
|
tfrag3::TieTree& out) {
|
|
out.bvh.first_leaf_node = first_child;
|
|
out.bvh.last_leaf_node = first_child;
|
|
|
|
if (tree->arrays.size() == 0) {
|
|
// shouldn't hit this?
|
|
} else if (tree->arrays.size() == 1) {
|
|
auto array =
|
|
dynamic_cast<const level_tools::DrawableInlineArrayInstanceTie*>(tree->arrays.at(0).get());
|
|
ASSERT(array);
|
|
out.bvh.first_root = array->instances.at(0).id;
|
|
out.bvh.num_roots = array->instances.size();
|
|
out.bvh.only_children = true;
|
|
} else {
|
|
auto array =
|
|
dynamic_cast<const level_tools::DrawableInlineArrayNode*>(tree->arrays.at(0).get());
|
|
ASSERT(array);
|
|
out.bvh.first_root = array->draw_nodes.at(0).id;
|
|
out.bvh.num_roots = array->draw_nodes.size();
|
|
out.bvh.only_children = false;
|
|
}
|
|
|
|
out.bvh.vis_nodes.resize(first_child - out.bvh.first_root);
|
|
|
|
// may run 0 times, if there are only children.
|
|
for (int i = 0; i < ((int)tree->arrays.size()) - 1; i++) {
|
|
bool expecting_leaves = i == ((int)tree->arrays.size()) - 2;
|
|
|
|
auto array =
|
|
dynamic_cast<const level_tools::DrawableInlineArrayNode*>(tree->arrays.at(i).get());
|
|
ASSERT(array);
|
|
u16 idx = first_child;
|
|
for (auto& elt : array->draw_nodes) {
|
|
auto& vis = out.bvh.vis_nodes.at(elt.id - out.bvh.first_root);
|
|
ASSERT(vis.num_kids == 0xff);
|
|
for (int j = 0; j < 4; j++) {
|
|
vis.bsphere[j] = elt.bsphere.data[j];
|
|
}
|
|
vis.num_kids = elt.child_count;
|
|
vis.flags = elt.flags;
|
|
vis.my_id = elt.id;
|
|
ASSERT(vis.flags == expecting_leaves ? 0 : 1);
|
|
ASSERT(vis.num_kids > 0);
|
|
ASSERT(vis.num_kids <= 8);
|
|
ASSERT(elt.children.size() == vis.num_kids);
|
|
if (expecting_leaves) {
|
|
for (int leaf = 0; leaf < (int)vis.num_kids; leaf++) {
|
|
auto l = dynamic_cast<level_tools::InstanceTie*>(elt.children.at(leaf).get());
|
|
ASSERT(l);
|
|
|
|
ASSERT(idx == l->id);
|
|
|
|
ASSERT(l->id >= out.bvh.first_leaf_node);
|
|
if (leaf == 0) {
|
|
vis.child_id = l->id;
|
|
}
|
|
out.bvh.last_leaf_node = std::max((u16)l->id, out.bvh.last_leaf_node);
|
|
idx++;
|
|
}
|
|
|
|
} else {
|
|
u16 arr_idx = 0;
|
|
for (int child = 0; child < (int)vis.num_kids; child++) {
|
|
auto l = dynamic_cast<level_tools::DrawNode*>(elt.children.at(child).get());
|
|
ASSERT(l);
|
|
if (child == 0) {
|
|
arr_idx = l->id;
|
|
} else {
|
|
ASSERT(arr_idx < l->id);
|
|
arr_idx = l->id;
|
|
}
|
|
if (child == 0) {
|
|
vis.child_id = l->id;
|
|
}
|
|
|
|
ASSERT(l->id < out.bvh.first_leaf_node);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// The number of geoms.
constexpr int GEOM_MAX = 4;
|
|
|
|
// Each TIE prototype is broken up into "fragments". These "fragments" have some maximum size based
|
|
// on the VU memory limit, so an instance may have multiple fragments, depending on how many
|
|
// vertices are in the model.
|
|
|
|
// Each instance has different set of time of day colors per fragment in the prototype.
|
|
// this type contains the indicies of these colors.
|
|
// For the PC port we combine all colors into a single "big palette".
|
|
// this stores the indices as indices into the original game's per fragment palette.
|
|
// and an offset for where this palette is located in the big palette.
|
|
struct TieInstanceFragInfo {
|
|
// the color index table uploaded to VU.
|
|
// this contains indices into the shared palette.
|
|
std::vector<u8> color_indices;
|
|
|
|
// in the PC port format, we upload a single giant time of day color. this points to the offset
|
|
// of the colors from this frag instance.
|
|
u16 color_index_offset_in_big_palette = -1;
|
|
};
|
|
|
|
// Each TIE instance has one of these. This is reorganized/unpacked data from the instance-tie type.
|
|
struct TieInstanceInfo {
|
|
// The index of the prototype (the geometry) that is used by this instance
|
|
u16 prototype_idx = 0;
|
|
|
|
// our bsphere's index in the BVH tree
|
|
u16 vis_id = 0;
|
|
|
|
// not totally sure if we'll use this (currently unused in tfrag, but probably worth if we
|
|
// actually cull using the tree)
|
|
math::Vector4f bsphere;
|
|
|
|
// the transformation matrix, unpacked from the weird TIE format.
|
|
// this can be used to transform points directly to world-space points that work
|
|
// with the normal math camera stuff.
|
|
std::array<math::Vector4f, 4> mat;
|
|
|
|
// this value is stashed inside the above matrix. It tells which "wind" we should use
|
|
// we just need to pass this along to the C++ rendering code.
|
|
u16 wind_index = 0;
|
|
|
|
std::vector<TieInstanceFragInfo> frags; // per-instance per-fragment info (just colors)
|
|
};
|
|
|
|
// The 5 qw of adgif data contains draw settings, and they also snuck in some extra data.
|
|
struct AdgifInfo {
|
|
// secret stuff they snuck in
|
|
u32 first_w; // VU memory offset
|
|
u32 second_w; // some size
|
|
u32 third_w; // unused, at least for not-near TIE
|
|
|
|
// the draw settings we care about:
|
|
u32 combo_tex; // PC texture ID
|
|
u64 alpha_val; // alpha blend settings
|
|
u64 clamp_val; // texture clamp settings
|
|
};
|
|
|
|
// When the prototype is uploaded, it places a bunch of strgif tags in VU memory.
|
|
// we'll need to remember where these are.
|
|
struct StrGifInfo {
|
|
u16 address; // vu memory address
|
|
u16 nloop; // the nloop field of this strgif (how much to send)
|
|
u16 mode; // not yet fully understood, but can allow the use of other templates
|
|
bool eop; // end of packet flag
|
|
};
|
|
|
|
// data per vertex in a tie prototype
|
|
struct TieProtoVertex {
|
|
math::Vector<float, 3> pos; // position
|
|
math::Vector<float, 3> tex; // texture coordinate
|
|
|
|
// NOTE: this is a double lookup.
|
|
// first you look up the index in the _instance_ color table
|
|
// then you look up the color in the _proto_'s interpolated color palette.
|
|
u32 color_index_index;
|
|
};
|
|
|
|
// a tie fragment is made up of strips. Each strip has a single adgif info, and vertices
|
|
// the vertices make up a triangle strip
|
|
struct TieStrip {
|
|
AdgifInfo adgif;
|
|
std::vector<TieProtoVertex> verts;
|
|
};
|
|
|
|
// the tie fragment
|
|
// this is a per-prototype (all instances share the same TieFrags)
|
|
struct TieFrag {
|
|
bool has_magic_tex0_bit = false; // use decal mode (todo)
|
|
std::vector<AdgifInfo>
|
|
adgifs; // the adgifs that come with this tiefrag (different strips can hve different)
|
|
|
|
std::vector<u8> other_gif_data; // data sent from EE asm code, sizes/offsets/metadata
|
|
std::vector<u8> points_data; // data sent from EE asm code, actual vertex data
|
|
|
|
// number of "dverts" expected from game's metadata. we check our extraction from this.
|
|
u32 expected_dverts = 0;
|
|
|
|
// all the strips in this fragment
|
|
std::vector<TieStrip> strips;
|
|
|
|
// this contains vertices, key is the address of the actual xyzf/st/rgbaq data in VU1 memory
|
|
// after the prototype program runs
|
|
std::unordered_map<u32, TieProtoVertex> vertex_by_dest_addr;
|
|
|
|
// simulate a load in the points data (using vu mem addr)
|
|
math::Vector<float, 4> lq_points(u32 qw) const {
|
|
ASSERT(qw >= 50);
|
|
qw -= 50;
|
|
ASSERT((qw * 16) + 16 <= points_data.size());
|
|
math::Vector<float, 4> result;
|
|
memcpy(result.data(), points_data.data() + (qw * 16), 16);
|
|
return result;
|
|
}
|
|
|
|
// simulate a load from points, but don't die if we load past the end
|
|
// this can happen when pipelining.
|
|
math::Vector<float, 4> lq_points_allow_past_end(u32 qw) const {
|
|
ASSERT(qw >= 50);
|
|
qw -= 50;
|
|
if ((qw * 16) + 16 <= points_data.size()) {
|
|
math::Vector<float, 4> result;
|
|
memcpy(result.data(), points_data.data() + (qw * 16), 16);
|
|
return result;
|
|
} else {
|
|
return math::Vector4f(-1, -1, -1, -1);
|
|
}
|
|
}
|
|
|
|
// store data into points. annoyingly the points have to be unpacked
|
|
// and they are modified in place.
|
|
void sq_points(u32 qw, const math::Vector4f& data) {
|
|
ASSERT(qw >= 50);
|
|
qw -= 50;
|
|
ASSERT((qw * 16) + 16 <= points_data.size());
|
|
memcpy(points_data.data() + (qw * 16), data.data(), 16);
|
|
}
|
|
|
|
// do a ilw from the other gif data.
|
|
u16 ilw_other_gif(u32 qw, u32 offset) const {
|
|
// unpacked with v8.
|
|
int qwi = qw;
|
|
qwi -= (adgifs.size() * 5);
|
|
ASSERT(qwi >= 0);
|
|
return other_gif_data.at(qwi * 4 + offset);
|
|
}
|
|
|
|
// reg values from the prototype program that are used by the instance program.
|
|
struct ProgramInfo {
|
|
std::vector<u16> adgif_offset_in_gif_buf_qw;
|
|
std::vector<StrGifInfo> str_gifs;
|
|
u16 skip_bp2 = 0;
|
|
u16 skip_ips = 0;
|
|
u16 tgt_bp1_ptr = 0;
|
|
u16 tgt_bp2_ptr = 0;
|
|
u16 tgt_ip1_ptr = 0;
|
|
u16 tgt_ip2_ptr = 0;
|
|
u16 kick_addr = 0;
|
|
// u16 clr_ptr = 0;
|
|
u16 point_ptr = 0;
|
|
u16 misc_x = 0; // at 971's x.
|
|
math::Vector4f gifbufs;
|
|
math::Vector4f extra;
|
|
} prog_info;
|
|
};
|
|
|
|
// main instance type
|
|
// unlike the GOAL type, we store all instances info in here too.
|
|
struct TieProtoInfo {
|
|
std::string name;
|
|
std::vector<TieInstanceInfo> instances;
|
|
bool uses_generic = false;
|
|
u32 proto_flag;
|
|
float stiffness = 0; // wind
|
|
u32 generic_flag;
|
|
std::vector<tfrag3::TimeOfDayColor> time_of_day_colors; // c++ type for time of day data
|
|
std::vector<TieFrag> frags; // the fragments of the prototype
|
|
};
|
|
|
|
/*!
|
|
* Convert TIE packed matrix to normal one. this was figured out from the EE asm.
|
|
*/
|
|
std::array<math::Vector4f, 4> extract_tie_matrix(const u16* data) {
|
|
std::array<math::Vector4f, 4> result;
|
|
for (int i = 0; i < 4; i++) {
|
|
s32 x = data[12 + i];
|
|
x <<= 16;
|
|
x >>= 10;
|
|
result[3][i] = x;
|
|
}
|
|
|
|
for (int vec = 0; vec < 3; vec++) {
|
|
for (int i = 0; i < 4; i++) {
|
|
s32 x = data[vec * 4 + i];
|
|
x <<= 16;
|
|
x >>= 16;
|
|
result[vec][i] = (float)x / 4096.f;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*!
|
|
* Confirm that the initial value of all wind vectors is 0.
|
|
* If this is true, we don't have to actually save them to the fr3 file, we can just create
|
|
* a bunch of 0 vectors in the TIE setup.
|
|
*/
|
|
void check_wind_vectors_zero(const std::vector<TieProtoInfo>& protos, Ref wind_ref) {
|
|
u16 max_wind = 0;
|
|
for (auto& proto : protos) {
|
|
for (auto& inst : proto.instances) {
|
|
max_wind = std::max(inst.wind_index, max_wind);
|
|
}
|
|
}
|
|
u32 wind_words = max_wind;
|
|
wind_words *= 4;
|
|
for (size_t i = 0; i < wind_words; i++) {
|
|
auto& word = wind_ref.data->words_by_seg.at(wind_ref.seg).at(wind_ref.byte_offset / 4 + i);
|
|
ASSERT(word.kind() == LinkedWord::PLAIN_DATA);
|
|
ASSERT(word.data == 0);
|
|
}
|
|
}
|
|
|
|
// get per-instance info from the level data
|
|
std::vector<TieProtoInfo> collect_instance_info(
|
|
const level_tools::DrawableInlineArrayInstanceTie* instances,
|
|
const std::vector<level_tools::PrototypeBucketTie>* protos,
|
|
int geo) {
|
|
std::vector<TieProtoInfo> result;
|
|
|
|
// loop over instances in level
|
|
for (auto& instance : instances->instances) {
|
|
// copy basic data.
|
|
TieInstanceInfo info;
|
|
info.prototype_idx = instance.bucket_index;
|
|
info.vis_id = instance.id;
|
|
for (int i = 0; i < 4; i++) {
|
|
info.bsphere[i] = instance.bsphere.data[i];
|
|
}
|
|
// from ee asm
|
|
info.mat = extract_tie_matrix(instance.origin.data);
|
|
info.mat[3][0] += info.bsphere[0];
|
|
info.mat[3][1] += info.bsphere[1];
|
|
info.mat[3][2] += info.bsphere[2];
|
|
info.wind_index = instance.wind_index;
|
|
|
|
info.mat[0][3] = 0.f;
|
|
|
|
// each fragment has its own color data (3 dmatags)
|
|
|
|
// the number of colors (qwc) is stored in the prototype, in the color-index-qwc array of bytes.
|
|
// at an offset of index-start[geom] + frag_idx.
|
|
|
|
// the actual data is located at the instance's color-indices + (proto.base-qw[geom] * 16)
|
|
|
|
// and this is only the indices.... there's yet another lookup on the VU
|
|
auto& proto = protos->at(info.prototype_idx);
|
|
u32 offset_bytes = proto.base_qw[geo] * 16;
|
|
// loop over frags. this is only the per-instance info so only colors indices. We know the
|
|
// location/layout of the color data from the EE asm code.
|
|
for (int frag_idx = 0; frag_idx < proto.frag_count[geo]; frag_idx++) {
|
|
TieInstanceFragInfo frag_info;
|
|
// read the number of quadwords
|
|
u32 num_color_qwc = proto.color_index_qwc.at(proto.index_start[geo] + frag_idx);
|
|
// loop over 4-byte words
|
|
for (u32 i = 0; i < num_color_qwc * 4; i++) {
|
|
// loop over bytes in word
|
|
for (u32 j = 0; j < 4; j++) {
|
|
frag_info.color_indices.push_back(
|
|
instance.color_indices.data->words_by_seg.at(instance.color_indices.seg)
|
|
.at(((offset_bytes + instance.color_indices.byte_offset) / 4) + i)
|
|
.get_byte(j));
|
|
}
|
|
}
|
|
info.frags.push_back(std::move(frag_info));
|
|
ASSERT(info.frags.back().color_indices.size() > 0);
|
|
|
|
offset_bytes += num_color_qwc * 16;
|
|
}
|
|
|
|
if (result.size() <= info.prototype_idx) {
|
|
result.resize(info.prototype_idx + 1);
|
|
}
|
|
result[info.prototype_idx].instances.push_back(info);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*!
|
|
* adgif shader texture id's can be "remapped". I think it allows textures to be shared.
|
|
* So far we haven't seen this feature used, but we do have the texture map and we check it here.
|
|
*/
|
|
u32 remap_texture(u32 original, const std::vector<level_tools::TextureRemap>& map) {
|
|
auto masked = original & 0xffffff00;
|
|
for (auto& t : map) {
|
|
if (t.original_texid == masked) {
|
|
ASSERT_MSG(false, "OKAY! remapped!");
|
|
return t.new_texid | 20;
|
|
}
|
|
}
|
|
return original;
|
|
}
|
|
|
|
/*!
|
|
* Update per-proto information.
|
|
*/
|
|
void update_proto_info(std::vector<TieProtoInfo>* out,
|
|
const std::vector<level_tools::TextureRemap>& map,
|
|
const TextureDB& tdb,
|
|
const std::vector<level_tools::PrototypeBucketTie>& protos,
|
|
int geo) {
|
|
out->resize(std::max(out->size(), protos.size()));
|
|
for (size_t i = 0; i < protos.size(); i++) {
|
|
const auto& proto = protos[i];
|
|
auto& info = out->at(i);
|
|
info.proto_flag = proto.flags;
|
|
// flag of 2 means it should use the generic renderer (determined from EE asm)
|
|
// for now, we ignore this and use TIE on everything.
|
|
info.uses_generic = (proto.flags == 2); // possibly different in jak 2
|
|
// for debug, remember the name
|
|
info.name = proto.name;
|
|
// wind "stiffness" nonzero value means it has the wind effect
|
|
info.stiffness = proto.stiffness;
|
|
info.generic_flag = proto.flags & 2;
|
|
// the actual colors (rgba) used by time of day interpolation
|
|
// there are "height" colors. Each color is actually 8 colors that are interpolated.
|
|
info.time_of_day_colors.resize(proto.time_of_day.height);
|
|
for (int k = 0; k < (int)proto.time_of_day.height; k++) {
|
|
for (int j = 0; j < 8; j++) {
|
|
memcpy(info.time_of_day_colors[k].rgba[j].data(), &proto.time_of_day.colors[k * 8 + j], 4);
|
|
}
|
|
}
|
|
|
|
// loop over fragments in the proto. This is the actual mesh data data and drawing settings
|
|
for (int frag_idx = 0; frag_idx < proto.frag_count[geo]; frag_idx++) {
|
|
TieFrag frag_info;
|
|
|
|
// loop over adgif shaders
|
|
for (int tex_idx = 0; tex_idx < proto.geometry[geo].tie_fragments.at(frag_idx).tex_count / 5;
|
|
tex_idx++) {
|
|
// this adgif shader data is modified in the real game by the login methods.
|
|
// all TIE things have pretty normal adgif shaders
|
|
|
|
// all the useful adgif data will be saved into this AdgifInfo
|
|
AdgifInfo adgif;
|
|
|
|
// pointer to the level data
|
|
auto& gif_data = proto.geometry[geo].tie_fragments[frag_idx].gif_data;
|
|
|
|
// address for the first adgif shader qw.
|
|
u8 ra_tex0 = gif_data.at(16 * (tex_idx * 5 + 0) + 8);
|
|
// data for the first adgif shader qw.
|
|
u64 ra_tex0_val;
|
|
memcpy(&ra_tex0_val, &gif_data.at(16 * (tex_idx * 5 + 0)), 8);
|
|
|
|
// always expecting TEX0_1
|
|
ASSERT(ra_tex0 == (u8)GsRegisterAddress::TEX0_1);
|
|
|
|
// the value is overwritten by the login function. We don't care about this value, it's
|
|
// specific to the PS2's texture system.
|
|
ASSERT(ra_tex0_val == 0 || ra_tex0_val == 0x800000000); // note: decal
|
|
// the original value is a flag. this means to use decal texture mode (todo)
|
|
frag_info.has_magic_tex0_bit = ra_tex0_val == 0x800000000;
|
|
// there's also a hidden value in the unused bits of the a+d data. it'll be used by the
|
|
// VU program.
|
|
memcpy(&adgif.first_w, &gif_data.at(16 * (tex_idx * 5 + 0) + 12), 4);
|
|
|
|
// Second adgif. Similar to the first, except the original data value is a texture ID.
|
|
u8 ra_tex1 = gif_data.at(16 * (tex_idx * 5 + 1) + 8);
|
|
u64 ra_tex1_val;
|
|
memcpy(&ra_tex1_val, &gif_data.at(16 * (tex_idx * 5 + 1)), 8);
|
|
ASSERT(ra_tex1 == (u8)GsRegisterAddress::TEX1_1);
|
|
ASSERT(ra_tex1_val == 0x120); // some flag
|
|
u32 original_tex;
|
|
memcpy(&original_tex, &gif_data.at(16 * (tex_idx * 5 + 1) + 8), 4);
|
|
// try remapping it
|
|
u32 new_tex = remap_texture(original_tex, map);
|
|
if (original_tex != new_tex) {
|
|
lg::info("map from 0x{:x} to 0x{:x}", original_tex, new_tex);
|
|
}
|
|
// texture the texture page/texture index, and convert to a PC port texture ID
|
|
u32 tpage = new_tex >> 20;
|
|
u32 tidx = (new_tex >> 8) & 0b1111'1111'1111;
|
|
u32 tex_combo = (((u32)tpage) << 16) | tidx;
|
|
// look up the texture to make sure it's valid
|
|
auto tex = tdb.textures.find(tex_combo);
|
|
ASSERT(tex != tdb.textures.end());
|
|
// remember the texture id
|
|
adgif.combo_tex = tex_combo;
|
|
// and the hidden value in the unused a+d
|
|
memcpy(&adgif.second_w, &gif_data.at(16 * (tex_idx * 5 + 1) + 12), 4);
|
|
// todo: figure out if this matters. maybe this is decal?
|
|
if (ra_tex0_val == 0x800000000) {
|
|
// lg::print("texture {} in {} has weird tex setting\n", tex->second.name, proto.name);
|
|
}
|
|
|
|
// mipmap settings. we ignore, but get the hidden value
|
|
u8 ra_mip = gif_data.at(16 * (tex_idx * 5 + 2) + 8);
|
|
ASSERT(ra_mip == (u8)GsRegisterAddress::MIPTBP1_1);
|
|
memcpy(&adgif.third_w, &gif_data.at(16 * (tex_idx * 5 + 2) + 12), 4);
|
|
// who cares about the value
|
|
|
|
// clamp settings. we care about these. no hidden value.
|
|
u8 ra_clamp = gif_data.at(16 * (tex_idx * 5 + 3) + 8);
|
|
ASSERT(ra_clamp == (u8)GsRegisterAddress::CLAMP_1);
|
|
u64 clamp;
|
|
memcpy(&clamp, &gif_data.at(16 * (tex_idx * 5 + 3)), 8);
|
|
adgif.clamp_val = clamp;
|
|
|
|
// alpha settings. we care about these, but no hidden value
|
|
u8 ra_alpha = gif_data.at(16 * (tex_idx * 5 + 4) + 8);
|
|
ASSERT(ra_alpha == (u8)GsRegisterAddress::ALPHA_1);
|
|
u64 alpha;
|
|
memcpy(&alpha, &gif_data.at(16 * (tex_idx * 5 + 4)), 8);
|
|
adgif.alpha_val = alpha;
|
|
frag_info.adgifs.push_back(adgif);
|
|
}
|
|
|
|
// they store a vertex count. we later use this to sanity check out mesh extraction
|
|
frag_info.expected_dverts = proto.geometry[geo].tie_fragments[frag_idx].num_dverts;
|
|
|
|
// each frag also has "other" data. This is some index data that the VU program uses.
|
|
// it comes in gif_data, after tex_qwc (determined from EE program)
|
|
int tex_qwc = proto.geometry[geo].tie_fragments.at(frag_idx).tex_count;
|
|
int other_qwc = proto.geometry[geo].tie_fragments.at(frag_idx).gif_count;
|
|
frag_info.other_gif_data.resize(16 * other_qwc);
|
|
memcpy(frag_info.other_gif_data.data(),
|
|
proto.geometry[geo].tie_fragments[frag_idx].gif_data.data() + (16 * tex_qwc),
|
|
16 * other_qwc);
|
|
|
|
// each frag's "point" data. These are stored as int16's, but get unpacked to 32-bit ints by
|
|
// the VIF. (determined from EE program)
|
|
const auto& pr = proto.geometry[geo].tie_fragments[frag_idx].point_ref;
|
|
int in_qw = pr.size() / 16;
|
|
int out_qw = in_qw * 2;
|
|
frag_info.points_data.resize(out_qw * 16);
|
|
{
|
|
const s16* in_ptr = (const s16*)pr.data();
|
|
s32* out_ptr = (s32*)frag_info.points_data.data();
|
|
for (int ii = 0; ii < out_qw * 4; ii++) {
|
|
out_ptr[ii] = in_ptr[ii];
|
|
}
|
|
}
|
|
|
|
info.frags.push_back(std::move(frag_info));
|
|
}
|
|
}
|
|
}
|
|
|
|
// List of dma tags from the EE code.
|
|
// upload-palette/upload-model happen per prototype.
|
|
// (palette may happen per prototype, model per geometry, but we only use 1 geom)
|
|
|
|
// upload-palette-0: just a flusha
|
|
// no data
|
|
|
|
// upload-palette-1: stmod 1 (add row), unpack v4 (32 qw in, 128 qw out), imm = usn, 0x346
|
|
// colors (after time of day interpolation)
|
|
// NOTE: adds row
|
|
|
|
// upload-model-0: stmod = 0, unpack-v4-32 imm = 0 (upload to 0?) (usn doesn't matter for v4-32)
|
|
// adgifs, size of adgifs.
|
|
|
|
// upload-model-1:
|
|
// mscal 4
|
|
// unpack-v4-8 imm = right after adgifs, usn.
|
|
// extra gif stuff
|
|
|
|
// upload-model-2:
|
|
// unpack-v4-16 imm = 32, signed.
|
|
// points
|
|
|
|
// upload-model-3
|
|
// mscal 6 <- this runs a VU program that unpacks the model data
|
|
// call the models!
|
|
|
|
// These upload-color's happen per instance. They only happen after the upload-palette/model's
|
|
// happen for the given model.
|
|
|
|
// upload-color-0
|
|
// 6 qw of matrix plus flag stuff
|
|
// to 198 (relative to TOP)
|
|
|
|
// upload-color-1
|
|
// to 204 unsigned (relative to TOP)
|
|
|
|
// upload-color-2/ret
|
|
// mscal 0 <- this runs a VU program that generates GS data to draw the instance.
|
|
|
|
// MEMORY MAP of TIE
|
|
// these are quadword addresses.
|
|
// some things are double/triple buffered.
|
|
// we ignore this for the most part and by convention use the lower address.
|
|
|
|
// 0 gif tags
|
|
// extra gifs
|
|
// 32 model
|
|
// 198 instance matrix
|
|
// 204 instance colors
|
|
// 242 instance matrix again
|
|
// 248 instance colors again
|
|
// 286 gifbuf
|
|
// 470 gifbuf again
|
|
// 654 ??
|
|
// 838 color palette
|
|
// 966 tie-consts
|
|
// 966 adgif
|
|
// 967 strgif
|
|
// 968 extra
|
|
// 969 gifbufs
|
|
// 970 clrbufs
|
|
// 971 misc
|
|
// 972 atestgif
|
|
// 973 atest-tra
|
|
// 974 atest-def
|
|
|
|
// the vu program emulation will fill out the vertex positions/draw settings for each instance.
|
|
|
|
// helper functions for the vu programs
|
|
// Treat the raw bits of each of the 4 components as a signed 32-bit integer, and convert that
// integer to float.
math::Vector4f itof0(const math::Vector4f& vec) {
  // grab the bit patterns of all four components at once.
  s32 raw[4];
  memcpy(raw, vec.data(), sizeof(raw));

  math::Vector4f converted;
  for (int lane = 0; lane < 4; lane++) {
    converted[lane] = raw[lane];
  }
  return converted;
}
|
|
|
|
// Treat the raw bits of each component as a signed 32-bit integer and convert it to float.
// x, y and z are then divided by 4096 (12 fractional bits); w is left as the plain integer value.
math::Vector4f itof12xyz_0w(const math::Vector4f& vec) {
  math::Vector4f converted;
  for (int lane = 0; lane < 4; lane++) {
    s32 raw;
    memcpy(&raw, vec.data() + lane, sizeof(raw));
    converted[lane] = raw;
  }

  const auto drop_12_fraction_bits = [](float& component) { component /= 4096.f; };
  drop_12_fraction_bits(converted.x());
  drop_12_fraction_bits(converted.y());
  drop_12_fraction_bits(converted.z());
  return converted;
}
|
|
|
|
// Multiply x, y and z by 64. w is passed through unchanged.
math::Vector4f muli64_xyz(const math::Vector4f& vec) {
  math::Vector4f scaled(vec);
  const auto times_64 = [](float& component) { component *= 64.f; };
  times_64(scaled.x());
  times_64(scaled.y());
  times_64(scaled.z());
  return scaled;
}
|
|
|
|
void emulate_tie_prototype_program(std::vector<TieProtoInfo>& protos) {
|
|
using math::Vector4f;
|
|
|
|
// our convention here is to use the lower buffer for everything double buffered.
|
|
|
|
// because double buffering was too easy, the xgkick output buffer is triple buffered!
|
|
// the normal double buffering approach would not allow one prototype to be in setup
|
|
// while the second is being kicked. Each prototype gets two gif bufs and the third gif buf
|
|
// is used to xgkick whatever is left over from the previous prototype.
|
|
float gifbuf_start = 8388894.f; // 0x4b00011e. The 0x11e in the mantissa is 286.
|
|
float gifbuf_middle = 8389078.f; // 0x4b0001d6. The 0x1d6 in the mantissa is 470.
|
|
float gifbuf_end = 8389262.f; // 0x4b00028e. The 0x28e in the mantissa is 654.
|
|
|
|
Vector4f vf_gifbufs(gifbuf_end, gifbuf_middle, gifbuf_end, gifbuf_middle);
|
|
|
|
float gifbuf_sum = gifbuf_start + gifbuf_middle + gifbuf_end;
|
|
Vector4f vf_extra(gifbuf_sum, 0, gifbuf_sum, 0);
|
|
|
|
// u16 misc_x = 0;
|
|
// u16 misc_y = 1;
|
|
|
|
// First, we will emulate the program that runs after model uploads. (L1, imm = 6)
|
|
// it runs once per fragment
|
|
for (auto& proto : protos) {
|
|
// loop over fragments in this proto
|
|
for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
|
|
auto& frag = proto.frags[frag_idx];
|
|
|
|
// this basically sets up some templates in memory.
|
|
// we're going to track the memory addresses of where certain tags are placed.
|
|
|
|
// there are 6qw gif packets that do an adgif-shader upload.
|
|
// this vector will store the location of these adgif shaders, relative to the start
|
|
// of the gif output buffer being used.
|
|
|
|
// this starts off pointing to 0, which is the adgif shaders for this fragment (input data)
|
|
u16 vi_point_ptr = 0;
|
|
|
|
// this fiddles with the triple buffering magic for gif bufs
|
|
// todo: figure out the trick and just use a fixed addr.
|
|
vf_gifbufs.z() = vf_extra.z() - vf_gifbufs.x();
|
|
vf_gifbufs.x() = vf_extra.x() - vf_gifbufs.x();
|
|
|
|
// L1:
|
|
// lq.xyz vf01, 966(vi00) | nop vf01 = adgif header.
|
|
// ilwr.w vi04, vi_point_ptr | nop
|
|
// some integers are hidden in the upper 32-bits of the adgif data.
|
|
// the first one has the offset in the gif buffer.
|
|
// we expect this to be 0 for the first one - we should start with adgif shaders always.
|
|
u16 vi04 = frag.adgifs.at(0).first_w;
|
|
ASSERT(vi04 == 0);
|
|
|
|
// ilw.w vi_ind, 1(vi_point_ptr) | nop
|
|
// the next hidden integer is the number of adgif shaders used in this fragment.
|
|
// we already know this, so check it.
|
|
u16 vi_ind = frag.adgifs.at(0).second_w;
|
|
ASSERT(vi_ind == frag.adgifs.size());
|
|
|
|
// mtir vi06, vf_gifbufs.y | nop
|
|
// vi06 will be one of our gifbufs we can use.
|
|
u16 vi06;
|
|
memcpy(&vi06, &vf_gifbufs.y(), sizeof(u16));
|
|
// lg::print("vi06: {}\n", vi06);
|
|
ASSERT(vi06 == 470 || vi06 == 286 || vi06 == 654); // should be one of the three gifbufs.
|
|
|
|
// lqi.xyzw vf02, vi_point_ptr | suby.xz vf_gifbufs, vf_gifbufs, vf_gifbufs
|
|
// lqi.xyzw vf03, vi_point_ptr | nop
|
|
// lqi.xyzw vf04, vi_point_ptr | nop
|
|
// lqi.xyzw vf05, vi_point_ptr | nop
|
|
// mtir vi05, vf_gifbufs.x | nop
|
|
// lqi.xyzw vf06, vi_point_ptr | subw.w vf01, vf01, vf01
|
|
|
|
// loads the adgif data into vf02 -> vf06
|
|
// the subw.w is to clear out the secret integer (I think the gs ignores this anyway)
|
|
vf_gifbufs.x() -= vf_gifbufs.y();
|
|
vf_gifbufs.z() -= vf_gifbufs.y();
|
|
// and vi05 is our other buffer.
|
|
u16 vi05;
|
|
memcpy(&vi05, &vf_gifbufs.x(), sizeof(u16));
|
|
// lg::print("vi05: {}\n", vi05);
|
|
// check that we understand the buffer rotation.
|
|
if (vi06 == 470) {
|
|
ASSERT(vi05 == 286);
|
|
} else if (vi06 == 286) {
|
|
ASSERT(vi05 == 654);
|
|
} else {
|
|
ASSERT(vi05 == 470);
|
|
}
|
|
vi_point_ptr += 5;
|
|
|
|
// this loop copies the adgifs to the gif buf at the appropriate address.
|
|
// Note: the final iteration through the loop does a load that's past the end of the
|
|
// adgif array, and vf02 is the first qw of the "extra gif data"
|
|
u32 adgif_load_idx = 1;
|
|
adgif_setup_loop_top:
|
|
// L2:
|
|
// iadd vi03, vi04, vi05 | nop
|
|
// vi04 is the adgif offset, vi05 is the buffer.
|
|
u16 vi03 = vi04 + vi05;
|
|
|
|
// iadd vi04, vi04, vi06 | nop
|
|
// set vi04 to the offset for the adgif in the second buffer.
|
|
vi04 += vi06;
|
|
|
|
// iaddi vi_ind, vi_ind, -0x1 | nop
|
|
vi_ind--; // decrement remaining adgifs
|
|
|
|
// store adgifs in one buffer.
|
|
frag.prog_info.adgif_offset_in_gif_buf_qw.push_back(vi03 - vi05);
|
|
// lg::print("adgifs at offset {}\n", frag.prog_info.adgif_offset_in_gif_buf_qw.back());
|
|
// sqi.xyzw vf01, vi03 | nop
|
|
// sqi.xyzw vf02, vi03 | nop
|
|
// sqi.xyzw vf03, vi03 | nop
|
|
// sqi.xyzw vf04, vi03 | nop
|
|
// sqi.xyzw vf05, vi03 | nop
|
|
// sqi.xyzw vf06, vi03 | nop
|
|
vi03 += 5;
|
|
|
|
// and the other buffer
|
|
// sqi.xyzw vf01, vi04 | nop
|
|
// sqi.xyzw vf02, vi04 | nop
|
|
// sqi.xyzw vf03, vi04 | nop
|
|
// sqi.xyzw vf04, vi04 | nop
|
|
// sqi.xyzw vf05, vi04 | nop
|
|
// sqi.xyzw vf06, vi04 | nop
|
|
vi04 += 5;
|
|
|
|
// ilwr.w vi04, vi_point_ptr | nop
|
|
// get the offset of the next adgif
|
|
// vi04 = frag.ilw_points(vi_point_ptr, 3);
|
|
|
|
// lqi.xyzw vf02, vi_point_ptr | nop
|
|
// lqi.xyzw vf03, vi_point_ptr | nop
|
|
// lqi.xyzw vf04, vi_point_ptr | nop
|
|
// lqi.xyzw vf05, vi_point_ptr | nop
|
|
vi_point_ptr += 5;
|
|
|
|
// ibgtz vi_ind, L2 | nop
|
|
if (((s16)vi_ind) > 0) {
|
|
// moved down
|
|
vi04 = frag.adgifs.at(adgif_load_idx++).first_w;
|
|
goto adgif_setup_loop_top;
|
|
}
|
|
// lqi.xyzw vf06, vi_point_ptr | nop (adgif load)
|
|
|
|
// Extra gif stuff
|
|
// this part builds the headers for the actual drawing packets.
|
|
// again, we do it in two parts. The extra gif data gives us offsets,
|
|
// The extra gif stuff is unpacked immediately after adgifs. Unpacked with v8 4.
|
|
// the above adgif loop will run off the end and vf02 will have the first byte in it's w.
|
|
ASSERT(frag.other_gif_data.size() > 1);
|
|
// mtir vi_ind, vf02.w | nop
|
|
// vi_ind will contain the number of drawing packets for this fragment.
|
|
vi_ind = frag.other_gif_data.at(3);
|
|
u16 vf02_x = frag.other_gif_data.at(0);
|
|
u16 vf02_y = frag.other_gif_data.at(1);
|
|
// u16 vf02_z = frag.other_gif_data.at(2);
|
|
u16 vf03_x = frag.other_gif_data.at(4);
|
|
u16 vf03_y = frag.other_gif_data.at(5);
|
|
u16 vf03_z = frag.other_gif_data.at(6);
|
|
u16 vf03_w = frag.other_gif_data.at(7);
|
|
u16 vf04_x = frag.other_gif_data.at(8);
|
|
u16 vf04_y = frag.other_gif_data.at(9);
|
|
u16 vf04_z = frag.other_gif_data.at(10);
|
|
// u16 vf04_w = frag.other_gif_data.at(11);
|
|
ASSERT(vi_ind >= frag.adgifs.size()); // at least 1 draw per shader.
|
|
ASSERT(vi_ind < 1000); // check for insane value.
|
|
// lg::print("got: {}, other size: {}\n", vi_ind, frag.other_gif_data.size());
|
|
|
|
// iaddi vi_point_ptr, vi_point_ptr, -0x2 | subw.w vf07, vf07, vf07
|
|
vi_point_ptr -= 2;
|
|
// vf07.w = 0
|
|
|
|
// setup for tag building loop.
|
|
|
|
// ilwr.x vi07, vi_point_ptr | nop
|
|
u16 vi07 = frag.ilw_other_gif(vi_point_ptr, 0);
|
|
// vi07 is the nloop/eop.
|
|
|
|
// ilwr.y vi08, vi_point_ptr | nop
|
|
u16 vi08 = frag.ilw_other_gif(vi_point_ptr, 1);
|
|
// this can toggle to a different mode but I don't understand it yet.
|
|
ASSERT(vi08 == 0);
|
|
|
|
// ilwr.z vi04, vi_point_ptr | nop
|
|
vi04 = frag.ilw_other_gif(vi_point_ptr, 2);
|
|
// offset
|
|
|
|
// lg::print("[{}] 7: {} 8: {} 4: {}, for {}\n", vi_point_ptr, vi07, vi08, vi04, vi_ind - 1);
|
|
|
|
// iaddi vi_ind, vi_ind, -0x1 | nop
|
|
vi_ind--;
|
|
|
|
// iaddi vi_point_ptr, vi_point_ptr, 0x1 | nop
|
|
vi_point_ptr++;
|
|
|
|
// ibeq vi00, vi_ind, L4 | nop
|
|
// lq.xyz vf07, 967(vi08) | nop
|
|
u16 next_mode = vi08;
|
|
|
|
// todo: can we rely on a strgif from a previous fragment?
|
|
while (vi_ind) {
|
|
StrGifInfo info;
|
|
// L3:
|
|
// iadd vi03, vi04, vi05 | nop
|
|
vi03 = vi04 + vi05; // addr in one buf
|
|
// iadd vi04, vi04, vi06 | nop
|
|
vi04 = vi04 + vi06; // addr in other buf
|
|
// iaddi vi_ind, vi_ind, -0x1 | nop
|
|
vi_ind--; // dec remaining tag
|
|
// sq.xyzw vf07, 0(vi03) | nop
|
|
info.address = vi03 - vi05; // store the template. but this doesn't have size or anything.
|
|
// lg::print("strgif at {}, {}\n", vi03, vi04);
|
|
|
|
// iswr.x vi07, vi03 | nop
|
|
info.nloop = vi07 & 0x7fff;
|
|
info.eop = vi07 & 0x8000;
|
|
ASSERT(!info.eop); // seems like we handle this manually after the loop
|
|
info.mode = next_mode;
|
|
|
|
// sq.xyzw vf07, 0(vi04) | nop
|
|
// iswr.x vi07, vi04 | nop
|
|
// and the same for the other tag in the other buffer
|
|
|
|
// ilwr.x vi07, vi_point_ptr | nop
|
|
vi07 = frag.ilw_other_gif(vi_point_ptr, 0);
|
|
|
|
// ilwr.y vi08, vi_point_ptr | nop
|
|
vi08 = frag.ilw_other_gif(vi_point_ptr, 1);
|
|
|
|
// ilwr.z vi04, vi_point_ptr | nop
|
|
vi04 = frag.ilw_other_gif(vi_point_ptr, 2);
|
|
|
|
// iaddi vi_point_ptr, vi_point_ptr, 0x1 | nop
|
|
vi_point_ptr++;
|
|
|
|
// ibne vi00, vi_ind, L3 | nop
|
|
// lq.xyz vf07, 967(vi08) | nop
|
|
next_mode = vi08;
|
|
// lg::print("[{}] 7: {} 8: {} 4: {}, for {}\n", vi_point_ptr, vi07, vi08, vi04, vi_ind);
|
|
frag.prog_info.str_gifs.push_back(info);
|
|
}
|
|
|
|
// and now, the final tag, which ends the drawing packet!
|
|
// L4:
|
|
// iaddiu vi07, vi07, 0x4000 | nop
|
|
vi07 += 0x8000;
|
|
// iaddiu vi07, vi07, 0x4000 | nop
|
|
StrGifInfo info;
|
|
info.eop = true; // the 0x8000 sets the eop bit.
|
|
|
|
// compute addresses
|
|
// iadd vi03, vi04, vi05 | nop
|
|
vi03 = vi04 + vi05;
|
|
// iadd vi04, vi04, vi06 | nop
|
|
vi04 += vi06;
|
|
|
|
// store and set nloop/eop
|
|
// sq.xyzw vf07, 0(vi03) | nop
|
|
info.address = vi03 - vi05;
|
|
// iswr.x vi07, vi03 | nop
|
|
info.nloop = vi07 & 0x7fff;
|
|
// sq.xyzw vf07, 0(vi04) | nop
|
|
// iswr.x vi07, vi04 | nop
|
|
frag.prog_info.str_gifs.push_back(info);
|
|
|
|
// mtir vi06, vf04.x | nop
|
|
vi06 = vf04_x;
|
|
|
|
// lq.xyzw vf05, 50(vi00) | nop
|
|
auto vf05 = frag.lq_points(50);
|
|
// lq.xyzw vf15, 51(vi00) | nop
|
|
auto vf15 = frag.lq_points(51);
|
|
// iaddiu vi05, vi00, 0x34 | nop
|
|
vi05 = 0x34; // points to after the two qw's we just loaded
|
|
// nop | nop
|
|
// iaddiu vi06, vi06, 0x32 | itof0.xyzw vf05, vf05
|
|
vi06 += 0x32;
|
|
vf05 = itof0(vf05);
|
|
|
|
// lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15
|
|
auto vf06 = frag.lq_points(vi05);
|
|
vi05++;
|
|
vf15 = itof12xyz_0w(vf15);
|
|
|
|
// lqi.xyzw vf16, vi05 | itof0.w vf15, vf15
|
|
auto vf16 = frag.lq_points(vi05);
|
|
vi05++;
|
|
// itof0 already done by previous
|
|
|
|
// 64.0 | nop :i
|
|
// ibeq vi06, vi05, L6 | muli.xyz vf05, vf05, I
|
|
vf05 = muli64_xyz(vf05);
|
|
// mtir vi07, vf04.y | itof0.xyzw vf06, vf06
|
|
vi07 = vf04_y;
|
|
// lg::print("bonus points: {}\n", vi07);
|
|
vf06 = itof0(vf06);
|
|
|
|
// L5:
|
|
Vector4f vf07;
|
|
top_of_points_loop:
|
|
// lg::print("{}/{}\n", vi05, vi06);
|
|
// lqi.xyzw vf07, vi05 | itof12.xyz vf16, vf16
|
|
vf07 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf16 = itof12xyz_0w(vf16);
|
|
|
|
// lqi.xyzw vf17, vi05 | itof0.w vf16, vf16
|
|
auto vf17 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
// itof done above.
|
|
|
|
// sq.xyzw vf15, -5(vi05) | nop
|
|
frag.sq_points(vi05 - 5, vf15);
|
|
|
|
// ibeq vi06, vi05, L6 | muli.xyz vf06, vf06, I
|
|
// sq.xyzw vf05, -6(vi05) | itof0.xyzw vf07, vf07
|
|
vf06 = muli64_xyz(vf06);
|
|
frag.sq_points(vi05 - 6, vf05);
|
|
vf07 = itof0(vf07);
|
|
if (vi05 == vi06) {
|
|
goto end_of_int_to_float_loop;
|
|
}
|
|
|
|
// lqi.xyzw vf05, vi05 | itof12.xyz vf17, vf17
|
|
vf05 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf17 = itof12xyz_0w(vf17);
|
|
|
|
// lqi.xyzw vf15, vi05 | itof0.w vf17, vf17
|
|
vf15 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
// itof done above
|
|
|
|
// sq.xyzw vf16, -5(vi05) | nop
|
|
frag.sq_points(vi05 - 5, vf16);
|
|
// ibeq vi06, vi05, L6 | muli.xyz vf07, vf07, I
|
|
vf07 = muli64_xyz(vf07);
|
|
// sq.xyzw vf06, -6(vi05) | itof0.xyzw vf05, vf05
|
|
frag.sq_points(vi05 - 6, vf06);
|
|
vf05 = itof0(vf05);
|
|
if (vi05 == vi06) {
|
|
goto end_of_int_to_float_loop;
|
|
}
|
|
|
|
// lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15
|
|
vf06 = frag.lq_points_allow_past_end(vi05);
|
|
vf15 = itof12xyz_0w(vf15);
|
|
vi05++;
|
|
|
|
// lqi.xyzw vf16, vi05 | itof0.w vf15, vf15
|
|
vf16 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
// itof done above
|
|
|
|
// sq.xyzw vf17, -5(vi05) | nop
|
|
frag.sq_points(vi05 - 5, vf17);
|
|
|
|
// ibne vi06, vi05, L5 | muli.xyz vf05, vf05, I
|
|
// sq.xyzw vf07, -6(vi05) | itof0.xyzw vf06, vf06
|
|
vf05 = muli64_xyz(vf05);
|
|
frag.sq_points(vi05 - 6, vf07);
|
|
vf06 = itof0(vf06);
|
|
if (vi05 != vi06) {
|
|
goto top_of_points_loop;
|
|
}
|
|
|
|
end_of_int_to_float_loop:
|
|
// another points loop
|
|
Vector4f vf10;
|
|
|
|
// L6:
|
|
// lq.xyzw vf09, -4(vi05) | nop
|
|
auto vf09 = frag.lq_points_allow_past_end(vi05 - 4);
|
|
// lq.xyzw vf05, -3(vi05) | nop
|
|
vf05 = frag.lq_points_allow_past_end(vi05 - 3);
|
|
// lq.xyzw vf15, -2(vi05) | nop
|
|
vf15 = frag.lq_points_allow_past_end(vi05 - 2);
|
|
// iadd vi07, vi07, vi05 | nop
|
|
vi07 += vi05;
|
|
// iaddi vi07, vi07, -0x4 | nop
|
|
vi07 -= 4;
|
|
// iaddi vi05, vi05, -0x1 | nop
|
|
vi05 -= 1;
|
|
// iaddi vi08, vi05, -0x3 | nop
|
|
vi08 = vi05 - 3;
|
|
// ibeq vi07, vi05, L8 | nop
|
|
// nop | itof0.xyzw vf09, vf09
|
|
vf09 = itof0(vf09);
|
|
if (vi07 == vi05) {
|
|
goto end_of_points2;
|
|
}
|
|
|
|
// lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05
|
|
vf10 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf05 = itof0(vf05);
|
|
|
|
// lqi.xyzw vf06, vi05 | itof0.w vf15, vf15
|
|
vf06 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf15 = itof12xyz_0w(vf15);
|
|
|
|
// lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15
|
|
vf16 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++; // itof done above
|
|
|
|
// nop | nop
|
|
// nop | muli.xyz vf09, vf09, I
|
|
vf09 = muli64_xyz(vf09);
|
|
|
|
// ibeq vi07, vi05, L8 | muli.xyz vf05, vf05, I
|
|
// nop | itof0.xyzw vf10, vf10
|
|
vf05 = muli64_xyz(vf05);
|
|
vf10 = itof0(vf10);
|
|
if (vi05 == vi07) {
|
|
goto end_of_points2;
|
|
}
|
|
|
|
Vector4f vf11;
|
|
top_of_points2:
|
|
// L7:
|
|
// lqi.xyzw vf11, vi05 | itof0.xyzw vf06, vf06
|
|
vf11 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf06 = itof0(vf06);
|
|
// lqi.xyzw vf07, vi05 | itof0.w vf16, vf16
|
|
vf07 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf16 = itof12xyz_0w(vf16);
|
|
|
|
// lqi.xyzw vf17, vi05 | itof12.xyz vf16, vf16
|
|
vf17 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
// sqi.xyzw vf09, vi08 | nop
|
|
frag.sq_points(vi08, vf09);
|
|
vi08++;
|
|
// sqi.xyzw vf05, vi08 | muli.xyz vf10, vf10, I
|
|
frag.sq_points(vi08, vf05);
|
|
vi08++;
|
|
vf10 = muli64_xyz(vf10);
|
|
// ibeq vi07, vi05, L8 | muli.xyz vf06, vf06, I
|
|
vf06 = muli64_xyz(vf06);
|
|
// sqi.xyzw vf15, vi08 | itof0.xyzw vf11, vf11
|
|
frag.sq_points(vi08, vf15);
|
|
vi08++;
|
|
vf11 = itof0(vf11);
|
|
if (vi07 == vi05) {
|
|
goto end_of_points2;
|
|
}
|
|
|
|
// lqi.xyzw vf09, vi05 | itof0.xyzw vf07, vf07
|
|
vf09 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf07 = itof0(vf07);
|
|
// lqi.xyzw vf05, vi05 | itof0.w vf17, vf17
|
|
vf05 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf17 = itof12xyz_0w(vf17);
|
|
// lqi.xyzw vf15, vi05 | itof12.xyz vf17, vf17
|
|
vf15 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
|
|
// sqi.xyzw vf10, vi08 | nop
|
|
frag.sq_points(vi08, vf10);
|
|
vi08++;
|
|
// sqi.xyzw vf06, vi08 | muli.xyz vf11, vf11, I
|
|
frag.sq_points(vi08, vf06);
|
|
vi08++;
|
|
vf11 = muli64_xyz(vf11);
|
|
// ibeq vi07, vi05, L8 | muli.xyz vf07, vf07, I
|
|
// sqi.xyzw vf16, vi08 | itof0.xyzw vf09, vf09
|
|
vf07 = muli64_xyz(vf07);
|
|
frag.sq_points(vi08, vf16);
|
|
vi08++;
|
|
vf09 = itof0(vf09);
|
|
if (vi07 == vi05) {
|
|
goto end_of_points2;
|
|
}
|
|
|
|
// lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05
|
|
vf10 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf05 = itof0(vf05);
|
|
// lqi.xyzw vf06, vi05 | itof0.w vf15, vf15
|
|
vf06 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
vf15 = itof12xyz_0w(vf15);
|
|
// lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15
|
|
vf16 = frag.lq_points_allow_past_end(vi05);
|
|
vi05++;
|
|
|
|
// sqi.xyzw vf11, vi08 | nop
|
|
frag.sq_points(vi08, vf11);
|
|
vi08++;
|
|
// sqi.xyzw vf07, vi08 | muli.xyz vf09, vf09, I
|
|
frag.sq_points(vi08, vf07);
|
|
vi08++;
|
|
vf09 = muli64_xyz(vf09);
|
|
// ibne vi07, vi05, L7 | muli.xyz vf05, vf05, I
|
|
// sqi.xyzw vf17, vi08 | itof0.xyzw vf10, vf10
|
|
vf05 = muli64_xyz(vf05);
|
|
frag.sq_points(vi08, vf17);
|
|
vi08++;
|
|
vf10 = itof0(vf10);
|
|
if (vi07 != vi05) {
|
|
goto top_of_points2;
|
|
}
|
|
|
|
end_of_points2:
|
|
// L8:
|
|
// mtir vi01, vf04.z | nop
|
|
u16 vi01 = vf04_z;
|
|
// mtir vi05, vf02.x | nop
|
|
frag.prog_info.skip_bp2 = vf02_x;
|
|
|
|
// mtir vi14, vf02.y | nop
|
|
frag.prog_info.skip_ips = vf02_y;
|
|
// mtir vi04, vf03.x | nop
|
|
frag.prog_info.tgt_bp1_ptr = vf03_x;
|
|
// mtir vi06, vf03.y | nop
|
|
frag.prog_info.tgt_bp2_ptr = vf03_y;
|
|
// mtir vi07, vf03.z | nop
|
|
frag.prog_info.tgt_ip1_ptr = vf03_z;
|
|
// mtir vi08, vf03.w | nop
|
|
frag.prog_info.tgt_ip2_ptr = vf03_w;
|
|
// isw.x vi01, 971(vi00) | nop
|
|
frag.prog_info.misc_x = vi01;
|
|
// iaddi vi15, vi00, 0x0 | nop
|
|
frag.prog_info.kick_addr = 0;
|
|
// mtir vi03, vf_clrbuf.x | nop
|
|
// frag.prog_info.clr_ptr = 198; // just forcing it to one buffer for now
|
|
// iaddiu vi_point_ptr, vi00, 0x32 | nop
|
|
frag.prog_info.point_ptr = 0x32;
|
|
|
|
// mr32.xyzw vf_gifbufs, vf_gifbufs | nop
|
|
// mfir.y vf_extra, vi00 | nop :e
|
|
// mfir.w vf_extra, vi00 | nop
|
|
float temp = vf_gifbufs.x();
|
|
vf_gifbufs.x() = vf_gifbufs.y();
|
|
vf_gifbufs.y() = vf_gifbufs.z();
|
|
vf_gifbufs.z() = vf_gifbufs.w();
|
|
vf_gifbufs.w() = temp;
|
|
vf_extra.y() = 0;
|
|
vf_extra.w() = 0;
|
|
frag.prog_info.gifbufs = vf_gifbufs;
|
|
frag.prog_info.extra = vf_extra;
|
|
// todo: maybe we need more.
|
|
}
|
|
|
|
// ASSERT(false);
|
|
}
|
|
}
|
|
|
|
void debug_print_info(const std::vector<TieProtoInfo>& out) {
|
|
for (auto& proto : out) {
|
|
lg::debug("[{:40}]", proto.name);
|
|
lg::debug(" generic: {}", proto.uses_generic);
|
|
lg::debug(" use count: {}", proto.instances.size());
|
|
lg::debug(" stiffness: {}", proto.stiffness);
|
|
}
|
|
}
|
|
|
|
/*!
 * Return the low 16 bits of the bit pattern of a 32-bit float.
 *
 * The emulation code in this file uses this wherever the VU program moves a float register lane
 * into a 16-bit integer register (mtir), for example to recover a gif buffer address that was
 * produced by adding floats.
 *
 * The whole 32-bit pattern is copied and then masked, so the result does not depend on the byte
 * order of the host. (Copying only the first two bytes of the float would return the upper half
 * of the pattern on a big-endian machine.)
 */
u16 float_to_u16(float f) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float_to_u16 expects a 32-bit float");
  uint32_t bits = 0;
  // memcpy is the well-defined way to read the object representation of the float.
  memcpy(&bits, &f, sizeof(bits));
  return static_cast<u16>(bits & 0xffffu);
}
|
|
|
|
/*!
 * Add the two draw counts and round the sum up to a multiple of 4.
 * The result is used as the base of the color index for the vertices handled after the two
 * draw stages (see emulate_tie_instance_program).
 * Uses truncating integer division, so negative sums round toward zero.
 */
int get_fancy_base(int draw1, int draw2) {
  constexpr int kGroupSize = 4;
  const int combined = draw1 + draw2;
  return ((combined + (kGroupSize - 1)) / kGroupSize) * kGroupSize;
}
|
|
|
|
void emulate_tie_instance_program(std::vector<TieProtoInfo>& protos) {
|
|
for (auto& proto : protos) {
|
|
// bool first_instance = true;
|
|
// for (auto& instance : proto.instances) {
|
|
for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
|
|
auto& frag = proto.frags.at(frag_idx);
|
|
// for these sections, see the TIE Instance VU Program Doc.
|
|
int draw_1_count = 0;
|
|
int draw_2_count = 0;
|
|
int ip_1_count = 0;
|
|
|
|
/////////////////////////////////////
|
|
// SETUP
|
|
/////////////////////////////////////
|
|
// this is some basic register setup for the TIE instance
|
|
// ad also for the pipelined Draw1 loop.
|
|
// we omit the pipeline startup here.
|
|
|
|
// this was set by the previous program that sets up this prototype frag
|
|
// u16 clr_ptr = frag.prog_info.clr_ptr;
|
|
u16 tgt_bp1_ptr = frag.prog_info.tgt_bp1_ptr;
|
|
u16 tgt_bp2_ptr = frag.prog_info.tgt_bp2_ptr;
|
|
u16 tgt_ip1_ptr = frag.prog_info.tgt_ip1_ptr;
|
|
u16 tgt_ip2_ptr = frag.prog_info.tgt_ip2_ptr;
|
|
u16 skip_bp2 = frag.prog_info.skip_bp2;
|
|
u16 kick_addr = frag.prog_info.kick_addr;
|
|
u16 dest_ptr = 0; // they never initialized this... seems like a bug
|
|
|
|
// lqi.xyzw vtx_0, vi_point_ptr | nop
|
|
// use hard-coded lower buffer for model data
|
|
u16 point_ptr = 0x32;
|
|
// lq.xyzw vf_inds, 6(vi_clr_ptr) | nop
|
|
// pipeline
|
|
|
|
// lq.xyzw vf_clr2, 3(vi_clr_ptr) | nop
|
|
// lq.xyzw vf_mtx0, 0(vi_clr_ptr) | nop
|
|
// lq.xyzw vf_mtx1, 1(vi_clr_ptr) | nop
|
|
// lq.xyzw vf_clr1, 2(vi_clr_ptr) | nop
|
|
// this is the matrix
|
|
|
|
// mtir vi_ind, vf_inds.x | nop
|
|
// pipeline
|
|
|
|
// lqi.xyzw vf_tex0, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// pipeline
|
|
// lq.xyzw vf_morph, 4(vi_clr_ptr) | maddax.xyzw ACC, vf_mtx0, vtx_0
|
|
// we're going to ignore the "morph" and use hi-res everywehere
|
|
|
|
// ilw.x vi01, 5(vi_clr_ptr) | madday.xyzw ACC, vf_mtx1, vtx_0
|
|
// the vi01 is unused here. (indicates if we're generic or not)
|
|
|
|
// lq.xyzw vf_clr0, 838(vi_ind) | maddz.xyzw vf_pos02, vf_clr1, vtx_0
|
|
// pipeline
|
|
|
|
// lqi.xyzw vf_vtx1, vi_point_ptr | nop
|
|
// pipeline
|
|
|
|
// lq.xyzw vf_res02, 5(vi_clr_ptr) | nop
|
|
// loading the flags and stuff, which we will ignore too
|
|
|
|
// iaddi vi_clr_ptr, vi_clr_ptr, 0x7 | nop
|
|
// u16 clr_ptr_base = clr_ptr;
|
|
// clr_ptr += 6; // it says 7, but we want to point to the first index data.
|
|
|
|
// mtir vi_ind, vf_inds.y | addx.w vf_res13, vf_res02, vf00 <- flags crap
|
|
// div Q, vf00.w, vf_pos02.w | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// lqi.xyzw vf_tex1, vi_point_ptr | maddax.xyzw ACC, vf_mtx0, vf_vtx1
|
|
// mtir vi01, vf_gifbufs.x | madday.xyzw ACC, vf_mtx1, vf_vtx1
|
|
u16 vi01 = float_to_u16(frag.prog_info.gifbufs.x());
|
|
|
|
// lq.xyzw vf_mtx2, 838(vi_ind) | maddz.xyzw vf_pos13, vf_clr1, vf_vtx1
|
|
|
|
// isub vi01, vi01, vi_kick_addr | ftoi4.w vf_res02, vf_res02
|
|
vi01 -= kick_addr;
|
|
|
|
// iadd vi_tgt_bp1_ptr, vi_tgt_bp1_ptr, vi01 | ftoi4.w vf_res13, vf_res13
|
|
tgt_bp1_ptr += vi01;
|
|
// iadd vi_tgt_bp2_ptr, vi_tgt_bp2_ptr, vi01 | nop
|
|
tgt_bp2_ptr += vi01;
|
|
|
|
// lg::print("b tgts: {} {}\n", tgt_bp1_ptr, tgt_bp2_ptr);
|
|
// lqi.xyzw vf_vtx2, vi_point_ptr | mul.xyz vf_pos02, vf_pos02, Q
|
|
// div Q, vf00.w, vf_pos13.w | mul.xyz vf_tex0, vf_tex0, Q
|
|
// mtir vi_ind, vf_inds.z | addx.w vtx_0, vtx_0, vf_gifbufs
|
|
// lqi.xyzw vf_tex2, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// iadd vi_tgt_ip1_ptr, vi_tgt_ip1_ptr, vi01 | maddax.xyzw ACC, vf_mtx0, vf_vtx2
|
|
// iadd vi_tgt_ip2_ptr, vi_tgt_ip2_ptr, vi01 | madday.xyzw ACC, vf_mtx1, vf_vtx2
|
|
tgt_ip1_ptr += vi01;
|
|
tgt_ip2_ptr += vi01;
|
|
// lg::print("i tgts: {} {}\n", tgt_ip1_ptr, tgt_ip2_ptr);
|
|
// lq.xyzw vf_mtx3, 838(vi_ind) | ftoi4.xyz vf_res02, vf_pos02
|
|
// ibeq vi_tgt_bp1_ptr, vi_dest_ptr, L40 | maddz.xyzw vf_pos02, vf_clr1, vf_vtx2
|
|
// iadd vi_kick_addr, vi_kick_addr, vi01 | nop
|
|
kick_addr += vi01;
|
|
if (tgt_bp1_ptr == dest_ptr) {
|
|
lg::info("DRAW FINISH 1 (no points)");
|
|
goto program_end;
|
|
}
|
|
|
|
/////////////////////////////////////
|
|
// DRAW 1
|
|
/////////////////////////////////////
|
|
{
|
|
// Draw 1 computes and sets vertices that appear once.
|
|
// Note that it does 3 more vertices after reaching the target pointer.
|
|
bool reached_target = false;
|
|
int past_target = 0;
|
|
while (past_target < 3) {
|
|
// there's 1 load of colors per 4x verts.
|
|
// (lqi.xyzw vf_inds, vi_clr_ptr | nop)
|
|
// these are different per instance, but index into a palette shared by all instances
|
|
// for the i-th point, we just load the i-th color index.
|
|
|
|
// This is reordered.
|
|
// A "T" means it is part of transformation and we leave it out.
|
|
// A number corresponds to the line below.
|
|
|
|
// (4) mtir vi_dest_ptr, vtx_0.w | nop
|
|
// (2) lqi.xyzw vi_vtx3, vi_point_ptr | (T) mul.xyz vf_pos13, vf_pos13, Q
|
|
// (T) div Q, vf00.w, vf_pos02.w | (T) mul.xyz vf_tex1, vf_tex1, Q
|
|
// (1) mtir vi_ind, vf_inds.w | (3) addx.w vf_vtx1, vf_vtx1, vf_gifbufs
|
|
// (5) lqi.xyzw vi_tex3, vi_point_ptr | (T) mulaw.xyzw ACC, vf_clr2, vf00
|
|
// (7) sq.xyzw vf_tex0, 0(vi_dest_ptr) | (T) maddax.xyzw ACC, vf_mtx0, vi_vtx3
|
|
// (7) sq.xyzw vf_clr0, 1(vi_dest_ptr) | (T) madday.xyzw ACC, vf_mtx1, vi_vtx3
|
|
// (6) lq.xyzw vi_clr3, 838(vi_ind) | (T) ftoi4.xyz vf_res13, vf_pos13
|
|
// ibeq vi_tgt_bp1_ptr, vi_dest_ptr, L13 | (T) maddz.xyzw vf_pos13, vf_clr1, vi_vtx3
|
|
// (7) sq.xyzw vf_res02, 2(vi_dest_ptr) | nop
|
|
|
|
// 01 - grab the index for this vertex color
|
|
// we don't want to actually do the lookup here, just remember where we would have
|
|
// looked.
|
|
u32 clr_idx_idx = draw_1_count;
|
|
|
|
// 02 - load the floating point vertex values
|
|
auto vert_pos = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
|
|
// 03 - do the weird gifbuf triple buffer with floats crap
|
|
float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x();
|
|
|
|
// 04 - now get the destination
|
|
dest_ptr = float_to_u16(vtx_w);
|
|
|
|
// 05 - load tex coords
|
|
auto tex_coord = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
|
|
// 06 - actually do the color load in the palette. (skip)
|
|
|
|
// 07 - set vertex
|
|
TieProtoVertex vertex_info;
|
|
vertex_info.color_index_index = clr_idx_idx;
|
|
vertex_info.pos.x() = vert_pos.x();
|
|
vertex_info.pos.y() = vert_pos.y();
|
|
vertex_info.pos.z() = vert_pos.z();
|
|
vertex_info.tex.x() = tex_coord.x();
|
|
vertex_info.tex.y() = tex_coord.y();
|
|
vertex_info.tex.z() = tex_coord.z();
|
|
|
|
bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second;
|
|
ASSERT(inserted);
|
|
|
|
if (reached_target) {
|
|
past_target++;
|
|
}
|
|
|
|
if (dest_ptr == tgt_bp1_ptr) {
|
|
reached_target = true;
|
|
}
|
|
|
|
draw_1_count++;
|
|
}
|
|
}
|
|
|
|
if (!skip_bp2) {
|
|
// bp2 setup:
|
|
// The BP2 drawing is similar to BP1, but duplicate draws vertices.
|
|
bool reached_target = false;
|
|
int past_target = 0;
|
|
while (past_target < 2) {
|
|
u32 clr_idx_idx = draw_1_count + draw_2_count;
|
|
auto vert_pos = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x();
|
|
dest_ptr = float_to_u16(vtx_w);
|
|
auto tex_coord = frag.lq_points(point_ptr);
|
|
// lg::print("texw: [{}] {}\n", point_ptr, tex_coord.w());
|
|
point_ptr++;
|
|
float tex_w = tex_coord.w() + frag.prog_info.gifbufs.x();
|
|
u16 dest2_ptr = float_to_u16(tex_w);
|
|
|
|
TieProtoVertex vertex_info;
|
|
vertex_info.color_index_index = clr_idx_idx;
|
|
vertex_info.pos.x() = vert_pos.x();
|
|
vertex_info.pos.y() = vert_pos.y();
|
|
vertex_info.pos.z() = vert_pos.z();
|
|
vertex_info.tex.x() = tex_coord.x();
|
|
vertex_info.tex.y() = tex_coord.y();
|
|
vertex_info.tex.z() = tex_coord.z();
|
|
|
|
// lg::print("double draw: {} {}\n", dest_ptr, dest2_ptr);
|
|
bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second;
|
|
ASSERT(inserted);
|
|
|
|
bool inserted2 = frag.vertex_by_dest_addr.insert({(u32)dest2_ptr, vertex_info}).second;
|
|
ASSERT(inserted2);
|
|
|
|
if (reached_target) {
|
|
past_target++;
|
|
}
|
|
|
|
if (dest_ptr == tgt_bp2_ptr) {
|
|
reached_target = true;
|
|
}
|
|
|
|
draw_2_count++;
|
|
}
|
|
|
|
// setup
|
|
// ibne vi00, vi_skip_bp2, L24 | mul.xyz vf_pos13, vf_pos13, Q
|
|
// lqi.xyzw vi_vtx3, vi_point_ptr | mul.xyz vf_tex1, vf_tex1, Q
|
|
// div Q, vf00.w, vf_pos02.w | addx.w vf_vtx1, vf_vtx1, vf_gifbufs
|
|
// mtir vi_ind, vf_inds.w | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// lqi.xyzw vf_inds, vi_clr_ptr | nop
|
|
// sq.xyzw vf_tex0, 0(vi_dest_ptr) | addx.w vf_vtx2, vf_vtx2, vf_gifbufs
|
|
// sq.xyzw vf_clr0, 1(vi_dest_ptr) | maddax.xyzw ACC, vf_mtx0, vi_vtx3
|
|
// lqi.xyzw vi_tex3, vi_point_ptr | madday.xyzw ACC, vf_mtx1, vi_vtx3
|
|
// lq.xyzw vi_clr3, 838(vi_ind) | ftoi4.xyz vf_res13, vf_pos13
|
|
// lqi.xyzw vtx_0, vi_point_ptr | maddz.xyzw vf_pos13, vf_clr1, vi_vtx3
|
|
// sq.xyzw vf_res02, 2(vi_dest_ptr) | mul.xyz vf_pos02, vf_pos02, Q
|
|
// mtir vi_dest_ptr, vf_vtx1.w | mul.xyz vf_tex2, vf_tex2, Q
|
|
// lqi.xyzw vf_tex0, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// mtir vi_ind, vf_inds.x | maddax.xyzw ACC, vf_mtx0, vtx_0
|
|
// nop | madday.xyzw ACC, vf_mtx1, vtx_0
|
|
// div Q, vf00.w, vf_pos13.w | ftoi4.xyz vf_res02, vf_pos02
|
|
// sq.xyzw vf_tex1, 0(vi_dest_ptr) | maddz.xyzw vf_pos02, vf_clr1, vtx_0
|
|
// sq.xyzw vf_mtx2, 1(vi_dest_ptr) | nop
|
|
// sq.xyzw vf_res13, 2(vi_dest_ptr) | nop
|
|
// mtir vi_dest_ptr, vf_vtx2.w | nop
|
|
// lq.xyzw vf_clr0, 838(vi_ind) | addx.w vi_vtx3, vi_vtx3, vf_gifbufs
|
|
// div Q, vf00.w, vf_pos02.w | mul.xyz vf_pos13, vf_pos13, Q
|
|
// sq.xyzw vf_tex2, 0(vi_dest_ptr) | mul.xyz vi_tex3, vi_tex3, Q
|
|
// sq.xyzw vf_mtx3, 1(vi_dest_ptr) | addx.w vi_tex3, vi_tex3, vf_gifbufs
|
|
// sq.xyzw vf_res02, 2(vi_dest_ptr) | nop
|
|
// b L14 | ftoi4.xyz vf_res13, vf_pos13
|
|
// mtir vi_dest_ptr, vi_vtx3.w | nop
|
|
|
|
// bp2 chunk (out of 4)
|
|
// lqi.xyzw vf_vtx1, vi_point_ptr | nop
|
|
// mtir vi_ind, vf_inds.y | nop
|
|
// mtir vi13, vi_tex3.w | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// sq.xyzw vi_tex3, 0(vi_dest_ptr) | addx.w vtx_0, vtx_0, vf_gifbufs
|
|
// sq.xyzw vi_clr3, 1(vi_dest_ptr) | maddax.xyzw ACC, vf_mtx0, vf_vtx1
|
|
// sq.xyzw vf_res13, 2(vi_dest_ptr) | madday.xyzw ACC, vf_mtx1, vf_vtx1
|
|
// lqi.xyzw vf_tex1, vi_point_ptr | maddz.xyzw vf_pos13, vf_clr1, vf_vtx1
|
|
// lq.xyzw vf_mtx2, 838(vi_ind) | mul.xyz vf_pos02, vf_pos02, Q
|
|
// sq.xyzw vi_tex3, 0(vi13) | mul.xyz vf_tex0, vf_tex0, Q
|
|
// sq.xyzw vi_clr3, 1(vi13) | addx.w vf_tex0, vf_tex0, vf_gifbufs
|
|
// sq.xyzw vf_res13, 2(vi13) | nop
|
|
// div Q, vf00.w, vf_pos13.w | nop
|
|
// ibeq vi_tgt_bp2_ptr, vi_dest_ptr, L18 | ftoi4.xyz vf_res02, vf_pos02
|
|
// mtir vi_dest_ptr, vtx_0.w | nop
|
|
}
|
|
|
|
if (!frag.prog_info.skip_ips) {
|
|
// Sadly TIE has no special case for highest lod.
|
|
// this is surprising to me, but really does seem to be the case.
|
|
|
|
// L31
|
|
// lqi.xyzw vf_vtx1, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00
|
|
// lqi.xyz vf_xyofs, vi_point_ptr | maddax.xyzw ACC, vf_mtx0, vtx_0
|
|
// lqi.xyzw vf_tex1, vi_point_ptr | madday.xyzw ACC, vf_mtx1, vtx_0
|
|
|
|
// we have an additional "xyofs" here, but otherwise similar
|
|
|
|
// mtir vi_dest_ptr, vf_vtx2.w | maddz.xyzw vf_pos02, vf_clr1, vtx_0
|
|
// as usual, using vtx.w for dest pointer.
|
|
|
|
// mtir vi_ind, vf_inds.x | mulaw.xyzw ACC, vf_clr_val1, vf_morph
|
|
// mtir vi10, vf_inds.y | maddz.xyzw vf_clr0, vf_clr0, vf_morph
|
|
// mtir vi11, vf_inds.z | mulx.xyz vf_vtx1, vf_vtx1, vf_morph
|
|
// inds works differently. There is a qw per vertex, containing 3 indices.
|
|
// the formula is a pain, so I will ignore it for today.
|
|
// ideally we can figure out the constant value of vf_morph first, to simplify all this.
|
|
//
|
|
|
|
// sq.xyzw vf_tex2, 0(vi_dest_ptr) | mul.xyz vf_res13, vf_pos13, Q
|
|
// lq.xyzw vf_mtx2, 838(vi_ind) | mul.xyz vi_tex3, vi_tex3, Q
|
|
// lq.xyzw vf_clr_val1, 838(vi10) | nop
|
|
// lq.xyzw vf_clr_val2, 838(vi11) | nop
|
|
// div Q, vf00.w, vf_pos02.w | ftoi4.xyz vf_res13, vf_res13
|
|
// sq.xyzw vf_mtx3, 1(vi_dest_ptr) | add.xyzw vf_vtx1, vf_vtx1, vf_xyofs
|
|
// lqi.xyzw vf_inds, vi_clr_ptr | mulay.xyzw ACC, vf_clr_val1, vf_morph
|
|
// ibeq vi_tgt_ip1_ptr, vi_dest_ptr, L35 | nop
|
|
// sq.xyzw vf_res02, 2(vi_dest_ptr) | maddy.xyzw vf_clr_val1, vf_clr_val2, vf_morph
|
|
|
|
int base = get_fancy_base(draw_1_count, draw_2_count);
|
|
|
|
while (dest_ptr != tgt_ip1_ptr) {
|
|
// todo - might be some rounding here.
|
|
u32 clr_idx_idx = base + ip_1_count * 4 + 0;
|
|
auto vert_pos = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
auto xy_offs = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x();
|
|
dest_ptr = float_to_u16(vtx_w);
|
|
auto tex_coord = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
|
|
TieProtoVertex vertex_info;
|
|
vertex_info.color_index_index = clr_idx_idx;
|
|
// random guess
|
|
vert_pos = xy_offs;
|
|
vertex_info.pos.x() = vert_pos.x();
|
|
vertex_info.pos.y() = vert_pos.y();
|
|
vertex_info.pos.z() = vert_pos.z();
|
|
vertex_info.tex.x() = tex_coord.x();
|
|
vertex_info.tex.y() = tex_coord.y();
|
|
vertex_info.tex.z() = tex_coord.z();
|
|
|
|
bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second;
|
|
ASSERT(inserted);
|
|
|
|
ip_1_count++;
|
|
}
|
|
|
|
bool first_iter = true;
|
|
while (dest_ptr != tgt_ip2_ptr) {
|
|
// todo - might be some rounding here.
|
|
u32 clr_idx_idx = base + ip_1_count * 4 + 0;
|
|
auto vert_pos = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
auto xy_offs = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x();
|
|
dest_ptr = float_to_u16(vtx_w);
|
|
auto tex_coord = frag.lq_points(point_ptr);
|
|
point_ptr++;
|
|
float tex_w = tex_coord.w() + frag.prog_info.gifbufs.x();
|
|
u16 dest2_ptr = float_to_u16(tex_w);
|
|
|
|
TieProtoVertex vertex_info;
|
|
vertex_info.color_index_index = clr_idx_idx;
|
|
// random guess
|
|
vert_pos = xy_offs;
|
|
vertex_info.pos.x() = vert_pos.x();
|
|
vertex_info.pos.y() = vert_pos.y();
|
|
vertex_info.pos.z() = vert_pos.z();
|
|
vertex_info.tex.x() = tex_coord.x();
|
|
vertex_info.tex.y() = tex_coord.y();
|
|
vertex_info.tex.z() = tex_coord.z();
|
|
|
|
bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second;
|
|
ASSERT(inserted);
|
|
|
|
// first iteration of ip2 is a bit strange because how it jumps from loop to loop.
|
|
// in some cases it uses ip2 on a point that should have used ip1 with the same addr
|
|
// twice. I am pretty sure it's not our fault because we get exactly the right dvert.
|
|
bool inserted2 = frag.vertex_by_dest_addr.insert({(u32)dest2_ptr, vertex_info}).second;
|
|
if (!first_iter) {
|
|
ASSERT(inserted2);
|
|
}
|
|
first_iter = false;
|
|
ip_1_count++;
|
|
}
|
|
}
|
|
|
|
// now, let's check count:
|
|
ASSERT(frag.vertex_by_dest_addr.size() == frag.expected_dverts);
|
|
|
|
program_end:;
|
|
// ASSERT(false);
|
|
}
|
|
|
|
// }
|
|
}
|
|
}
|
|
|
|
// the final step of the VU program emulation is the "xgkick" instruction.
|
|
// there is a signal xgkick per fragment and it goes through the entire gif buf, hitting
|
|
// strgifs and adgifs. We look at the memory map for each frag and figure out which strips
|
|
// go with which adgifs, then copy vertices
|
|
void emulate_kicks(std::vector<TieProtoInfo>& protos) {
|
|
for (auto& proto : protos) {
|
|
for (auto& frag : proto.frags) {
|
|
// we iterate over both adgifs/stgifs. sometimes you can have multiple strgifs that use the
|
|
// same adgif. But we never expect to see multiple adgifs in a row.
|
|
auto adgif_it = frag.prog_info.adgif_offset_in_gif_buf_qw.begin();
|
|
auto adgif_end = frag.prog_info.adgif_offset_in_gif_buf_qw.end();
|
|
auto str_it = frag.prog_info.str_gifs.begin();
|
|
auto str_end = frag.prog_info.str_gifs.end();
|
|
|
|
// but, we should always start with an adgif (otherwise we'd use the draw settings from
|
|
// the last model, which we don't know)
|
|
ASSERT(frag.prog_info.adgif_offset_in_gif_buf_qw.at(0) == 0);
|
|
// and we expect that the VU program placed all adgifs somewhere
|
|
ASSERT(frag.prog_info.adgif_offset_in_gif_buf_qw.size() == frag.adgifs.size());
|
|
|
|
const AdgifInfo* adgif_info = nullptr;
|
|
int expected_next_tag = 0;
|
|
|
|
// loop over strgifs
|
|
while (str_it != str_end) {
|
|
// try to see if we got a adgif here
|
|
if (adgif_it != adgif_end && (*adgif_it) == expected_next_tag) {
|
|
// yep
|
|
int idx = adgif_it - frag.prog_info.adgif_offset_in_gif_buf_qw.begin();
|
|
adgif_info = &frag.adgifs.at(idx);
|
|
// the next strgif should come 6 qw's after
|
|
expected_next_tag += 6;
|
|
adgif_it++;
|
|
}
|
|
ASSERT(adgif_info);
|
|
|
|
// make sure the next str is where we expect
|
|
ASSERT(expected_next_tag == str_it->address);
|
|
// the next tag (either str/adgif) should be located at the end of this tag's data.
|
|
expected_next_tag += 3 * str_it->nloop + 1;
|
|
// here we have the right str and adgif.
|
|
|
|
// kinda stupid, but we have to guess the base address of the gifbuf we're using.
|
|
// 286 gifbuf
|
|
// 470 gifbuf again
|
|
// 654 ??
|
|
ASSERT(!frag.vertex_by_dest_addr.empty());
|
|
int gifbuf_addr = frag.vertex_by_dest_addr.begin()->first;
|
|
int base_address = 286;
|
|
if (gifbuf_addr >= 654) {
|
|
base_address = 654;
|
|
} else if (gifbuf_addr >= 470) {
|
|
base_address = 470;
|
|
}
|
|
|
|
// now, we can add the vertices!
|
|
frag.strips.emplace_back();
|
|
auto& strip = frag.strips.back();
|
|
strip.adgif = *adgif_info;
|
|
// loop over all the vertices the strgif says we'll have
|
|
for (int vtx = 0; vtx < str_it->nloop; vtx++) {
|
|
// compute the address of this vertex (stored after the strgif)
|
|
u32 vtx_addr = str_it->address + 1 + (3 * vtx) + base_address;
|
|
// and grab it from the vertex map we made earlier.
|
|
strip.verts.push_back(frag.vertex_by_dest_addr.at(vtx_addr));
|
|
}
|
|
|
|
str_it++;
|
|
}
|
|
|
|
ASSERT(adgif_it == adgif_end);
|
|
}
|
|
}
|
|
}
|
|
|
|
// from here on, we are mostly converting the "info" formats to the C++ renderer format (tfrag3)
|
|
|
|
/*!
|
|
* Just used to debug, save a proto as an .obj mesh file.
|
|
*/
|
|
std::string debug_dump_proto_to_obj(const TieProtoInfo& proto) {
|
|
std::vector<math::Vector<float, 3>> verts;
|
|
std::vector<math::Vector<float, 2>> tcs;
|
|
std::vector<math::Vector<int, 3>> faces;
|
|
|
|
for (auto& frag : proto.frags) {
|
|
for (auto& strip : frag.strips) {
|
|
// add verts...
|
|
ASSERT(strip.verts.size() >= 3);
|
|
|
|
int vert_idx = 0;
|
|
|
|
int vtx_idx_queue[3];
|
|
|
|
int q_idx = 0;
|
|
int startup = 0;
|
|
while (vert_idx < (int)strip.verts.size()) {
|
|
verts.push_back(strip.verts.at(vert_idx).pos / 65536); // no idea
|
|
tcs.push_back(math::Vector<float, 2>{strip.verts.at(vert_idx).tex.x(),
|
|
strip.verts.at(vert_idx).tex.y()});
|
|
vert_idx++;
|
|
vtx_idx_queue[q_idx++] = verts.size();
|
|
|
|
// wrap the index
|
|
if (q_idx == 3) {
|
|
q_idx = 0;
|
|
}
|
|
|
|
// bump the startup
|
|
if (startup < 3) {
|
|
startup++;
|
|
}
|
|
|
|
if (startup >= 3) {
|
|
faces.push_back(
|
|
math::Vector<int, 3>{vtx_idx_queue[0], vtx_idx_queue[1], vtx_idx_queue[2]});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string result;
|
|
for (auto& vert : verts) {
|
|
result += fmt::format("v {} {} {}\n", vert.x(), vert.y(), vert.z());
|
|
}
|
|
for (auto& tc : tcs) {
|
|
result += fmt::format("vt {} {}\n", tc.x(), tc.y());
|
|
}
|
|
for (auto& face : faces) {
|
|
result += fmt::format("f {}/{} {}/{} {}/{}\n", face.x(), face.x(), face.y(), face.y(), face.z(),
|
|
face.z());
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*!
|
|
* Transform a point in a prototype to the actual point location in the game world.
|
|
*/
|
|
math::Vector<float, 3> transform_tie(const std::array<math::Vector4f, 4> mat,
|
|
const math::Vector3f& pt) {
|
|
auto temp = mat[0] * pt.x() + mat[1] * pt.y() + mat[2] * pt.z() + mat[3];
|
|
math::Vector3f result;
|
|
result.x() = temp.x();
|
|
result.y() = temp.y();
|
|
result.z() = temp.z();
|
|
return result;
|
|
}
|
|
|
|
/*!
|
|
* Dump the entire tie tree to an obj. Used to debug the transform_tie function. If we get this
|
|
* right, it should fit in with .obj's produced from the tfrag debug.
|
|
*/
|
|
std::string dump_full_to_obj(const std::vector<TieProtoInfo>& protos) {
|
|
std::vector<math::Vector<float, 3>> verts;
|
|
std::vector<math::Vector<float, 2>> tcs;
|
|
std::vector<math::Vector<int, 3>> faces;
|
|
|
|
for (auto& proto : protos) {
|
|
for (auto& inst : proto.instances) {
|
|
auto& mat = inst.mat;
|
|
for (auto& frag : proto.frags) {
|
|
for (auto& strip : frag.strips) {
|
|
// add verts...
|
|
ASSERT(strip.verts.size() >= 3);
|
|
|
|
int vert_idx = 0;
|
|
|
|
int vtx_idx_queue[3];
|
|
|
|
int q_idx = 0;
|
|
int startup = 0;
|
|
while (vert_idx < (int)strip.verts.size()) {
|
|
verts.push_back(transform_tie(mat, strip.verts.at(vert_idx).pos) / 65536); // no idea
|
|
tcs.push_back(math::Vector<float, 2>{strip.verts.at(vert_idx).tex.x(),
|
|
strip.verts.at(vert_idx).tex.y()});
|
|
vert_idx++;
|
|
vtx_idx_queue[q_idx++] = verts.size();
|
|
|
|
// wrap the index
|
|
if (q_idx == 3) {
|
|
q_idx = 0;
|
|
}
|
|
|
|
// bump the startup
|
|
if (startup < 3) {
|
|
startup++;
|
|
}
|
|
|
|
if (startup >= 3) {
|
|
faces.push_back(
|
|
math::Vector<int, 3>{vtx_idx_queue[0], vtx_idx_queue[1], vtx_idx_queue[2]});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string result;
|
|
for (auto& vert : verts) {
|
|
result += fmt::format("v {} {} {}\n", vert.x(), vert.y(), vert.z());
|
|
}
|
|
for (auto& tc : tcs) {
|
|
result += fmt::format("vt {} {}\n", tc.x(), tc.y());
|
|
}
|
|
for (auto& face : faces) {
|
|
result += fmt::format("f {}/{} {}/{} {}/{}\n", face.x(), face.x(), face.y(), face.y(), face.z(),
|
|
face.z());
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// The time of day stuff has a lot of lookups
|
|
// Each prototype has a palette. This palette is generated based on the time of day, blending
|
|
// together 8 colors from 8 times.
|
|
|
|
// Each instance is made up of fragments.
|
|
// The instance provides a color list per fragment. These are indices into the palette.
|
|
|
|
// So, to know the color we need:
|
|
// - which prototype
|
|
// - which instance
|
|
// - which fragment
|
|
// - which color within the fragment
|
|
// and this tells us an index in the time of day palette.
|
|
|
|
struct BigPalette {
|
|
std::vector<tfrag3::TimeOfDayColor> colors;
|
|
};
|
|
|
|
// combine all individual time of day palettes into one giant one.
|
|
BigPalette make_big_palette(std::vector<TieProtoInfo>& protos) {
|
|
BigPalette result;
|
|
|
|
for (u32 proto_idx = 0; proto_idx < protos.size(); proto_idx++) {
|
|
auto& proto = protos[proto_idx];
|
|
u32 base_color_of_proto = result.colors.size();
|
|
|
|
// add all colors
|
|
for (auto& color : proto.time_of_day_colors) {
|
|
result.colors.push_back(color);
|
|
}
|
|
|
|
for (u32 instance_idx = 0; instance_idx < proto.instances.size(); instance_idx++) {
|
|
auto& instance = proto.instances[instance_idx];
|
|
ASSERT(proto.frags.size() == instance.frags.size());
|
|
for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
|
|
auto& ifrag = instance.frags.at(frag_idx);
|
|
ifrag.color_index_offset_in_big_palette = base_color_of_proto;
|
|
}
|
|
}
|
|
}
|
|
|
|
ASSERT(result.colors.size() < UINT16_MAX);
|
|
return result;
|
|
}
|
|
|
|
/*!
|
|
* Given a current draw mode, update the alpha settings from a gs-alpha register value.
|
|
*/
|
|
void update_mode_from_alpha1(u64 val, DrawMode& mode) {
|
|
GsAlpha reg(val);
|
|
if (reg.a_mode() == GsAlpha::BlendMode::SOURCE && reg.b_mode() == GsAlpha::BlendMode::DEST &&
|
|
reg.c_mode() == GsAlpha::BlendMode::SOURCE && reg.d_mode() == GsAlpha::BlendMode::DEST) {
|
|
// (Cs - Cd) * As + Cd
|
|
// Cs * As + (1 - As) * Cd
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
|
|
|
|
} else if (reg.a_mode() == GsAlpha::BlendMode::SOURCE &&
|
|
reg.b_mode() == GsAlpha::BlendMode::ZERO_OR_FIXED &&
|
|
reg.c_mode() == GsAlpha::BlendMode::SOURCE &&
|
|
reg.d_mode() == GsAlpha::BlendMode::DEST) {
|
|
// (Cs - 0) * As + Cd
|
|
// Cs * As + (1) * CD
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_SRC_DST);
|
|
} else if (reg.a_mode() == GsAlpha::BlendMode::SOURCE &&
|
|
reg.b_mode() == GsAlpha::BlendMode::ZERO_OR_FIXED &&
|
|
reg.c_mode() == GsAlpha::BlendMode::ZERO_OR_FIXED &&
|
|
reg.d_mode() == GsAlpha::BlendMode::DEST) {
|
|
ASSERT(reg.fix() == 128);
|
|
// Cv = (Cs - 0) * FIX + Cd
|
|
// if fix = 128, it works out to 1.0
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_FIX_DST);
|
|
// src plus dest
|
|
} else if (reg.a_mode() == GsAlpha::BlendMode::SOURCE &&
|
|
reg.b_mode() == GsAlpha::BlendMode::DEST &&
|
|
reg.c_mode() == GsAlpha::BlendMode::ZERO_OR_FIXED &&
|
|
reg.d_mode() == GsAlpha::BlendMode::DEST) {
|
|
// Cv = (Cs - Cd) * FIX + Cd
|
|
ASSERT(reg.fix() == 64);
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_FIX_DST);
|
|
}
|
|
|
|
else {
|
|
lg::error("unsupported blend: a {} b {} c {} d {}", (int)reg.a_mode(), (int)reg.b_mode(),
|
|
(int)reg.c_mode(), (int)reg.d_mode());
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
|
|
ASSERT(false);
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Convert adgif info into a C++ renderer DrawMode.
|
|
*/
|
|
DrawMode process_draw_mode(const AdgifInfo& info, bool use_atest, bool use_decal) {
|
|
DrawMode mode;
|
|
// some of these are set up once as part of tie initialization
|
|
mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL);
|
|
|
|
// the atest giftag is set up at the end of the VU program.
|
|
if (use_atest) {
|
|
mode.enable_at();
|
|
mode.set_aref(0x26);
|
|
mode.set_alpha_fail(GsTest::AlphaFail::KEEP);
|
|
mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL);
|
|
} else {
|
|
mode.disable_at();
|
|
}
|
|
if (use_decal) {
|
|
mode.enable_decal();
|
|
}
|
|
// set up once.
|
|
mode.enable_depth_write();
|
|
mode.enable_zt(); // :zte #x1
|
|
mode.set_depth_test(GsTest::ZTest::GEQUAL); // :ztst (gs-ztest greater-equal))
|
|
mode.disable_ab();
|
|
mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
|
|
|
|
// the alpha matters
|
|
update_mode_from_alpha1(info.alpha_val, mode);
|
|
|
|
// the clamp matters
|
|
if (!(info.clamp_val == 0b101 || info.clamp_val == 0 || info.clamp_val == 1 ||
|
|
info.clamp_val == 0b100)) {
|
|
ASSERT_MSG(false, fmt::format("clamp: 0x{:x}", info.clamp_val));
|
|
}
|
|
|
|
mode.set_clamp_s_enable(info.clamp_val & 0b1);
|
|
mode.set_clamp_t_enable(info.clamp_val & 0b100);
|
|
|
|
return mode;
|
|
}
|
|
|
|
/*!
|
|
* Convert TieProtoInfo's to C++ renderer format
|
|
*/
|
|
void add_vertices_and_static_draw(tfrag3::TieTree& tree,
|
|
tfrag3::Level& lev,
|
|
const TextureDB& tdb,
|
|
const std::vector<TieProtoInfo>& protos) {
|
|
// our current approach for static draws is just to flatten to giant mesh, except for wind stuff.
|
|
// this map sorts these two types of draws by texture.
|
|
std::unordered_map<u32, std::vector<u32>> static_draws_by_tex;
|
|
std::unordered_map<u32, std::vector<u32>> wind_draws_by_tex;
|
|
|
|
// loop over all prototypes
|
|
for (auto& proto : protos) {
|
|
if (proto.uses_generic) {
|
|
// generic ties go through generic
|
|
continue;
|
|
}
|
|
// bool using_wind = true; // hack, for testing
|
|
bool using_wind = proto.stiffness != 0.f;
|
|
|
|
// create the model first
|
|
std::vector<std::vector<std::pair<int, int>>> packed_vert_indices;
|
|
for (size_t frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
|
|
packed_vert_indices.emplace_back();
|
|
auto& frag_vert_indices = packed_vert_indices.back();
|
|
auto& frag = proto.frags[frag_idx]; // shared info for all instances of this frag
|
|
for (auto& strip : frag.strips) {
|
|
int start = tree.packed_vertices.vertices.size();
|
|
for (auto& vert : strip.verts) {
|
|
tree.packed_vertices.vertices.push_back(
|
|
{vert.pos.x(), vert.pos.y(), vert.pos.z(), vert.tex.x(), vert.tex.y()});
|
|
lg::warn("SKIPPING ASSERT in extract_tie! TODO!");
|
|
// ASSERT(vert.tex.z() == 1.);
|
|
}
|
|
int end = tree.packed_vertices.vertices.size();
|
|
frag_vert_indices.emplace_back(start, end);
|
|
}
|
|
}
|
|
|
|
// loop over instances of the prototypes
|
|
for (auto& inst : proto.instances) {
|
|
// if we're using wind, we use the instanced renderer, which requires some extra info
|
|
// and we should remember which instance ID we are.
|
|
// Note: this is different from the game's instance index - we don't draw everything instanced
|
|
// so the non-instanced models don't get a C++ renderer instance ID
|
|
u32 wind_instance_idx = tree.wind_instance_info.size();
|
|
u32 matrix_idx = tree.packed_vertices.matrices.size();
|
|
if (using_wind) {
|
|
tfrag3::TieWindInstance wind_instance_info;
|
|
wind_instance_info.wind_idx = inst.wind_index; // which wind value to apply in the table
|
|
wind_instance_info.stiffness = proto.stiffness; // wind stiffness (how much we move)
|
|
wind_instance_info.matrix = inst.mat; // instance transformation matrix.
|
|
tree.wind_instance_info.push_back(wind_instance_info);
|
|
} else {
|
|
tree.packed_vertices.matrices.push_back(inst.mat);
|
|
}
|
|
|
|
// loop over fragments of the prototype
|
|
for (size_t frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
|
|
auto& frag = proto.frags[frag_idx]; // shared info for all instances of this frag
|
|
auto& ifrag = inst.frags.at(frag_idx); // color info for this instance of the frag
|
|
// loop over triangle strips within the fragment
|
|
for (size_t strip_idx = 0; strip_idx < frag.strips.size(); strip_idx++) {
|
|
auto& strip = frag.strips[strip_idx];
|
|
// what texture are we using?
|
|
u32 combo_tex = strip.adgif.combo_tex;
|
|
|
|
// try looking it up in the existing textures that we have in the C++ renderer data.
|
|
// (this is shared with tfrag)
|
|
u32 idx_in_lev_data = UINT32_MAX;
|
|
for (u32 i = 0; i < lev.textures.size(); i++) {
|
|
if (lev.textures[i].combo_id == combo_tex) {
|
|
idx_in_lev_data = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (idx_in_lev_data == UINT32_MAX) {
|
|
// didn't find it, have to add a new one texture.
|
|
auto tex_it = tdb.textures.find(combo_tex);
|
|
if (tex_it == tdb.textures.end()) {
|
|
bool ok_to_miss = false; // for TIE, there's no missing textures.
|
|
if (ok_to_miss) {
|
|
// we're missing a texture, just use the first one.
|
|
tex_it = tdb.textures.begin();
|
|
} else {
|
|
ASSERT_MSG(
|
|
false,
|
|
fmt::format(
|
|
"texture {} wasn't found. make sure it is loaded somehow. You may need to "
|
|
"include ART.DGO or GAME.DGO in addition to the level DGOs for shared "
|
|
"textures. tpage is {}. id is {} (0x{:x})",
|
|
combo_tex, combo_tex >> 16, combo_tex & 0xffff, combo_tex & 0xffff));
|
|
}
|
|
}
|
|
// add a new texture to the level data
|
|
idx_in_lev_data = lev.textures.size();
|
|
lev.textures.emplace_back();
|
|
auto& new_tex = lev.textures.back();
|
|
new_tex.combo_id = combo_tex;
|
|
new_tex.w = tex_it->second.w;
|
|
new_tex.h = tex_it->second.h;
|
|
new_tex.debug_name = tex_it->second.name;
|
|
new_tex.debug_tpage_name = tdb.tpage_names.at(tex_it->second.page);
|
|
new_tex.data = tex_it->second.rgba_bytes;
|
|
}
|
|
|
|
// determine the draw mode
|
|
DrawMode mode =
|
|
process_draw_mode(strip.adgif, frag.prog_info.misc_x == 0, frag.has_magic_tex0_bit);
|
|
|
|
if (using_wind) {
|
|
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
|
|
auto existing_draws_in_tex = wind_draws_by_tex.find(idx_in_lev_data);
|
|
tfrag3::InstancedStripDraw* draw_to_add_to = nullptr;
|
|
if (existing_draws_in_tex != wind_draws_by_tex.end()) {
|
|
for (auto idx : existing_draws_in_tex->second) {
|
|
if (tree.instanced_wind_draws.at(idx).mode == mode) {
|
|
draw_to_add_to = &tree.instanced_wind_draws[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!draw_to_add_to) {
|
|
// nope no existing draw for these settings, need to create a new draw
|
|
tree.instanced_wind_draws.emplace_back();
|
|
wind_draws_by_tex[idx_in_lev_data].push_back(tree.instanced_wind_draws.size() - 1);
|
|
draw_to_add_to = &tree.instanced_wind_draws.back();
|
|
draw_to_add_to->mode = mode;
|
|
draw_to_add_to->tree_tex_id = idx_in_lev_data;
|
|
}
|
|
|
|
// now we have a draw, time to add vertices. We make a vertex "group" which is a group
|
|
// of vertices that the renderer can decide to not draw based on visibility data.
|
|
tfrag3::InstancedStripDraw::InstanceGroup igroup;
|
|
// needs to be associated with this instance.
|
|
igroup.vis_idx = inst.vis_id; // associate with the instance for culling
|
|
// number of vertices. The +1 is for the primitive restart index, which tells opengl
|
|
// that the triangle strip is done.
|
|
igroup.num = strip.verts.size() + 1;
|
|
// groups for instances also need the instance idx to grab the appropriate wind/matrix
|
|
// data.
|
|
igroup.instance_idx = wind_instance_idx;
|
|
draw_to_add_to->num_triangles += strip.verts.size() - 2;
|
|
// note: this is a bit wasteful to duplicate the xyz/stq.
|
|
tfrag3::PackedTieVertices::MatrixGroup grp;
|
|
grp.matrix_idx = -1;
|
|
grp.start_vert = packed_vert_indices.at(frag_idx).at(strip_idx).first;
|
|
grp.end_vert = packed_vert_indices.at(frag_idx).at(strip_idx).second;
|
|
tree.packed_vertices.matrix_groups.push_back(grp);
|
|
for (auto& vert : strip.verts) {
|
|
u16 color_index = 0;
|
|
if (vert.color_index_index == UINT32_MAX) {
|
|
color_index = 0;
|
|
} else {
|
|
color_index = ifrag.color_indices.at(vert.color_index_index);
|
|
ASSERT(vert.color_index_index < ifrag.color_indices.size());
|
|
color_index += ifrag.color_index_offset_in_big_palette;
|
|
}
|
|
|
|
size_t vert_idx = tree.packed_vertices.color_indices.size();
|
|
tree.packed_vertices.color_indices.push_back(color_index);
|
|
draw_to_add_to->vertex_index_stream.push_back(vert_idx);
|
|
}
|
|
|
|
// the primitive restart index
|
|
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
|
|
draw_to_add_to->instance_groups.push_back(igroup);
|
|
|
|
} else {
|
|
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
|
|
auto existing_draws_in_tex = static_draws_by_tex.find(idx_in_lev_data);
|
|
tfrag3::StripDraw* draw_to_add_to = nullptr;
|
|
if (existing_draws_in_tex != static_draws_by_tex.end()) {
|
|
for (auto idx : existing_draws_in_tex->second) {
|
|
if (tree.static_draws.at(idx).mode == mode) {
|
|
draw_to_add_to = &tree.static_draws[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!draw_to_add_to) {
|
|
// nope, need to create a new draw
|
|
tree.static_draws.emplace_back();
|
|
static_draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1);
|
|
draw_to_add_to = &tree.static_draws.back();
|
|
draw_to_add_to->mode = mode;
|
|
draw_to_add_to->tree_tex_id = idx_in_lev_data;
|
|
}
|
|
|
|
// now we have a draw, time to add vertices
|
|
tfrag3::StripDraw::VisGroup vgroup;
|
|
vgroup.vis_idx_in_pc_bvh = inst.vis_id; // associate with the instance for culling
|
|
vgroup.num_inds = strip.verts.size() + 1; // one for the primitive restart!
|
|
vgroup.num_tris = strip.verts.size() - 2;
|
|
draw_to_add_to->num_triangles += strip.verts.size() - 2;
|
|
tfrag3::PackedTieVertices::MatrixGroup grp;
|
|
grp.matrix_idx = matrix_idx;
|
|
grp.start_vert = packed_vert_indices.at(frag_idx).at(strip_idx).first;
|
|
grp.end_vert = packed_vert_indices.at(frag_idx).at(strip_idx).second;
|
|
tree.packed_vertices.matrix_groups.push_back(grp);
|
|
tfrag3::StripDraw::VertexRun run;
|
|
run.vertex0 = tree.packed_vertices.color_indices.size();
|
|
run.length = strip.verts.size();
|
|
for (auto& vert : strip.verts) {
|
|
u16 color_index = 0;
|
|
if (vert.color_index_index == UINT32_MAX) {
|
|
color_index = 0;
|
|
} else {
|
|
color_index = ifrag.color_indices.at(vert.color_index_index);
|
|
ASSERT(vert.color_index_index < ifrag.color_indices.size());
|
|
color_index += ifrag.color_index_offset_in_big_palette;
|
|
}
|
|
|
|
tree.packed_vertices.color_indices.push_back(color_index);
|
|
// draw_to_add_to->vertex_index_stream.push_back(vert_idx);
|
|
}
|
|
draw_to_add_to->runs.push_back(run);
|
|
// draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
|
|
draw_to_add_to->vis_groups.push_back(vgroup);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sort draws by texture. no idea if this really matters, but will reduce the number of
|
|
// times the renderer changes textures. it at least makes the rendererdoc debugging easier.
|
|
std::stable_sort(tree.static_draws.begin(), tree.static_draws.end(),
|
|
[](const tfrag3::StripDraw& a, const tfrag3::StripDraw& b) {
|
|
return a.tree_tex_id < b.tree_tex_id;
|
|
});
|
|
}
|
|
|
|
/*!
|
|
* The groups are created per-fragment, but usually you have a few fragments per instance, so there
|
|
* are often consecutive groups that can be merged.
|
|
*/
|
|
void merge_groups(std::vector<tfrag3::InstancedStripDraw::InstanceGroup>& grps) {
|
|
std::vector<tfrag3::InstancedStripDraw::InstanceGroup> result;
|
|
result.push_back(grps.at(0));
|
|
for (size_t i = 1; i < grps.size(); i++) {
|
|
if (grps[i].vis_idx == result.back().vis_idx &&
|
|
grps[i].instance_idx == result.back().instance_idx) {
|
|
result.back().num += grps[i].num;
|
|
} else {
|
|
result.push_back(grps[i]);
|
|
}
|
|
}
|
|
std::swap(result, grps);
|
|
}
|
|
|
|
/*!
 * Merge consecutive vis groups that share the same vis_idx_in_pc_bvh. The merged group
 * accumulates num_tris and num_inds of its members. The list is updated in place. An empty list
 * is left untouched (previously this threw std::out_of_range from grps.at(0)).
 */
void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
  if (grps.empty()) {
    // nothing to merge.
    return;
  }

  std::vector<tfrag3::StripDraw::VisGroup> result;
  result.push_back(grps.front());
  for (size_t i = 1; i < grps.size(); i++) {
    auto& last = result.back();
    if (grps[i].vis_idx_in_pc_bvh == last.vis_idx_in_pc_bvh) {
      // same visibility node as the previous group: extend it.
      last.num_tris += grps[i].num_tris;
      last.num_inds += grps[i].num_inds;
    } else {
      result.push_back(grps[i]);
    }
  }
  std::swap(result, grps);
}
|
|
|
|
/*!
 * Extract a TIE tree for the C++ renderer. For each geometry level (0 .. GEOM_MAX - 1) this
 * validates the drawable tree, extracts the visibility data, collects per-prototype info, runs
 * the prototype/instance/kick emulation steps, builds the palette and draws, and appends the
 * resulting tree to out.tie_trees[geo].
 * If dump_level is set, .obj files for each prototype and for the full tree are written under
 * debug_out/.
 */
void extract_tie(const level_tools::DrawableTreeInstanceTie* tree,
                 const std::string& debug_name,
                 const std::vector<level_tools::TextureRemap>& tex_map,
                 const TextureDB& tex_db,
                 tfrag3::Level& out,
                 bool dump_level,
                 GameVersion version) {
  for (int geo = 0; geo < GEOM_MAX; ++geo) {
    tfrag3::TieTree this_tree;

    // sanity check the vis tree (not a perfect check, but this is used in game and should be right)
    ASSERT(tree->length == (int)tree->arrays.size());
    ASSERT(tree->length > 0);
    // the last array of the tree is expected to be the list of instances.
    auto instance_array =
        dynamic_cast<level_tools::DrawableInlineArrayInstanceTie*>(tree->arrays.back().get());
    ASSERT(instance_array);
    ASSERT(instance_array->length == (int)instance_array->instances.size());
    ASSERT(instance_array->length > 0);
    // instance ids must count up by one from the id of the first instance.
    u16 expected_id = instance_array->instances.front().id;
    for (auto& instance : instance_array->instances) {
      ASSERT(instance.id == expected_id);
      expected_id++;
    }
    bool ok = verify_node_indices(tree);
    ASSERT(ok);

    // extract the vis tree. Note that this extracts the tree only down to the last draw node, a
    // parent of between 1 and 8 instances.
    extract_vis_data(tree, instance_array->instances.front().id, this_tree);

    // we use the index of the instance in the instance list as its index. But this is different
    // from its visibility index. This map goes from instance index to the parent node in the vis
    // tree. later, we can use this to remap from instance idx to the visibility node index.
    std::unordered_map<int, int> instance_parents;
    for (size_t node_idx = 0; node_idx < this_tree.bvh.vis_nodes.size(); node_idx++) {
      const auto& node = this_tree.bvh.vis_nodes[node_idx];
      if (node.flags != 0) {
        // only nodes with flags == 0 contribute entries (presumably the parents of instances).
        continue;
      }
      for (int kid = 0; kid < node.num_kids; kid++) {
        instance_parents[node.child_id + kid] = node_idx;
      }
    }

    // convert level format data to a nicer format
    auto info =
        collect_instance_info(instance_array, &tree->prototypes.prototype_array_tie.data, geo);
    update_proto_info(&info, tex_map, tex_db, tree->prototypes.prototype_array_tie.data, geo);
    if (version != GameVersion::Jak2) {
      // the wind vector check is skipped for Jak 2.
      check_wind_vectors_zero(info, tree->prototypes.wind_vectors);
    }

    // determine draws from VU program
    emulate_tie_prototype_program(info);
    emulate_tie_instance_program(info);
    emulate_kicks(info);

    // debug save to .obj
    if (dump_level) {
      auto dir =
          file_util::get_file_path({fmt::format("debug_out/lod{}-tie-{}/", geo, debug_name)});
      file_util::create_dir_if_needed(dir);
      for (auto& proto : info) {
        auto proto_obj = debug_dump_proto_to_obj(proto);
        file_util::write_text_file(fmt::format("{}/{}.obj", dir, proto.name), proto_obj);
      }

      auto everything_obj = dump_full_to_obj(info);
      file_util::write_text_file(fmt::format("{}/ALL.obj", dir), everything_obj);
    }

    // create time of day data.
    auto full_palette = make_big_palette(info);

    // create draws
    add_vertices_and_static_draw(this_tree, out, tex_db, info);

    // Replace an instance index (stored in place) with the index of its parent vis node, or
    // UINT32_MAX if the instance has no known parent.
    const auto remap_to_vis_node = [&instance_parents](auto& vis_idx) {
      auto parent = instance_parents.find(vis_idx);
      if (parent == instance_parents.end()) {
        vis_idx = UINT32_MAX;
      } else {
        vis_idx = parent->second;
      }
    };

    // remap vis indices and merge
    for (auto& draw : this_tree.static_draws) {
      for (auto& group : draw.vis_groups) {
        remap_to_vis_node(group.vis_idx_in_pc_bvh);
      }
      merge_groups(draw.vis_groups);
    }

    for (auto& draw : this_tree.instanced_wind_draws) {
      for (auto& group : draw.instance_groups) {
        remap_to_vis_node(group.vis_idx);
      }
      merge_groups(draw.instance_groups);
    }

    this_tree.colors = full_palette.colors;
    out.tie_trees[geo].push_back(std::move(this_tree));
  }
}
|
|
} // namespace decompiler
|