#include "audio_formats.h"

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <utility>
#include <vector>

#include "common/log/log.h"
#include "common/util/BinaryWriter.h"

#include "fmt/core.h"

// NOTE(review): the template argument lists in this file (std::vector<s16>, std::vector<u8>,
// reader.read<u8>(), writer.add<s16>(0), ...) were reconstructed from how the values are used.
// Confirm them against the declarations in audio_formats.h.

//! Number of samples that one 16-byte ADPCM block decodes to.
constexpr int SAMPLES_PER_BLOCK = 28;

namespace {
//! Number of prediction filters the codec can choose between.
constexpr int ADPCM_FILTER_COUNT = 5;
//! Filter coefficients applied to the previous sample (scaled by 64).
constexpr s32 ADPCM_FILTER_F1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
//! Filter coefficients applied to the sample before the previous one (scaled by 64).
constexpr s32 ADPCM_FILTER_F2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60};
//! Size in bytes of one encoded block: 1 shift/filter byte, 1 flag byte, 14 data bytes.
constexpr int ADPCM_BLOCK_BYTES = 16;
//! Number of bytes skipped at the start of each channel's data (the VAG header).
constexpr int ADPCM_VAG_HEADER_BYTES = 48;
//! In stereo files the channels alternate in contiguous chunks of this many bytes.
constexpr int ADPCM_STEREO_CHUNK_BYTES = 0x2000;
}  // namespace

/*!
 * Clamp a value to the inclusive range [minimum, maximum].
 */
template <typename T>
T saturate(T in, T minimum, T maximum) {
  if (in < minimum) {
    return minimum;
  }
  if (in > maximum) {
    return maximum;
  }
  return in;
}

/*!
 * Write a 16-bit PCM wave file from vectors of samples.
 *
 * @param left_samples  samples of the left (or only) channel
 * @param right_samples samples of the right channel. If empty, a mono file is written.
 * @param sample_rate   sample rate stored in the header
 * @param name          path of the file to write
 *
 * In stereo mode one frame is written per left sample. If the right channel is shorter than the
 * left one, the missing right samples are written as 0; right samples beyond the length of the
 * left channel are not written.
 */
void write_wave_file(const std::vector<s16>& left_samples,
                     const std::vector<s16>& right_samples,
                     s32 sample_rate,
                     const fs::path& name) {
  const bool mono = right_samples.empty();
  const int channel_count = mono ? 1 : 2;
  // Size the header from what is actually written below (one frame per left sample), not from
  // left + right, which is wrong whenever the two channels differ in length.
  const size_t data_bytes =
      left_samples.size() * static_cast<size_t>(channel_count) * sizeof(s16);

  WaveFileHeader header;
  std::memcpy(header.chunk_id, "RIFF", 4);
  // 36 header bytes follow this field, then the sample data.
  header.chunk_size = 36 + data_bytes;
  std::memcpy(header.format, "WAVE", 4);

  // now the format
  std::memcpy(header.subchunk1_id, "fmt ", 4);
  header.subchunk1_size = 16;
  header.aud_format = 1;
  header.num_channels = channel_count;
  header.sample_rate = sample_rate;
  header.byte_rate = sample_rate * header.num_channels * sizeof(s16);
  header.block_align = header.num_channels * sizeof(s16);
  header.bits_per_sample = 16;

  std::memcpy(header.subchunk2_id, "data", 4);
  header.subchunk2_size = data_bytes;

  BinaryWriter writer;
  writer.add(header);

  if (mono) {
    for (const auto& sample : left_samples) {
      writer.add(sample);
    }
  } else {
    // interleave left/right
    for (size_t i = 0; i < left_samples.size(); i++) {
      writer.add(left_samples.at(i));
      if (i < right_samples.size()) {
        writer.add(right_samples.at(i));
      } else {
        writer.add<s16>(0);
      }
    }
  }

  writer.write_to_file(name);
}

/*!
 * Decode ADPCM blocks from the reader until it has no bytes left.
 *
 * @param reader source of the encoded data
 * @param stereo if true, the data is treated as alternating left/right chunks
 * @return pair of (left samples, right samples). In mono mode all samples go to the left vector
 *         and the right vector is empty.
 */
std::pair<std::vector<s16>, std::vector<s16>> decode_adpcm(BinaryReader& reader,
                                                           const bool stereo) {
  std::vector<s16> left_samples;
  std::vector<s16> right_samples;
  // last two decoded samples of each channel; the prediction runs across block boundaries
  s32 left_sample_prev[2] = {0, 0};
  s32 right_sample_prev[2] = {0, 0};
  bool right_header_skipped = false;

  int bytes_read = reader.get_seek();  // we've already read n bytes into the file

  // Jak VAG's don't interleave the samples because of course they don't
  // instead they are partitioned into contiguous 8kb (thats 8192 bytes) chunks
  // alternating left/right
  bool processing_left_chunk = true;

  // We need to skip the vag header for each channel
  reader.ffwd(ADPCM_VAG_HEADER_BYTES);
  bytes_read += ADPCM_VAG_HEADER_BYTES;

  while (reader.bytes_left()) {
    if (stereo && bytes_read == ADPCM_STEREO_CHUNK_BYTES) {
      // switch streams
      processing_left_chunk = !processing_left_chunk;
      bytes_read = 0;
      // The first time we reach the right channel, skip its header as well
      if (!right_header_skipped) {
        reader.ffwd(ADPCM_VAG_HEADER_BYTES);
        bytes_read += ADPCM_VAG_HEADER_BYTES;
        right_header_skipped = true;
      }
    }

    const u8 shift_filter = reader.read<u8>();
    const u8 shift = shift_filter & 0b1111;
    u8 filter = shift_filter >> 4;
    // The filter field is 4 bits wide but only ADPCM_FILTER_COUNT filters exist. Indexing the
    // coefficient tables with a larger value would read out of bounds, so fall back to filter 0
    // (no prediction) for unknown filter ids.
    if (filter >= ADPCM_FILTER_COUNT) {
      filter = 0;
    }
    const u8 flags = reader.read<u8>();
    (void)flags;
    // removed assertions here (and that's probably why the audio doesn't sound right)

    u8 input_buffer[ADPCM_BLOCK_BYTES - 2];
    for (u8& byte : input_buffer) {
      byte = reader.read<u8>();
    }

    // pick the channel state/output this block belongs to
    const bool to_left = !stereo || processing_left_chunk;
    s32* prev = to_left ? left_sample_prev : right_sample_prev;
    std::vector<s16>& output = to_left ? left_samples : right_samples;

    for (int i = 0; i < SAMPLES_PER_BLOCK; i++) {
      // two 4-bit samples per byte, low nibble first
      const u8 packed = input_buffer[i / 2];
      const int nibble = (i % 2 == 0) ? (packed & 0x0f) : ((packed & 0xf0) >> 4);

      // move the nibble to the top of a 16-bit value so that it gets sign extended
      s32 sample = static_cast<s32>(static_cast<s16>(nibble << 12));
      sample >>= shift;
      sample += (prev[0] * ADPCM_FILTER_F1[filter] + prev[1] * ADPCM_FILTER_F2[filter] + 32) / 64;
      sample = saturate<s32>(sample, -0x8000, 0x7fff);

      prev[1] = prev[0];
      prev[0] = sample;
      output.push_back(static_cast<s16>(sample));
    }

    bytes_read += ADPCM_BLOCK_BYTES;
  }

  return {std::move(left_samples), std::move(right_samples)};
}

// I attempted to write an encoder below, which works, but has some limitations.
// - In some cases we can't recover the original data exactly because the decode saturates
//   the output to fit in a signed 16-bit integer.
// - There are some cases when there are multiple ways to encode the same data.
//   The break_filter_ties function attempts to handle this, but doesn't work 100% of the time.

/*!
 * Compute the prediction residuals of one block for a given filter.
 *
 * @param filter_idx      index of the filter to use, in [0, ADPCM_FILTER_COUNT)
 * @param samples_in      SAMPLES_PER_BLOCK input samples
 * @param out             receives SAMPLES_PER_BLOCK residuals (input minus prediction)
 * @param prev_samples_in the two samples before this block: [0] is the most recent
 */
void encode_block_with_filter(int filter_idx,
                              const s16* samples_in,
                              s32* out,
                              const s32* prev_samples_in) {
  s32 prev_samples[2] = {prev_samples_in[0], prev_samples_in[1]};

  for (int sample_idx = 0; sample_idx < SAMPLES_PER_BLOCK; sample_idx++) {
    const s32 sample = samples_in[sample_idx];
    const s32 prediction = (prev_samples[0] * ADPCM_FILTER_F1[filter_idx] +
                            prev_samples[1] * ADPCM_FILTER_F2[filter_idx] + 32) /
                           64;
    out[sample_idx] = sample - prediction;

    // the predictor history uses the exact input samples
    prev_samples[1] = prev_samples[0];
    prev_samples[0] = sample;
  }
}

/*!
 * Compute the total absolute error introduced by storing a block of residuals as 4-bit values
 * with the given shift.
 *
 * @param shift   shift to evaluate. Must be at most 12.
 * @param samples SAMPLES_PER_BLOCK residuals
 * @return sum over the block of |reconstructed - original|
 */
int get_shift_error(int shift, const s32* samples, bool /*debug*/) {
  // shift that moves the 4 bits we are going to keep to the top of a 32-bit value
  const int left_shift = 32 - (12 + 4 - shift);
  ASSERT(left_shift >= 0);
  ASSERT(shift <= 12);
  // shift that moves the kept 4 bits back to their original position
  const int restore_shift = 12 - shift;

  int result = 0;
  for (int sample_idx = 0; sample_idx < SAMPLES_PER_BLOCK; sample_idx++) {
    const s32 original = samples[sample_idx];
    // Left shifts go through uint32_t: shifting a negative signed value left is undefined
    // behavior before C++20.
    const s32 top_aligned = static_cast<s32>(static_cast<uint32_t>(original) << left_shift);
    // arithmetic shift right keeps the sign of the 4-bit value
    const s32 quantized = top_aligned >> (32 - 4);
    const s32 reconstructed =
        static_cast<s32>(static_cast<uint32_t>(quantized) << restore_shift);
    result += std::abs(reconstructed - original);
  }
  return result;
}

/*!
 * Get the number of bits needed to store the value as a signed two's complement integer
 * (including the sign bit). For example: 0 -> 1, -1 -> 1, 7 -> 4, -8 -> 4.
 */
int get_max_bits(s32 value) {
  int result = 0;
  if (value >= 0) {
    // count the bits up to the highest set bit...
    int last = 1;
    while (value) {
      result++;
      last = value & 1;
      value >>= 1;
    }
    // ...and add one for the sign bit
    if (last) {
      result++;
    }
  } else {
    // count the bits until only sign bits remain...
    int last = 0;
    while (value != -1) {
      result++;
      last = value & 1;
      value >>= 1;
    }
    // ...and add one for the sign bit
    if (!last) {
      result++;
    }
  }
  return result;
}

/*!
 * Pick the filter to use for a block.
 * Among the filters with the lowest error, the one with the lowest shift wins. If several of
 * those have the same shift, the lowest filter index wins.
 *
 * @param errors        error of each of the ADPCM_FILTER_COUNT filters
 * @param filter_shifts shift chosen for each of the ADPCM_FILTER_COUNT filters
 * @return index of the chosen filter
 */
int break_filter_ties(s32* errors, s32* filter_shifts) {
  const s32 best_error = *std::min_element(errors, errors + ADPCM_FILTER_COUNT);

  s32 best_shift = INT32_MAX;
  int best_filter = -1;
  // iterate from the last filter down, so the lowest index wins a tie on shift
  for (int filter_idx = ADPCM_FILTER_COUNT - 1; filter_idx >= 0; filter_idx--) {
    if (errors[filter_idx] == best_error && filter_shifts[filter_idx] <= best_shift) {
      best_shift = filter_shifts[filter_idx];
      best_filter = filter_idx;
    }
  }
  return best_filter;
}

/*!
 * Run the encoder's filter/shift selection over the samples and compare the result of each
 * block against known answers. Logs the details and asserts on the first block that has a
 * nonzero error or that doesn't match.
 *
 * @param samples      input samples. Trailing samples that don't fill a block are ignored.
 * @param filter_debug expected filter of each block
 * @param shift_debug  expected shift of each block
 */
void test_encode_adpcm(const std::vector<s16>& samples,
                       const std::vector<u8>& filter_debug,
                       const std::vector<u8>& shift_debug) {
  // the data is made of blocks.
  // Each block decodes to 28 samples.
  // each block has a shift and FIR filter.
  // the window is continuous across blocks.

  // we could try all combinations of filters / shifts and pick the best, but that's slow and
  // we don't know how to break ties if multiple are the same.

  // we will try all 5 filters, then be smart about picking the best shift from there.

  // last two samples from chosen encoding of the previous block
  // init to 0, like the decoder
  s32 prev_block_samples[2] = {0, 0};

  // TODO - this will drop some samples at the end, if we don't use a multiple of 28.
  // probably best to go back and pad with zeros or something.
  const int block_count = samples.size() / SAMPLES_PER_BLOCK;

  for (int block_idx = 0; block_idx < block_count; block_idx++) {
    const int block_start = SAMPLES_PER_BLOCK * block_idx;

    // try each filter
    s32 pre_shift_samples_per_filter[ADPCM_FILTER_COUNT][SAMPLES_PER_BLOCK];
    for (int filter_idx = 0; filter_idx < ADPCM_FILTER_COUNT; filter_idx++) {
      encode_block_with_filter(filter_idx, samples.data() + block_start,
                               pre_shift_samples_per_filter[filter_idx], prev_block_samples);
    }

    // this is somewhat arbitrary, but we will require that the largest delta in the previous
    // encode can be represented.
    s32 filter_errors[ADPCM_FILTER_COUNT] = {0, 0, 0, 0, 0};
    s32 filter_shifts[ADPCM_FILTER_COUNT] = {-1, -1, -1, -1, -1};

    for (int filter_idx = 0; filter_idx < ADPCM_FILTER_COUNT; filter_idx++) {
      const s32* deltas = pre_shift_samples_per_filter[filter_idx];

      // find the largest and smallest value
      s32 max_sample = INT32_MIN;
      s32 min_sample = INT32_MAX;
      for (int sample_idx = 0; sample_idx < SAMPLES_PER_BLOCK; sample_idx++) {
        max_sample = std::max(deltas[sample_idx], max_sample);
        min_sample = std::min(deltas[sample_idx], min_sample);
      }

      // leftover debugging hook: extra logging for one specific block/filter
      const bool debug = block_idx == 10966 && filter_idx == 4;
      if (debug) {
        lg::debug("Range: {}", max_sample - min_sample);
      }

      // see how many bits we need and pick shift.
      const int bits_for_max =
          std::max(4, std::max(get_max_bits(min_sample), get_max_bits(max_sample)));
      filter_shifts[filter_idx] = 4 + 12 - bits_for_max;
      filter_errors[filter_idx] = get_shift_error(filter_shifts[filter_idx], deltas, debug);

      // If the block is stored exactly, use the lowest shift that is still exact.
      // Stop at 0: the shift is stored in a 4-bit field, so a negative shift can't be encoded.
      if (filter_errors[filter_idx] == 0) {
        while (filter_shifts[filter_idx] > 0 &&
               get_shift_error(filter_shifts[filter_idx] - 1, deltas, false) == 0) {
          filter_shifts[filter_idx]--;
        }
      }
    }

    const int best_filter = break_filter_ties(filter_errors, filter_shifts);
    const s32 best_shift = filter_shifts[best_filter];

    if (filter_errors[best_filter] || best_filter != filter_debug[block_idx] ||
        best_shift != shift_debug[block_idx]) {
      lg::error("Block {} me {}, {} : answer {} {}: ERR {}", block_idx, best_filter, best_shift,
                filter_debug[block_idx], shift_debug[block_idx], filter_errors[best_filter]);
      lg::error("filter errors:");
      for (int i = 0; i < ADPCM_FILTER_COUNT; i++) {
        lg::error(" [{}] {} {}", i, filter_errors[i], filter_shifts[i]);
      }
      ASSERT_MSG(false,
                 fmt::format("prev: {} {}", prev_block_samples[0], prev_block_samples[1]));
    }

    // the last two samples of this block are the history for the next one
    prev_block_samples[0] = samples.at(block_start + SAMPLES_PER_BLOCK - 1);
    prev_block_samples[1] = samples.at(block_start + SAMPLES_PER_BLOCK - 2);
  }  // end loop over blocks
}