diff --git a/.vs/launch.vs.json b/.vs/launch.vs.json
index b38cc2822..8c65df0c7 100644
--- a/.vs/launch.vs.json
+++ b/.vs/launch.vs.json
@@ -131,7 +131,7 @@
       "project" : "CMakeLists.txt",
       "projectTarget" : "extractor.exe (bin\\extractor.exe)",
       "name" : "Run - Extractor - Extract",
-      "args" : [ "\"E:\\ISOs\\Jak\\Jak 1.iso\"", "--extract", "-proj-path", "C:\\Users\\xtvas\\Repositories\\opengoal\\launcher\\bundle-test\\data"]
+      "args" : [ "\"E:\\ISOs\\Jak\\Jak 1.iso\"", "--extract", "--proj-path", "C:\\Users\\xtvas\\Repositories\\opengoal\\launcher\\bundle-test\\data"]
     }
   ]
}
diff --git a/common/util/read_iso_file.cpp b/common/util/read_iso_file.cpp
index 837ee5f82..85ef213c6 100644
--- a/common/util/read_iso_file.cpp
+++ b/common/util/read_iso_file.cpp
@@ -76,12 +76,15 @@ void add_from_dir(FILE* fp, u32 sector, u32 size, IsoFile::Entry* parent) {
   }
 }
 
-void unpack_entry(FILE* fp, const IsoFile::Entry& entry, const std::filesystem::path& dest) {
+void unpack_entry(FILE* fp,
+                  IsoFile& iso,
+                  const IsoFile::Entry& entry,
+                  const std::filesystem::path& dest) {
   std::filesystem::path path_to_entry = dest / entry.name;
   if (entry.is_dir) {
     std::filesystem::create_directory(path_to_entry);
     for (const auto& child : entry.children) {
-      unpack_entry(fp, child, path_to_entry);
+      unpack_entry(fp, iso, child, path_to_entry);
     }
   } else {
     std::vector<u8> buffer(entry.size);
@@ -92,6 +95,11 @@ void unpack_entry(FILE* fp, const IsoFile::Entry& entry, const std::filesystem::
       ASSERT_MSG(false, "Failed to fread iso when unpacking");
     }
     file_util::write_binary_file(path_to_entry.string(), buffer.data(), buffer.size());
+    iso.files_extracted++;
+    if (iso.shouldHash) {
+      xxh::hash_t<64> hash = xxh::xxhash<64>(buffer);
+      iso.hashes.push_back(hash);
+    }
   }
 }
 }  // namespace
@@ -105,10 +113,13 @@ IsoFile find_files_in_iso(FILE* fp) {
   return result;
 }
 
-void unpack_iso_files(FILE* fp, const IsoFile& layout, const std::filesystem::path& dest) {
-  unpack_entry(fp, layout.root, dest);
+void unpack_iso_files(FILE* fp, IsoFile& layout, const std::filesystem::path& dest) {
+  unpack_entry(fp, layout, layout.root, dest);
 }
 
-void unpack_iso_files(FILE* fp, const std::filesystem::path& dest) {
-  unpack_iso_files(fp, find_files_in_iso(fp), dest);
+IsoFile unpack_iso_files(FILE* fp, const std::filesystem::path& dest, const bool hashFiles) {
+  auto file = find_files_in_iso(fp);
+  file.shouldHash = hashFiles;
+  unpack_iso_files(fp, file, dest);
+  return file;
 }
diff --git a/common/util/read_iso_file.h b/common/util/read_iso_file.h
index af771db8f..e71a4a7c1 100644
--- a/common/util/read_iso_file.h
+++ b/common/util/read_iso_file.h
@@ -3,6 +3,7 @@
 #include <filesystem>
 #include <string>
 #include <vector>
+#include "third-party/xxhash.hpp"
 
 struct IsoFile {
   struct Entry {
@@ -22,9 +23,15 @@ struct IsoFile {
 
   Entry root;
 
+  int files_extracted = 0;
+  bool shouldHash = false;
+  // There is no reason to map hashes back to their files, as we don't retain a mapping of each
+  // file's expected hash
+  std::vector<xxh::hash64_t> hashes = {};
+
   IsoFile();
 };
 
 IsoFile find_files_in_iso(FILE* fp);
-void unpack_iso_files(FILE* fp, const IsoFile& layout, const std::filesystem::path& dest);
-void unpack_iso_files(FILE* fp, const std::filesystem::path& dest);
\ No newline at end of file
+void unpack_iso_files(FILE* fp, IsoFile& layout, const std::filesystem::path& dest);
+IsoFile unpack_iso_files(FILE* fp, const std::filesystem::path& dest, const bool hashFiles = false);
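For reference, the content hash introduced above is order-independent: each extracted file's bytes are hashed with xxHash64 as it is written out, the per-file hashes are XOR-folded, and the folded value is hashed once more (as validate() does further down). A minimal standalone sketch of that scheme, using the vendored header; the function name is illustrative and not part of the patch:

#include <cstdint>
#include <vector>
#include "third-party/xxhash.hpp"

// XOR is commutative and associative, so the folded value - and therefore the
// final hash - is identical no matter what order the files were extracted in.
uint64_t combined_content_hash(const std::vector<std::vector<uint8_t>>& file_contents) {
  uint64_t folded = 0;
  for (const auto& bytes : file_contents) {
    folded ^= xxh::xxhash<64>(bytes);  // per-file hash, as in unpack_entry
  }
  // One final hash over the folded value, mirroring xxh::xxhash<64>({combined_hash}).
  return xxh::xxhash<64>({folded});
}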
diff --git a/decompiler/extractor/main.cpp b/decompiler/extractor/main.cpp
index ed5fadc2a..943352bec 100644
--- a/decompiler/extractor/main.cpp
+++ b/decompiler/extractor/main.cpp
@@ -7,37 +7,222 @@
 #include "decompiler/config.h"
 #include "goalc/compiler/Compiler.h"
 #include "common/util/read_iso_file.h"
+#include <regex>
+
+enum class ExtractorErrorCode {
+  SUCCESS = 0,
+  VALIDATION_CANT_LOCATE_ELF = 4000,
+  VALIDATION_SERIAL_MISSING_FROM_DB = 4001,
+  VALIDATION_ELF_MISSING_FROM_DB = 4002,
+  VALIDATION_BAD_ISO_CONTENTS = 4010,
+  VALIDATION_INCORRECT_EXTRACTION_COUNT = 4011,
+  VALIDATION_BAD_EXTRACTION = 4020
+};
+
+struct ISOMetadata {
+  std::string canonical_name;
+  std::string region;
+  int num_files;
+  xxh::hash64_t contents_hash;
+  std::string decomp_config;
+};
+
+// TODO - when we support jak2 and beyond, add which game it's for as well
+// this will let the installer reject (or gracefully handle) jak2 isos on the jak1 page, etc.
+
+// { SERIAL : { ELF_HASH : ISOMetadata } }
+static std::map<std::string, std::map<xxh::hash64_t, ISOMetadata>> isoDatabase{
+    {"SCUS-97124",
+     {{7280758013604870207U,
+       {"Jak and Daxter: The Precursor Legacy - Black Label", "NTSC-U", 337, 11363853835861842434U,
+        "jak1_ntsc_black_label"}}}}};
 
 void setup_global_decompiler_stuff(std::optional<std::filesystem::path> project_path_override) {
   decompiler::init_opcode_info();
   file_util::setup_project_path(project_path_override);
 }
 
-void extract_files(std::filesystem::path data_dir_path, std::filesystem::path extracted_iso_path) {
+IsoFile extract_files(std::filesystem::path data_dir_path,
+                      std::filesystem::path extracted_iso_path) {
   fmt::print("Note: input isn't a folder, assuming it's an ISO file...\n");
 
   std::filesystem::create_directories(extracted_iso_path);
 
   auto fp = fopen(data_dir_path.string().c_str(), "rb");
   ASSERT_MSG(fp, "failed to open input ISO file\n");
-  unpack_iso_files(fp, extracted_iso_path);
+  IsoFile iso = unpack_iso_files(fp, extracted_iso_path, true);
   fclose(fp);
+  return iso;
 }
 
-int validate(std::filesystem::path path_to_iso_files) {
-  if (!std::filesystem::exists(path_to_iso_files / "DGO")) {
-    fmt::print("Error: input folder doesn't have a DGO folder. Is this the right input?\n");
-    return 1;
+std::pair<std::optional<std::string>, std::optional<xxh::hash64_t>> findElfFile(
+    const std::filesystem::path& extracted_iso_path) {
+  std::optional<std::string> serial = std::nullopt;
+  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
+  for (const auto& entry : fs::directory_iterator(extracted_iso_path)) {
+    auto as_str = entry.path().filename().string();
+    if (std::regex_match(as_str, std::regex(".{4}_.{3}\\..{2}"))) {
+      serial = std::make_optional(
+          fmt::format("{}-{}", as_str.substr(0, 4), as_str.substr(5, 3) + as_str.substr(9, 2)));
+      // We already found the path, so hash it while we're here
+      auto fp = fopen(entry.path().string().c_str(), "rb");
+      fseek(fp, 0, SEEK_END);
+      size_t size = ftell(fp);
+      std::vector<u8> buffer(size);
+      rewind(fp);
+      fread(&buffer[0], sizeof(std::vector<u8>::value_type), buffer.size(), fp);
+      elf_hash = std::make_optional(xxh::xxhash<64>(buffer));
+      break;
+    }
+  }
+  return {serial, elf_hash};
+}
+
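The regex above matches PS2 boot-ELF filenames of the form XXXX_NNN.NN (e.g. SCUS_971.24), and the serial is rebuilt by splicing out the underscore and the dot. A standalone illustration of that substring math; the helper name is hypothetical:

#include <cassert>
#include <regex>
#include <string>

std::string serial_from_elf_name(const std::string& as_str) {
  // "SCUS_971.24": chars 0-3 = "SCUS", 5-7 = "971", 9-10 = "24" -> "SCUS-97124"
  assert(std::regex_match(as_str, std::regex(".{4}_.{3}\\..{2}")));
  return as_str.substr(0, 4) + "-" + as_str.substr(5, 3) + as_str.substr(9, 2);
}
// serial_from_elf_name("SCUS_971.24") == "SCUS-97124", the key format used in isoDatabase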
+ExtractorErrorCode validate(const IsoFile& iso_file,
+                            const std::filesystem::path& extracted_iso_path) {
+  if (!std::filesystem::exists(extracted_iso_path / "DGO")) {
+    fmt::print(stderr, "ERROR: input folder doesn't have a DGO folder. Is this the right input?\n");
+    return ExtractorErrorCode::VALIDATION_BAD_EXTRACTION;
+  }
+
+  std::optional<ExtractorErrorCode> error_code;
+  std::optional<std::string> serial = std::nullopt;
+  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
+  std::tie(serial, elf_hash) = findElfFile(extracted_iso_path);
+
+  // - XOR all hashes together and hash the result. This makes the ordering of the hashes (aka
+  //   files) irrelevant
+  xxh::hash64_t combined_hash = 0;
+  for (const auto& hash : iso_file.hashes) {
+    combined_hash ^= hash;
+  }
+  xxh::hash64_t contents_hash = xxh::xxhash<64>({combined_hash});
+
+  if (!serial || !elf_hash) {
+    fmt::print(stderr, "ERROR: Unable to locate a Serial/ELF file!\n");
+    if (!error_code.has_value()) {
+      error_code = std::make_optional(ExtractorErrorCode::VALIDATION_CANT_LOCATE_ELF);
+    }
+    // No point in continuing here
+    return error_code.value();
+  }
+
+  // Find the game in our tracking database
+  auto dbEntry = isoDatabase.find(serial.value());
+  if (dbEntry == isoDatabase.end()) {
+    fmt::print(stderr, "ERROR: Serial '{}' not found in the validation database\n", serial.value());
+    if (!error_code.has_value()) {
+      error_code = std::make_optional(ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB);
+    }
+  } else {
+    auto& metaMap = dbEntry->second;
+    auto meta_entry = metaMap.find(elf_hash.value());
+    if (meta_entry == metaMap.end()) {
+      fmt::print(stderr,
+                 "ERROR: ELF Hash '{}' not found in the validation database, is this a new or "
+                 "modified version of the same game?\n",
+                 elf_hash.value());
+      if (!error_code.has_value()) {
+        error_code = std::make_optional(ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB);
+      }
+    } else {
+      auto meta = meta_entry->second;
+      // Print out some information
+      fmt::print("Detected Game Metadata:\n");
+      fmt::print("\tDetected - {}\n", meta.canonical_name);
+      fmt::print("\tRegion - {}\n", meta.region);
+      fmt::print("\tSerial - {}\n", dbEntry->first);
+      fmt::print("\tUses Decompiler Config - {}\n", meta.decomp_config);
+
+      // - Number of Files
+      if (meta.num_files != iso_file.files_extracted) {
+        fmt::print(stderr,
+                   "ERROR: Extracted an unexpected number of files. Expected '{}', Actual '{}'\n",
+                   meta.num_files, iso_file.files_extracted);
+        if (!error_code.has_value()) {
+          error_code =
+              std::make_optional(ExtractorErrorCode::VALIDATION_INCORRECT_EXTRACTION_COUNT);
+        }
+      }
+      // Check the ISO Hash
+      if (meta.contents_hash != contents_hash) {
+        fmt::print(stderr,
+                   "ERROR: Overall ISO contents hash does not match. Expected '{}', Actual '{}'\n",
+                   meta.contents_hash, contents_hash);
+        if (!error_code.has_value()) {
+          error_code = std::make_optional(ExtractorErrorCode::VALIDATION_BAD_ISO_CONTENTS);
+        }
+      }
+    }
+  }
+
+  // Finally, return the result
+  if (error_code.has_value()) {
+    // Generate the map entry to make things simple, just for convenience
+    if (error_code.value() == ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB) {
+      fmt::print(
+          "If this is a new release or version that should be supported, consider adding the "
+          "following serial entry to the database:\n");
+      fmt::print(
+          "\t'{{\"{}\", {{{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
+          "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\"}}}}}}}}'\n",
+          serial.value(), elf_hash.value(), iso_file.files_extracted, contents_hash);
+    } else if (error_code.value() == ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB) {
+      fmt::print(
+          "If this is a new release or version that should be supported, consider adding the "
+          "following ELF entry to the database under the '{}' serial:\n",
+          serial.value());
+      fmt::print(
+          "\t'{{{}, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
+          "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\"}}}}'\n",
+          elf_hash.value(), iso_file.files_extracted, contents_hash);
+    } else {
+      fmt::print(stderr,
+                 "Validation has failed to match with expected values, see the above errors for "
+                 "specifics. This may be an error in the validation database!\n");
+    }
+    return error_code.value();
+  }
+
+  return ExtractorErrorCode::SUCCESS;
+}
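To make the suggested-entry templates printed above concrete: given the ISOMetadata struct and the map layout from this patch, a second release would slot into isoDatabase like the sketch below. Every value here is a placeholder for illustration, not data measured from a real disc:

// Hypothetical entry - serial, hashes, file count, and config name are placeholders.
static const std::pair<std::string, std::map<xxh::hash64_t, ISOMetadata>> example_entry{
    "SCES-50361",                 // serial recovered from the boot ELF filename
    {{1234567890123456789U,       // xxHash64 of the boot ELF
      {"Jak and Daxter: The Precursor Legacy", "PAL", 338,
       9876543210987654321U,      // order-independent contents hash
       "jak1_pal"}}}};            // would read decompiler/config/jak1_pal.jsonc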
+
+std::optional<ISOMetadata> determineRelease(const std::filesystem::path& jak1_input_files) {
+  std::optional<std::string> serial = std::nullopt;
+  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
+  std::tie(serial, elf_hash) = findElfFile(jak1_input_files);
+
+  if (!serial || !elf_hash) {
+    return std::nullopt;
+  }
+
+  // Find the game in our tracking database
+  auto dbEntry = isoDatabase.find(serial.value());
+  if (dbEntry == isoDatabase.end()) {
+    return std::nullopt;
+  } else {
+    auto& metaMap = dbEntry->second;
+    auto meta_entry = metaMap.find(elf_hash.value());
+    if (meta_entry == metaMap.end()) {
+      return std::nullopt;
+    } else {
+      return std::make_optional(meta_entry->second);
+    }
+  }
-  return 0;
 }
 
 void decompile(std::filesystem::path jak1_input_files) {
   using namespace decompiler;
-  Config config = read_config_file(
-      (file_util::get_jak_project_dir() / "decompiler" / "config" / "jak1_ntsc_black_label.jsonc")
-          .string(),
-      {});
+
+  // Determine which config to use from the database
+  auto meta = determineRelease(jak1_input_files);
+  std::string decomp_config = "jak1_ntsc_black_label";
+  if (meta.has_value()) {
+    decomp_config = meta.value().decomp_config;
+    fmt::print("INFO: Automatically detected decompiler config, using - {}\n", decomp_config);
+  }
+
+  Config config = read_config_file((file_util::get_jak_project_dir() / "decompiler" / "config" /
+                                    fmt::format("{}.jsonc", decomp_config))
+                                       .string(),
+                                   {});
 
   std::vector<std::string> dgos, objs;
 
@@ -128,7 +313,7 @@ int main(int argc, char** argv) {
   std::filesystem::path project_path_override;
   bool flag_runall = false;
   bool flag_extract = false;
-  bool flag_validate = false;
+  bool flag_fail_on_validation = false;
   bool flag_decompile = false;
   bool flag_compile = false;
   bool flag_play = false;
@@ -145,7 +330,7 @@ int main(int argc, char** argv) {
       ->check(CLI::ExistingPath);
   app.add_flag("-a,--all", flag_runall, "Run all steps, from extraction to playing the game");
   app.add_flag("-e,--extract", flag_extract, "Extract the ISO");
-  app.add_flag("-v,--validate", flag_validate, "Validate the ISO / game files");
+  app.add_flag("-v,--validate", flag_fail_on_validation, "Fail on Validation Errors");
   app.add_flag("-d,--decompile", flag_decompile, "Decompile the game data");
   app.add_flag("-c,--compile", flag_compile, "Compile the game");
   app.add_flag("-p,--play", flag_play, "Play the game");
@@ -156,7 +341,7 @@ int main(int argc, char** argv) {
   fmt::print("Working Directory - {}\n", std::filesystem::current_path().string());
 
   // If no flag is set, we default to running everything
-  if (!flag_extract && !flag_validate && !flag_decompile && !flag_compile && !flag_play) {
+  if (!flag_extract && !flag_decompile && !flag_compile && !flag_play) {
     fmt::print("Running all steps, no flags provided!\n");
     flag_runall = true;
   }
@@ -177,15 +362,15 @@ int main(int argc, char** argv) {
   }
 
   if (flag_runall || flag_extract) {
-    if (!std::filesystem::is_directory(path_to_iso_files)) {
-      extract_files(data_dir_path, path_to_iso_files);
-    }
-  }
-
-  if (flag_runall || flag_validate) {
-    auto ok = validate(path_to_iso_files);
-    if (ok != 0) {
-      return ok;
+    if (!std::filesystem::is_directory(data_dir_path)) {
+      auto iso_file = extract_files(data_dir_path, path_to_iso_files);
+      auto validation_res = validate(iso_file, path_to_iso_files);
+      if (validation_res == ExtractorErrorCode::VALIDATION_BAD_EXTRACTION) {
+        // We fail here regardless of the flag
+        return static_cast<int>(validation_res);
+      } else if (flag_fail_on_validation && validation_res != ExtractorErrorCode::SUCCESS) {
+        return static_cast<int>(validation_res);
+      }
     }
   }
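findElfFile above slurps the whole ELF into a vector and hashes it in one call; the same read-then-hash pattern, pulled out as a helper (the name is hypothetical, with the error handling the inline version skips):

#include <cstdint>
#include <cstdio>
#include <vector>
#include "third-party/xxhash.hpp"

// Read a whole file and return its xxHash64, mirroring the ELF hashing in
// findElfFile. Returns 0 on I/O failure (illustrative helper, not in the patch).
uint64_t hash_file_xxh64(const char* path) {
  FILE* fp = fopen(path, "rb");
  if (!fp) {
    return 0;
  }
  fseek(fp, 0, SEEK_END);
  size_t size = ftell(fp);
  rewind(fp);
  std::vector<uint8_t> buffer(size);
  size_t read = size ? fread(buffer.data(), 1, size, fp) : 0;
  fclose(fp);
  return read == size ? xxh::xxhash<64>(buffer) : 0;
}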
diff --git a/third-party/xxhash.hpp b/third-party/xxhash.hpp
new file mode 100644
index 000000000..804569de6
--- /dev/null
+++ b/third-party/xxhash.hpp
@@ -0,0 +1,1938 @@
+// Version - 0.7.3
+// - https://github.com/RedSpah/xxhash_cpp/blob/0.7.3/include/xxhash.hpp
+
+#pragma once
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+/*
+xxHash - Extremely Fast Hash algorithm
+Header File
+Copyright (C) 2012-2020, Yann Collet.
+Copyright (C) 2017-2020, Red Gavin.
+All rights reserved.
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+You can contact the author at : +- xxHash source repository : https://github.com/Cyan4973/xxHash +- xxHash C++ port repository : https://github.com/RedSpah/xxhash_cpp +*/ + +/* Intrinsics + * Sadly has to be included in the global namespace or literally everything breaks + */ +#include + +namespace xxh { +/* ************************************* + * Versioning + ***************************************/ + +namespace version { +constexpr int cpp_version_major = 0; +constexpr int cpp_version_minor = 7; +constexpr int cpp_version_release = 3; +} // namespace version + +constexpr uint32_t version_number() { + return version::cpp_version_major * 10000 + version::cpp_version_minor * 100 + + version::cpp_version_release; +} + +/* ************************************* + * Basic Types - Predefining uint128_t for intrin + ***************************************/ + +namespace typedefs { +struct alignas(16) uint128_t { + uint64_t low64 = 0; + uint64_t high64 = 0; + + bool operator==(const uint128_t& other) { + return (low64 == other.low64 && high64 == other.high64); + } + + bool operator>(const uint128_t& other) { return (high64 > other.high64 || low64 > other.low64); } + + bool operator>=(const uint128_t& other) { return (*this > other || *this == other); } + + bool operator<(const uint128_t& other) { return !(*this >= other); } + + bool operator<=(const uint128_t& other) { return !(*this > other); } + + bool operator!=(const uint128_t& other) { return !(*this == other); } + + uint128_t(uint64_t low, uint64_t high) : low64(low), high64(high) {} + + uint128_t() {} +}; + +} // namespace typedefs + +using uint128_t = typedefs::uint128_t; + +/* ************************************* + * Compiler / Platform Specific Features + ***************************************/ + +namespace intrin { +/*!XXH_CPU_LITTLE_ENDIAN : + * This is a CPU endian detection macro, will be + * automatically set to 1 (little endian) if it is left undefined. + * If compiling for a big endian system (why), XXH_CPU_LITTLE_ENDIAN has to be explicitly defined as + * 0. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +#define XXH_CPU_LITTLE_ENDIAN 1 +#endif + +/* Vectorization Detection + * NOTE: XXH_NEON and XXH_VSX aren't supported in this C++ port. + * The primary reason is that I don't have access to an ARM and PowerPC + * machines to test them, and the secondary reason is that I even doubt anyone writing + * code for such machines would bother using a C++ port rather than the original C version. + */ +#ifndef XXH_VECTOR /* can be predefined on command line */ +#if defined(__AVX2__) +#define XXH_VECTOR 2 /* AVX2 for Haswell and Bulldozer */ +#elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +#define XXH_VECTOR 1 /* SSE2 for Pentium 4 and all x86_64 */ +#else +#define XXH_VECTOR 0 /* Portable scalar version */ +#endif +#endif + +constexpr int vector_mode = XXH_VECTOR; + +#if XXH_VECTOR == 2 /* AVX2 for Haswell and Bulldozer */ +constexpr int acc_align = 32; +using avx2_underlying = __m256i; +using sse2_underlying = __m128i; +#elif XXH_VECTOR == 1 /* SSE2 for Pentium 4 and all x86_64 */ +using avx2_underlying = void; // std::array<__m128i, 2>; +using sse2_underlying = __m128i; +constexpr int acc_align = 16; +#else /* Portable scalar version */ +using avx2_underlying = void; // std::array; +using sse2_underlying = void; // std::array; +constexpr int acc_align = 8; +#endif + +/* Compiler Specifics + * Defines inline macros and includes specific compiler's instrinsics. 
+ * */ +#ifdef XXH_FORCE_INLINE /* First undefining the symbols in case they're already defined */ +#undef XXH_FORCE_INLINE +#endif +#ifdef XXH_NO_INLINE +#undef XXH_NO_INLINE +#endif + +#ifdef _MSC_VER /* Visual Studio */ +#pragma warning(disable : 4127) +#define XXH_FORCE_INLINE static __forceinline +#define XXH_NO_INLINE static __declspec(noinline) +#include +#elif defined(__GNUC__) /* Clang / GCC */ +#define XXH_FORCE_INLINE static inline __attribute__((always_inline)) +#define XXH_NO_INLINE static __attribute__((noinline)) +#include +#else +#define XXH_FORCE_INLINE static inline +#define XXH_NO_INLINE static +#endif + +/* Prefetch + * Can be disabled by defining XXH_NO_PREFETCH + */ +#if defined(XXH_NO_PREFETCH) +XXH_FORCE_INLINE void prefetch(const void* ptr) {} +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) +XXH_FORCE_INLINE void prefetch(const void* ptr) { + _mm_prefetch((const char*)(ptr), _MM_HINT_T0); +} +#elif defined(__GNUC__) +XXH_FORCE_INLINE void prefetch(const void* ptr) { + __builtin_prefetch((ptr), 0, 3); +} +#else +XXH_FORCE_INLINE void prefetch(const void* ptr) {} +#endif + +/* Restrict + * Defines macro for restrict, which in C++ is sadly just a compiler extension (for now). + * Can be disabled by defining XXH_NO_RESTRICT + */ +#ifdef XXH_RESTRICT +#undef XXH_RESTRICT +#endif + +#if (defined(__GNUC__) || defined(_MSC_VER)) && defined(__cplusplus) && !defined(XXH_NO_RESTRICT) +#define XXH_RESTRICT __restrict +#else +#define XXH_RESTRICT +#endif + +/* Likely / Unlikely + * Defines macros for Likely / Unlikely, which are official in C++20, but sadly this library aims + * the previous standard. Not present on MSVC. Can be disabled by defining XXH_NO_BRANCH_HINTS + */ +#if ((defined(__GNUC__) && (__GNUC__ >= 3)) || \ + (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)) && \ + !defined(XXH_NO_BRANCH_HINTS) +#define XXH_likely(x) __builtin_expect(x, 1) +#define XXH_unlikely(x) __builtin_expect(x, 0) +#else +#define XXH_likely(x) (x) +#define XXH_unlikely(x) (x) +#endif + +namespace bit_ops { +#if defined(_MSC_VER) +static inline uint32_t rotl32(uint32_t x, int32_t r) { + return _rotl(x, r); +} +static inline uint64_t rotl64(uint64_t x, int32_t r) { + return _rotl64(x, r); +} +static inline uint32_t rotr32(uint32_t x, int32_t r) { + return _rotr(x, r); +} +static inline uint64_t rotr64(uint64_t x, int32_t r) { + return _rotr64(x, r); +} +#else +static inline uint32_t rotl32(uint32_t x, int32_t r) { + return ((x << r) | (x >> (32 - r))); +} +static inline uint64_t rotl64(uint64_t x, int32_t r) { + return ((x << r) | (x >> (64 - r))); +} +static inline uint32_t rotr32(uint32_t x, int32_t r) { + return ((x >> r) | (x << (32 - r))); +} +static inline uint64_t rotr64(uint64_t x, int32_t r) { + return ((x >> r) | (x << (64 - r))); +} +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +static inline uint32_t swap32(uint32_t x) { + return _byteswap_ulong(x); +} +static inline uint64_t swap64(uint64_t x) { + return _byteswap_uint64(x); +} +#elif defined(__GNUC__) +static inline uint32_t swap32(uint32_t x) { + return __builtin_bswap32(x); +} +static inline uint64_t swap64(uint64_t x) { + return __builtin_bswap64(x); +} +#else +static inline uint32_t swap32(uint32_t x) { + return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | + ((x >> 24) & 0x000000ff); +} +static inline uint64_t swap64(uint64_t x) { + return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 
0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +#if defined(_MSC_VER) && defined(_M_IX86) // Only for 32-bit MSVC. +XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { + return __emulu(x, y); +} +#else +XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { + return (uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y); +} +#endif + +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) +__attribute__((__target__("no-sse"))) +#endif +static inline uint128_t +mult64to128(uint64_t lhs, uint64_t rhs) { + +#if defined(__GNUC__) && !defined(__wasm__) && defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + uint128_t const r128 = {(uint64_t)(product), (uint64_t)(product >> 64)}; + return r128; + +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +#pragma intrinsic(_umul128) +#endif + uint64_t product_high; + uint64_t const product_low = _umul128(lhs, rhs, &product_high); + return uint128_t{product_low, product_high}; +#else + uint64_t const lo_lo = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + uint64_t const hi_lo = bit_ops::mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + uint64_t const lo_hi = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + uint64_t const hi_hi = bit_ops::mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + uint128_t r128 = {lower, upper}; + return r128; +#endif +} +} // namespace bit_ops +} // namespace intrin + +/* ************************************* + * Basic Types - Everything else + ***************************************/ + +namespace typedefs { +/* ************************************* + * Basic Types - Detail + ***************************************/ + +template +struct hash_type { + using type = void; +}; + +template <> +struct hash_type<32> { + using type = uint32_t; +}; + +template <> +struct hash_type<64> { + using type = uint64_t; +}; + +template <> +struct hash_type<128> { + using type = uint128_t; +}; + +template +struct vec_type { + using type = void; +}; + +template <> +struct vec_type<64> { + using type = uint64_t; +}; + +template <> +struct vec_type<128> { + using type = intrin::sse2_underlying; +}; + +template <> +struct vec_type<256> { + using type = intrin::avx2_underlying; +}; + +/* Rationale + * On the surface level uint_type appears to be pointless, + * as it is just a copy of hash_type. They do use the same types, + * that is true, but the reasoning for the difference is aimed at humans, + * not the compiler, as a difference between values that are 'just' numbers, + * and those that represent actual hash values. 
+ */ +template +struct uint_type { + using type = void; +}; + +template <> +struct uint_type<32> { + using type = uint32_t; +}; + +template <> +struct uint_type<64> { + using type = uint64_t; +}; + +template <> +struct uint_type<128> { + using type = uint128_t; +}; +} // namespace typedefs + +template +using hash_t = typename typedefs::hash_type::type; +using hash32_t = hash_t<32>; +using hash64_t = hash_t<64>; +using hash128_t = hash_t<128>; + +template +using vec_t = typename typedefs::vec_type::type; +using vec64_t = vec_t<64>; +using vec128_t = vec_t<128>; +using vec256_t = vec_t<256>; + +template +using uint_t = typename typedefs::uint_type::type; + +/* ************************************* + * Bit Operations + ***************************************/ + +namespace bit_ops { +/* **************************************** + * Bit Operations + ******************************************/ + +template +static inline uint_t rotl(uint_t n, int32_t r) { + if constexpr (N == 32) { + return intrin::bit_ops::rotl32(n, r); + } + + if constexpr (N == 64) { + return intrin::bit_ops::rotl64(n, r); + } +} + +template +static inline uint_t rotr(uint_t n, int32_t r) { + if constexpr (N == 32) { + return intrin::bit_ops::rotr32(n, r); + } + + if constexpr (N == 64) { + return intrin::bit_ops::rotr64(n, r); + } +} + +template +static inline uint_t swap(uint_t n) { + if constexpr (N == 32) { + return intrin::bit_ops::swap32(n); + } + + if constexpr (N == 64) { + return intrin::bit_ops::swap64(n); + } +} + +static inline uint64_t mul32to64(uint32_t x, uint32_t y) { + return intrin::bit_ops::mult32to64(x, y); +} + +static inline uint128_t mul64to128(uint64_t x, uint64_t y) { + return intrin::bit_ops::mult64to128(x, y); +} + +static inline uint64_t mul128fold64(uint64_t x, uint64_t y) { + uint128_t product = mul64to128(x, y); + + return (product.low64 ^ product.high64); +} +} // namespace bit_ops + +/* ************************************* + * Memory Functions + ***************************************/ + +namespace mem_ops { + +/* ************************************* + * Endianness + ***************************************/ + +constexpr bool is_little_endian() { + return (XXH_CPU_LITTLE_ENDIAN == 1); +} + +/* ************************************* + * Memory Access + ***************************************/ + +template +static inline uint_t read(const void* memPtr) { + uint_t val; + + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +template +static inline uint_t readLE(const void* ptr) { + if constexpr (is_little_endian()) { + return read(ptr); + } else { + return bit_ops::swap(read(ptr)); + } +} + +template +static inline uint_t readBE(const void* ptr) { + if constexpr (is_little_endian()) { + return bit_ops::swap(read(ptr)); + } else { + return read(ptr); + } +} + +template +static void writeLE(void* dst, uint_t v) { + if constexpr (!is_little_endian()) { + v = bit_ops::swap(v); + } + + memcpy(dst, &v, sizeof(v)); +} +} // namespace mem_ops + +/* ************************************* + * Vector Functions + ***************************************/ + +namespace vec_ops { +template +XXH_FORCE_INLINE vec_t loadu(const vec_t* input) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid template argument passed to xxh::vec_ops::loadu"); + + if constexpr (N == 128) { + return _mm_loadu_si128(input); + } + + if constexpr (N == 256) { + return _mm256_loadu_si256(input); + } + + if constexpr (N == 64) { + return mem_ops::readLE<64>(input); + } +} + +// 'xorv' instead of 'xor' because 'xor' is a weird 
wacky alternate operator expression thing. +template +XXH_FORCE_INLINE vec_t xorv(vec_t a, vec_t b) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid argument passed to xxh::vec_ops::xorv"); + + if constexpr (N == 128) { + return _mm_xor_si128(a, b); + } + + if constexpr (N == 256) { + return _mm256_xor_si256(a, b); + } + + if constexpr (N == 64) { + return a ^ b; + } +} + +template +XXH_FORCE_INLINE vec_t mul(vec_t a, vec_t b) { + static_assert(!(N != 128 && N != 256 && N != 64), "Invalid argument passed to xxh::vec_ops::mul"); + + if constexpr (N == 128) { + return _mm_mul_epu32(a, b); + } + + if constexpr (N == 256) { + return _mm256_mul_epu32(a, b); + } + + if constexpr (N == 64) { + return a * b; + } +} + +template +XXH_FORCE_INLINE vec_t add(vec_t a, vec_t b) { + static_assert(!(N != 128 && N != 256 && N != 64), "Invalid argument passed to xxh::vec_ops::add"); + + if constexpr (N == 128) { + return _mm_add_epi64(a, b); + } + + if constexpr (N == 256) { + return _mm256_add_epi64(a, b); + } + + if constexpr (N == 64) { + return a + b; + } +} + +template +XXH_FORCE_INLINE vec_t shuffle(vec_t a) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid argument passed to xxh::vec_ops::shuffle"); + + if constexpr (N == 128) { + return _mm_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); + } + + if constexpr (N == 256) { + return _mm256_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); + } + + if constexpr (N == 64) { + return a; + } +} + +template +XXH_FORCE_INLINE vec_t set1(int a) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid argument passed to xxh::vec_ops::set1"); + + if constexpr (N == 128) { + return _mm_set1_epi32(a); + } + + if constexpr (N == 256) { + return _mm256_set1_epi32(a); + } + + if constexpr (N == 64) { + return a; + } +} + +template +XXH_FORCE_INLINE vec_t srli(vec_t n, int a) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid argument passed to xxh::vec_ops::srli"); + + if constexpr (N == 128) { + return _mm_srli_epi64(n, a); + } + + if constexpr (N == 256) { + return _mm256_srli_epi64(n, a); + } + + if constexpr (N == 64) { + return n >> a; + } +} + +template +XXH_FORCE_INLINE vec_t slli(vec_t n, int a) { + static_assert(!(N != 128 && N != 256 && N != 64), + "Invalid argument passed to xxh::vec_ops::slli"); + + if constexpr (N == 128) { + return _mm_slli_epi64(n, a); + } + + if constexpr (N == 256) { + return _mm256_slli_epi64(n, a); + } + + if constexpr (N == 64) { + return n << a; + } +} +} // namespace vec_ops + +/* ************************************* + * Algorithm Implementation - xxhash + ***************************************/ + +namespace detail { +using namespace mem_ops; +using namespace bit_ops; + +/* ************************************* + * Constants + ***************************************/ + +constexpr static std::array primes32 = {2654435761U, 2246822519U, 3266489917U, + 668265263U, 374761393U}; +constexpr static std::array primes64 = { + 11400714785074694791ULL, 14029467366897019727ULL, 1609587929392839161ULL, + 9650029242287828579ULL, 2870177450012600261ULL}; + +template +constexpr uint_t PRIME(uint64_t n) { + if constexpr (N == 32) { + return primes32[n - 1]; + } else { + return primes64[n - 1]; + } +} + +/* ************************************* + * Functions + ***************************************/ + +template +static inline uint_t round(uint_t seed, uint_t input) { + seed += input * PRIME(2); + + if constexpr (N == 32) { + seed = rotl(seed, 13); + } else { + seed = rotl(seed, 31); + } + + seed 
*= PRIME(1); + return seed; +} + +static inline uint64_t mergeRound64(hash64_t acc, uint64_t val) { + val = round<64>(0, val); + acc ^= val; + acc = acc * PRIME<64>(1) + PRIME<64>(4); + return acc; +} + +static inline void endian_align_sub_mergeround(hash64_t& hash_ret, + uint64_t v1, + uint64_t v2, + uint64_t v3, + uint64_t v4) { + hash_ret = mergeRound64(hash_ret, v1); + hash_ret = mergeRound64(hash_ret, v2); + hash_ret = mergeRound64(hash_ret, v3); + hash_ret = mergeRound64(hash_ret, v4); +} + +template +static inline hash_t endian_align_sub_ending(hash_t hash_ret, + const uint8_t* p, + const uint8_t* bEnd) { + if constexpr (N == 32) { + while ((p + 4) <= bEnd) { + hash_ret += readLE<32>(p) * PRIME<32>(3); + hash_ret = rotl<32>(hash_ret, 17) * PRIME<32>(4); + p += 4; + } + + while (p < bEnd) { + hash_ret += (*p) * PRIME<32>(5); + hash_ret = rotl<32>(hash_ret, 11) * PRIME<32>(1); + p++; + } + + hash_ret ^= hash_ret >> 15; + hash_ret *= PRIME<32>(2); + hash_ret ^= hash_ret >> 13; + hash_ret *= PRIME<32>(3); + hash_ret ^= hash_ret >> 16; + + return hash_ret; + } else { + while (p + 8 <= bEnd) { + const uint64_t k1 = round<64>(0, readLE<64>(p)); + + hash_ret ^= k1; + hash_ret = rotl<64>(hash_ret, 27) * PRIME<64>(1) + PRIME<64>(4); + p += 8; + } + + if (p + 4 <= bEnd) { + hash_ret ^= static_cast(readLE<32>(p)) * PRIME<64>(1); + hash_ret = rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3); + p += 4; + } + + while (p < bEnd) { + hash_ret ^= (*p) * PRIME<64>(5); + hash_ret = rotl<64>(hash_ret, 11) * PRIME<64>(1); + p++; + } + + hash_ret ^= hash_ret >> 33; + hash_ret *= PRIME<64>(2); + hash_ret ^= hash_ret >> 29; + hash_ret *= PRIME<64>(3); + hash_ret ^= hash_ret >> 32; + + return hash_ret; + } +} + +template +static inline hash_t endian_align(const void* input, size_t len, uint_t seed) { + static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode."); + + const uint8_t* p = static_cast(input); + const uint8_t* bEnd = p + len; + hash_t hash_ret; + + if (len >= (N / 2)) { + const uint8_t* const limit = bEnd - (N / 2); + uint_t v1 = seed + PRIME(1) + PRIME(2); + uint_t v2 = seed + PRIME(2); + uint_t v3 = seed + 0; + uint_t v4 = seed - PRIME(1); + + do { + v1 = round(v1, readLE(p)); + p += (N / 8); + v2 = round(v2, readLE(p)); + p += (N / 8); + v3 = round(v3, readLE(p)); + p += (N / 8); + v4 = round(v4, readLE(p)); + p += (N / 8); + } while (p <= limit); + + hash_ret = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18); + + if constexpr (N == 64) { + endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); + } + } else { + hash_ret = seed + PRIME(5); + } + + hash_ret += static_cast>(len); + + return endian_align_sub_ending(hash_ret, p, bEnd); +} +} // namespace detail + +/* ************************************* + * Algorithm Implementation - xxhash3 + ***************************************/ + +namespace detail3 { +using namespace vec_ops; +using namespace detail; +using namespace mem_ops; +using namespace bit_ops; + +/* ************************************* + * Enums + ***************************************/ + +enum class acc_width : uint8_t { acc_64bits, acc_128bits }; +enum class vec_mode : uint8_t { scalar = 0, sse2 = 1, avx2 = 2 }; + +/* ************************************* + * Constants + ***************************************/ + +constexpr uint64_t secret_default_size = 192; +constexpr uint64_t secret_size_min = 136; +constexpr uint64_t secret_consume_rate = 8; +constexpr uint64_t stripe_len = 64; +constexpr uint64_t acc_nb = 8; +constexpr uint64_t 
prefetch_distance = 384; +constexpr uint64_t secret_lastacc_start = 7; +constexpr uint64_t secret_mergeaccs_start = 11; +constexpr uint64_t midsize_max = 240; +constexpr uint64_t midsize_startoffset = 3; +constexpr uint64_t midsize_lastoffset = 17; + +constexpr vec_mode vector_mode = static_cast(intrin::vector_mode); +constexpr uint64_t acc_align = intrin::acc_align; +constexpr std::array vector_bit_width{64, 128, 256}; + +/* ************************************* + * Defaults + ***************************************/ + +alignas(64) constexpr uint8_t default_secret[secret_default_size] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + +constexpr std::array init_acc = {PRIME<32>(3), PRIME<64>(1), PRIME<64>(2), + PRIME<64>(3), PRIME<64>(4), PRIME<32>(2), + PRIME<64>(5), PRIME<32>(1)}; + +/* ************************************* + * Functions + ***************************************/ + +static hash_t<64> avalanche(hash_t<64> h64) { + constexpr uint64_t avalanche_mul_prime = 0x165667919E3779F9ULL; + + h64 ^= h64 >> 37; + h64 *= avalanche_mul_prime; + h64 ^= h64 >> 32; + return h64; +} + +template +XXH_FORCE_INLINE void accumulate_512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret, + acc_width width) { + constexpr uint64_t bits = vector_bit_width[static_cast(V)]; + + using vec_t = vec_t; + + alignas(sizeof(vec_t)) vec_t* const xacc = static_cast(acc); + const vec_t* const xinput = static_cast(input); + const vec_t* const xsecret = static_cast(secret); + + for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) { + vec_t const data_vec = loadu(xinput + i); + vec_t const key_vec = loadu(xsecret + i); + vec_t const data_key = xorv(data_vec, key_vec); + vec_t product = set1(0); + + if constexpr (V != vec_mode::scalar) { + vec_t const data_key_lo = shuffle(data_key); + + product = mul(data_key, data_key_lo); + + if (width == acc_width::acc_128bits) { + vec_t const data_swap = shuffle(data_vec); + vec_t const sum = add(xacc[i], data_swap); + + xacc[i] = add(sum, product); + } else { + vec_t const sum = add(xacc[i], data_vec); + + xacc[i] = add(sum, product); + } + } else { + product = mul32to64(data_key & 0xFFFFFFFF, data_key >> 32); + + if (width == acc_width::acc_128bits) { + xacc[i ^ 1] = add(xacc[i ^ 1], data_vec); + } else { + xacc[i] = add(xacc[i], data_vec); + } + + xacc[i] = add(xacc[i], product); + } + } +} + +template 
+XXH_FORCE_INLINE void scramble_acc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + constexpr uint64_t bits = vector_bit_width[static_cast(V)]; + ; + + using vec_t = vec_t; + + alignas(sizeof(vec_t)) vec_t* const xacc = (vec_t*)acc; + const vec_t* const xsecret = (const vec_t*)secret; + + for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) { + vec_t const acc_vec = xacc[i]; + vec_t const shifted = srli(acc_vec, 47); + vec_t const data_vec = xorv(acc_vec, shifted); + vec_t const key_vec = loadu(xsecret + i); + vec_t const data_key = xorv(data_vec, key_vec); + + if constexpr (V != vec_mode::scalar) { + vec_t const prime32 = set1(PRIME<32>(1)); + vec_t const data_key_hi = shuffle(data_key); + vec_t const prod_lo = mul(data_key, prime32); + vec_t const prod_hi = mul(data_key_hi, prime32); + + xacc[i] = add(prod_lo, vec_ops::slli(prod_hi, 32)); + } else { + xacc[i] = mul(data_key, PRIME<32>(1)); + } + } +} + +XXH_FORCE_INLINE void accumulate(uint64_t* XXH_RESTRICT acc, + const uint8_t* XXH_RESTRICT input, + const uint8_t* XXH_RESTRICT secret, + size_t nbStripes, + acc_width accWidth) { + for (size_t n = 0; n < nbStripes; n++) { + const uint8_t* const in = input + n * stripe_len; + + intrin::prefetch(in + prefetch_distance); + accumulate_512(acc, in, secret + n * secret_consume_rate, accWidth); + } +} + +XXH_FORCE_INLINE void hash_long_internal_loop(uint64_t* XXH_RESTRICT acc, + const uint8_t* XXH_RESTRICT input, + size_t len, + const uint8_t* XXH_RESTRICT secret, + size_t secretSize, + acc_width accWidth) { + size_t const nb_rounds = (secretSize - stripe_len) / secret_consume_rate; + size_t const block_len = stripe_len * nb_rounds; + size_t const nb_blocks = len / block_len; + + for (size_t n = 0; n < nb_blocks; n++) { + accumulate(acc, input + n * block_len, secret, nb_rounds, accWidth); + scramble_acc(acc, secret + secretSize - stripe_len); + } + + /* last partial block */ + size_t const nbStripes = (len - (block_len * nb_blocks)) / stripe_len; + + accumulate(acc, input + nb_blocks * block_len, secret, nbStripes, accWidth); + + /* last stripe */ + if (len & (stripe_len - 1)) { + const uint8_t* const p = input + len - stripe_len; + + accumulate_512(acc, p, secret + secretSize - stripe_len - secret_lastacc_start, + accWidth); + } +} + +XXH_FORCE_INLINE uint64_t mix_2_accs(const uint64_t* XXH_RESTRICT acc, + const uint8_t* XXH_RESTRICT secret) { + return mul128fold64(acc[0] ^ readLE<64>(secret), acc[1] ^ readLE<64>(secret + 8)); +} + +XXH_FORCE_INLINE uint64_t merge_accs(const uint64_t* XXH_RESTRICT acc, + const uint8_t* XXH_RESTRICT secret, + uint64_t start) { + uint64_t result64 = start; + + result64 += mix_2_accs(acc + 0, secret + 0); + result64 += mix_2_accs(acc + 2, secret + 16); + result64 += mix_2_accs(acc + 4, secret + 32); + result64 += mix_2_accs(acc + 6, secret + 48); + + return avalanche(result64); +} + +XXH_FORCE_INLINE void init_custom_secret(uint8_t* customSecret, uint64_t seed) { + for (uint64_t i = 0; i < secret_default_size / 16; i++) { + writeLE<64>(customSecret + i * 16, readLE<64>(default_secret + i * 16) + seed); + writeLE<64>(customSecret + i * 16 + 8, readLE<64>(default_secret + i * 16 + 8) - seed); + } +} + +template +XXH_FORCE_INLINE hash_t len_1to3(const uint8_t* input, + size_t len, + const uint8_t* secret, + uint64_t seed) { + if constexpr (N == 64) { + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combined = ((uint32_t)c1 << 16) | (((uint32_t)c2) << 24) | + (((uint32_t)c3) << 
0) | (((uint32_t)len) << 8); + uint64_t const bitflip = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; + uint64_t const keyed = (uint64_t)combined ^ bitflip; + uint64_t const mixed = keyed * PRIME<64>(1); + + return avalanche(mixed); + } else { + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combinedl = ((uint32_t)c1 << 16) + (((uint32_t)c2) << 24) + + (((uint32_t)c3) << 0) + (((uint32_t)len) << 8); + uint32_t const combinedh = rotl<32>(swap<32>(combinedl), 13); + uint64_t const bitflipl = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; + uint64_t const bitfliph = (readLE<32>(secret + 8) ^ readLE<32>(secret + 12)) - seed; + uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; + uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; + uint64_t const mixedl = keyed_lo * PRIME<64>(1); + uint64_t const mixedh = keyed_hi * PRIME<64>(5); + hash128_t const h128 = {avalanche(mixedl), avalanche(mixedh)}; + + return h128; + } +} + +template +XXH_FORCE_INLINE hash_t len_4to8(const uint8_t* input, + size_t len, + const uint8_t* secret, + uint64_t seed) { + constexpr uint64_t mix_constant = 0x9FB21C651E98DF25ULL; + + seed ^= (uint64_t)swap<32>((uint32_t)seed) << 32; + + if constexpr (N == 64) { + uint32_t const input1 = readLE<32>(input); + uint32_t const input2 = readLE<32>(input + len - 4); + uint64_t const bitflip = (readLE<64>(secret + 8) ^ readLE<64>(secret + 16)) - seed; + uint64_t const input64 = input2 + ((uint64_t)input1 << 32); + uint64_t x = input64 ^ bitflip; + + x ^= rotl<64>(x, 49) ^ rotl<64>(x, 24); + x *= mix_constant; + x ^= (x >> 35) + len; + x *= mix_constant; + + return (x ^ (x >> 28)); + } else { + uint32_t const input_lo = readLE<32>(input); + uint32_t const input_hi = readLE<32>(input + len - 4); + uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32); + uint64_t const bitflip = (readLE<64>(secret + 16) ^ readLE<64>(secret + 24)) + seed; + uint64_t const keyed = input_64 ^ bitflip; + uint128_t m128 = mul64to128(keyed, PRIME<64>(1) + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + m128.low64 ^= (m128.low64 >> 35); + m128.low64 *= mix_constant; + m128.low64 ^= (m128.low64 >> 28); + m128.high64 = avalanche(m128.high64); + + return m128; + } +} + +template +XXH_FORCE_INLINE hash_t len_9to16(const uint8_t* input, + size_t len, + const uint8_t* secret, + uint64_t seed) { + if constexpr (N == 64) { + uint64_t const bitflip1 = (readLE<64>(secret + 24) ^ readLE<64>(secret + 32)) + seed; + uint64_t const bitflip2 = (readLE<64>(secret + 40) ^ readLE<64>(secret + 48)) - seed; + uint64_t const input_lo = readLE<64>(input) ^ bitflip1; + uint64_t const input_hi = readLE<64>(input + len - 8) ^ bitflip2; + uint64_t const acc = len + swap<64>(input_lo) + input_hi + mul128fold64(input_lo, input_hi); + + return avalanche(acc); + } else { + uint64_t const bitflipl = (readLE<64>(secret + 32) ^ readLE<64>(secret + 40)) - seed; + uint64_t const bitfliph = (readLE<64>(secret + 48) ^ readLE<64>(secret + 56)) + seed; + uint64_t const input_lo = readLE<64>(input); + uint64_t input_hi = readLE<64>(input + len - 8); + uint128_t m128 = mul64to128(input_lo ^ input_hi ^ bitflipl, PRIME<64>(1)); + + m128.low64 += (uint64_t)(len - 1) << 54; + input_hi ^= bitfliph; + + if constexpr (sizeof(void*) < sizeof(uint64_t)) // 32-bit version + { + m128.high64 += (input_hi & 0xFFFFFFFF00000000) + mul32to64((uint32_t)input_hi, PRIME<32>(2)); + } else { + m128.high64 += input_hi + 
mul32to64((uint32_t)input_hi, PRIME<32>(2) - 1); + } + + m128.low64 ^= swap<64>(m128.high64); + + hash128_t h128 = mul64to128(m128.low64, PRIME<64>(2)); + + h128.high64 += m128.high64 * PRIME<64>(2); + h128.low64 = avalanche(h128.low64); + h128.high64 = avalanche(h128.high64); + + return h128; + } +} + +template +XXH_FORCE_INLINE hash_t len_0to16(const uint8_t* input, + size_t len, + const uint8_t* secret, + uint64_t seed) { + if (XXH_likely(len > 8)) { + return len_9to16(input, len, secret, seed); + } else if (XXH_likely(len >= 4)) { + return len_4to8(input, len, secret, seed); + } else if (len) { + return len_1to3(input, len, secret, seed); + } else { + if constexpr (N == 64) { + return avalanche((PRIME<64>(1) + seed) ^ (readLE<64>(secret + 56) ^ readLE<64>(secret + 64))); + } else { + uint64_t const bitflipl = readLE<64>(secret + 64) ^ readLE<64>(secret + 72); + uint64_t const bitfliph = readLE<64>(secret + 80) ^ readLE<64>(secret + 88); + + return hash128_t(avalanche((PRIME<64>(1) + seed) ^ bitflipl), + avalanche((PRIME<64>(2) - seed) ^ bitfliph)); + } + } +} + +template +XXH_FORCE_INLINE hash_t hash_long_internal(const uint8_t* XXH_RESTRICT input, + size_t len, + const uint8_t* XXH_RESTRICT secret = default_secret, + size_t secretSize = sizeof(default_secret)) { + alignas(acc_align) std::array acc = init_acc; + + if constexpr (N == 64) { + hash_long_internal_loop(acc.data(), input, len, secret, secretSize, acc_width::acc_64bits); + + /* converge into final hash */ + return merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); + } else { + hash_long_internal_loop(acc.data(), input, len, secret, secretSize, acc_width::acc_128bits); + + /* converge into final hash */ + uint64_t const low64 = + merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); + uint64_t const high64 = + merge_accs(acc.data(), secret + secretSize - sizeof(acc) - secret_mergeaccs_start, + ~((uint64_t)len * PRIME<64>(2))); + + return hash128_t(low64, high64); + } +} + +XXH_FORCE_INLINE uint64_t mix_16b(const uint8_t* XXH_RESTRICT input, + const uint8_t* XXH_RESTRICT secret, + uint64_t seed) { + uint64_t const input_lo = readLE<64>(input); + uint64_t const input_hi = readLE<64>(input + 8); + + return mul128fold64(input_lo ^ (readLE<64>(secret) + seed), + input_hi ^ (readLE<64>(secret + 8) - seed)); +} + +XXH_FORCE_INLINE uint128_t mix_32b(uint128_t acc, + const uint8_t* input1, + const uint8_t* input2, + const uint8_t* secret, + uint64_t seed) { + acc.low64 += mix_16b(input1, secret + 0, seed); + acc.low64 ^= readLE<64>(input2) + readLE<64>(input2 + 8); + acc.high64 += mix_16b(input2, secret + 16, seed); + acc.high64 ^= readLE<64>(input1) + readLE<64>(input1 + 8); + + return acc; +} + +template +XXH_FORCE_INLINE hash_t len_17to128(const uint8_t* XXH_RESTRICT input, + size_t len, + const uint8_t* XXH_RESTRICT secret, + uint64_t seed) { + if constexpr (N == 64) { + hash64_t acc = len * PRIME<64>(1); + + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += mix_16b(input + 48, secret + 96, seed); + acc += mix_16b(input + len - 64, secret + 112, seed); + } + + acc += mix_16b(input + 32, secret + 64, seed); + acc += mix_16b(input + len - 48, secret + 80, seed); + } + + acc += mix_16b(input + 16, secret + 32, seed); + acc += mix_16b(input + len - 32, secret + 48, seed); + } + + acc += mix_16b(input + 0, secret + 0, seed); + acc += mix_16b(input + len - 16, secret + 16, seed); + + return avalanche(acc); + } else { + hash128_t acc = {len * PRIME<64>(1), 0}; 
+ + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = mix_32b(acc, input + 48, input + len - 64, secret + 96, seed); + } + + acc = mix_32b(acc, input + 32, input + len - 48, secret + 64, seed); + } + + acc = mix_32b(acc, input + 16, input + len - 32, secret + 32, seed); + } + + acc = mix_32b(acc, input, input + len - 16, secret, seed); + + uint64_t const low64 = acc.low64 + acc.high64; + uint64_t const high64 = + (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); + + return {avalanche(low64), (uint64_t)0 - avalanche(high64)}; + } +} + +template +XXH_NO_INLINE hash_t len_129to240(const uint8_t* XXH_RESTRICT input, + size_t len, + const uint8_t* XXH_RESTRICT secret, + uint64_t seed) { + if constexpr (N == 64) { + uint64_t acc = len * PRIME<64>(1); + size_t const nbRounds = len / 16; + + for (size_t i = 0; i < 8; i++) { + acc += mix_16b(input + (i * 16), secret + (i * 16), seed); + } + + acc = avalanche(acc); + + for (size_t i = 8; i < nbRounds; i++) { + acc += mix_16b(input + (i * 16), secret + ((i - 8) * 16) + midsize_startoffset, seed); + } + + /* last bytes */ + acc += mix_16b(input + len - 16, secret + secret_size_min - midsize_lastoffset, seed); + + return avalanche(acc); + } else { + hash128_t acc; + uint64_t const nbRounds = len / 32; + + acc.low64 = len * PRIME<64>(1); + acc.high64 = 0; + + for (size_t i = 0; i < 4; i++) { + acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, secret + (i * 32), seed); + } + + acc.low64 = avalanche(acc.low64); + acc.high64 = avalanche(acc.high64); + + for (size_t i = 4; i < nbRounds; i++) { + acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, + secret + midsize_startoffset + ((i - 4) * 32), seed); + } + + /* last bytes */ + acc = mix_32b(acc, input + len - 16, input + len - 32, + secret + secret_size_min - midsize_lastoffset - 16, 0ULL - seed); + + uint64_t const low64 = acc.low64 + acc.high64; + uint64_t const high64 = + (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); + + return {avalanche(low64), (uint64_t)0 - avalanche(high64)}; + } +} + +template +XXH_NO_INLINE hash_t xxhash3_impl(const void* XXH_RESTRICT input, + size_t len, + hash64_t seed, + const void* XXH_RESTRICT secret = default_secret, + size_t secretSize = secret_default_size) { + alignas(8) uint8_t custom_secret[secret_default_size]; + const void* short_secret = secret; + + if (seed != 0) { + init_custom_secret(custom_secret, seed); + secret = custom_secret; + secretSize = secret_default_size; + short_secret = default_secret; + } + + if (len <= 16) { + return len_0to16(static_cast(input), len, + static_cast(short_secret), seed); + } else if (len <= 128) { + return len_17to128(static_cast(input), len, + static_cast(short_secret), seed); + } else if (len <= midsize_max) { + return len_129to240(static_cast(input), len, + static_cast(short_secret), seed); + } else { + return hash_long_internal(static_cast(input), len, + static_cast(secret), secretSize); + } +} +} // namespace detail3 + +/* ************************************* + * Public Access Point - xxhash + ***************************************/ + +template +inline hash_t xxhash(const void* input, size_t len, uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash can only be used in 32 and 64 bit modes."); + return detail::endian_align(input, len, seed); +} + +template +inline hash_t xxhash(const std::basic_string& input, uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash 
can only be used in 32 and 64 bit modes."); + return detail::endian_align(static_cast(input.data()), + input.length() * sizeof(T), seed); +} + +template +inline hash_t xxhash(ContiguousIterator begin, + ContiguousIterator end, + uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash can only be used in 32 and 64 bit modes."); + using T = typename std::decay_t; + return detail::endian_align(static_cast(&*begin), + (end - begin) * sizeof(T), seed); +} + +template +inline hash_t xxhash(const std::vector& input, uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash can only be used in 32 and 64 bit modes."); + return detail::endian_align(static_cast(input.data()), + input.size() * sizeof(T), seed); +} + +template +inline hash_t xxhash(const std::array& input, uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash can only be used in 32 and 64 bit modes."); + return detail::endian_align(static_cast(input.data()), AN * sizeof(T), + seed); +} + +template +inline hash_t xxhash(const std::initializer_list& input, uint_t seed = 0) { + static_assert(!(bit_mode != 32 && bit_mode != 64), + "xxhash can only be used in 32 and 64 bit modes."); + return detail::endian_align(static_cast(input.begin()), + input.size() * sizeof(T), seed); +} + +/* ************************************* + * Public Access Point - xxhash3 + ***************************************/ + +template +inline hash_t xxhash3(const void* input, size_t len, uint64_t seed = 0) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + return detail3::xxhash3_impl(input, len, seed); +} + +template +inline hash_t xxhash3(const void* input, + size_t len, + const void* secret, + size_t secretSize) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + return detail3::xxhash3_impl(input, len, 0, secret, secretSize); +} + +template +inline hash_t xxhash3(const std::basic_string& input, uint64_t seed = 0) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + return detail3::xxhash3_impl(static_cast(input.data()), + input.length() * sizeof(T), seed); +} + +template +inline hash_t xxhash3(const std::basic_string& input, + const void* secret, + size_t secretSize) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + return detail3::xxhash3_impl(static_cast(input.data()), + input.length() * sizeof(T), 0, secret, secretSize); +} + +template +inline hash_t xxhash3(ContiguousIterator begin, ContiguousIterator end, uint64_t seed = 0) { + static_assert(!(N != 128 && N != 64), "xxhash3 can only be used in 64 and 128 bit modes."); + using T = typename std::decay_t; + return detail3::xxhash3_impl(static_cast(&*begin), (end - begin) * sizeof(T), + seed); +} + +template +inline hash_t xxhash3(ContiguousIterator begin, + ContiguousIterator end, + const void* secret, + size_t secretSize) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + using T = typename std::decay_t; + return detail3::xxhash3_impl(static_cast(&*begin), + (end - begin) * sizeof(T), 0, secret, secretSize); +} + +template +inline hash_t xxhash3(const std::vector& input, uint64_t seed = 0) { + static_assert(!(bit_mode != 128 && bit_mode != 64), + "xxhash3 can only be used in 64 and 128 bit modes."); + return 
+
+/* *************************************
+ * Public Access Point - xxhash3
+ ***************************************/
+
+template <size_t bit_mode>
+inline hash_t<bit_mode> xxhash3(const void* input, size_t len, uint64_t seed = 0) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(input, len, seed);
+}
+
+template <size_t bit_mode>
+inline hash_t<bit_mode> xxhash3(const void* input,
+                                size_t len,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(input, len, 0, secret, secretSize);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input, uint64_t seed = 0) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()),
+                                         input.length() * sizeof(T), seed);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()),
+                                         input.length() * sizeof(T), 0, secret, secretSize);
+}
+
+template <size_t N, typename ContiguousIterator>
+inline hash_t<N> xxhash3(ContiguousIterator begin, ContiguousIterator end, uint64_t seed = 0) {
+  static_assert(!(N != 128 && N != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+  using T = typename std::decay_t<decltype(*end)>;
+  return detail3::xxhash3_impl<N>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T),
+                                  seed);
+}
+
+template <size_t bit_mode, typename ContiguousIterator>
+inline hash_t<bit_mode> xxhash3(ContiguousIterator begin,
+                                ContiguousIterator end,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  using T = typename std::decay_t<decltype(*end)>;
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(&*begin),
+                                         (end - begin) * sizeof(T), 0, secret, secretSize);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::vector<T>& input, uint64_t seed = 0) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()),
+                                         input.size() * sizeof(T), seed);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::vector<T>& input,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()),
+                                         input.size() * sizeof(T), 0, secret, secretSize);
+}
+
+template <size_t bit_mode, typename T, size_t AN>
+inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input, uint64_t seed = 0) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T),
+                                         seed);
+}
+
+template <size_t bit_mode, typename T, size_t AN>
+inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), 0,
+                                         secret, secretSize);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input, uint64_t seed = 0) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()),
+                                         input.size() * sizeof(T), seed);
+}
+
+template <size_t bit_mode, typename T>
+inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input,
+                                const void* secret,
+                                size_t secretSize) {
+  static_assert(!(bit_mode != 128 && bit_mode != 64),
+                "xxhash3 can only be used in 64 and 128 bit modes.");
+  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()),
+                                         input.size() * sizeof(T), 0, secret, secretSize);
+}
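+
+// Usage sketch (illustrative only): each xxhash3 overload above comes in a
+// seeded and a custom-secret flavor, with the output width chosen per call.
+//
+//   auto h64  = xxh::xxhash3<64>(buffer);         // seed defaults to 0
+//   auto h128 = xxh::xxhash3<128>(buffer, 1234);  // explicit 64-bit seed
+//   // h128.low64 and h128.high64 hold the two halves of the 128-bit result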
+
+/* *************************************
+ * Hash streaming - xxhash
+ ***************************************/
+
+template <size_t bit_mode>
+class hash_state_t {
+  uint64_t total_len = 0;
+  uint_t<bit_mode> v1 = 0, v2 = 0, v3 = 0, v4 = 0;
+  std::array<uint_t<bit_mode>, 4> mem = {0, 0, 0, 0};
+  uint32_t memsize = 0;
+
+  inline void update_impl(const void* input, size_t length) {
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(input);
+    const uint8_t* const bEnd = p + length;
+
+    total_len += length;
+
+    if (memsize + length < (bit_mode / 2)) { /* fill in tmp buffer */
+      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, length);
+      memsize += static_cast<uint32_t>(length);
+      return;
+    }
+
+    if (memsize > 0) { /* some data left from previous update */
+      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, (bit_mode / 2) - memsize);
+
+      const uint_t<bit_mode>* ptr = mem.data();
+
+      v1 = detail::round<bit_mode>(v1, mem_ops::readLE<bit_mode>(ptr));
+      ptr++;
+      v2 = detail::round<bit_mode>(v2, mem_ops::readLE<bit_mode>(ptr));
+      ptr++;
+      v3 = detail::round<bit_mode>(v3, mem_ops::readLE<bit_mode>(ptr));
+      ptr++;
+      v4 = detail::round<bit_mode>(v4, mem_ops::readLE<bit_mode>(ptr));
+
+      p += (bit_mode / 2) - memsize;
+      memsize = 0;
+    }
+
+    if (p <= bEnd - (bit_mode / 2)) {
+      const uint8_t* const limit = bEnd - (bit_mode / 2);
+
+      do {
+        v1 = detail::round<bit_mode>(v1, mem_ops::readLE<bit_mode>(p));
+        p += (bit_mode / 8);
+        v2 = detail::round<bit_mode>(v2, mem_ops::readLE<bit_mode>(p));
+        p += (bit_mode / 8);
+        v3 = detail::round<bit_mode>(v3, mem_ops::readLE<bit_mode>(p));
+        p += (bit_mode / 8);
+        v4 = detail::round<bit_mode>(v4, mem_ops::readLE<bit_mode>(p));
+        p += (bit_mode / 8);
+      } while (p <= limit);
+    }
+
+    if (p < bEnd) {
+      memcpy(mem.data(), p, static_cast<size_t>(bEnd - p));
+      memsize = static_cast<uint32_t>(bEnd - p);
+    }
+  }
+
+  inline hash_t<bit_mode> digest_impl() const {
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(mem.data());
+    const uint8_t* const bEnd = reinterpret_cast<const uint8_t*>(mem.data()) + memsize;
+    hash_t<bit_mode> hash_ret;
+
+    if (total_len >= (bit_mode / 2)) {
+      hash_ret = bit_ops::rotl<bit_mode>(v1, 1) + bit_ops::rotl<bit_mode>(v2, 7) +
+                 bit_ops::rotl<bit_mode>(v3, 12) + bit_ops::rotl<bit_mode>(v4, 18);
+
+      if constexpr (bit_mode == 64) {
+        detail::endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4);
+      }
+    } else {
+      hash_ret = v3 + detail::PRIME<bit_mode>(5);
+    }
+
+    hash_ret += static_cast<hash_t<bit_mode>>(total_len);
+
+    return detail::endian_align_sub_ending<bit_mode>(hash_ret, p, bEnd);
+  }
+
+ public:
+  hash_state_t(uint_t<bit_mode> seed = 0) {
+    static_assert(!(bit_mode != 32 && bit_mode != 64),
+                  "xxhash streaming can only be used in 32 and 64 bit modes.");
+    v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
+    v2 = seed + detail::PRIME<bit_mode>(2);
+    v3 = seed + 0;
+    v4 = seed - detail::PRIME<bit_mode>(1);
+  };
+
+  hash_state_t& operator=(hash_state_t& other) {
+    memcpy(this, &other, sizeof(hash_state_t));
+    return *this;
+  }
+
+  void reset(uint_t<bit_mode> seed = 0) {
+    memset(this, 0, sizeof(hash_state_t));
+    v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
+    v2 = seed + detail::PRIME<bit_mode>(2);
+    v3 = seed + 0;
+    v4 = seed - detail::PRIME<bit_mode>(1);
+  }
+
+  void update(const void* input, size_t length) { return update_impl(input, length); }
+
+  template <typename T>
+  void update(const std::basic_string<T>& input) {
+    return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
+  }
+
+  template <typename ContiguousIterator>
+  void update(ContiguousIterator begin, ContiguousIterator end) {
+    using T = typename std::decay_t<decltype(*end)>;
+    return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
+  }
+
+  template <typename T>
+  void update(const std::vector<T>& input) {
+    return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
+  }
+
+  template <typename T, size_t AN>
+  void update(const std::array<T, AN>& input) {
+    return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
+  }
+
+  template <typename T>
+  void update(const std::initializer_list<T>& input) {
+    return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
+  }
+
+  hash_t<bit_mode> digest() const { return digest_impl(); }
+};
+
+using hash_state32_t = hash_state_t<32>;
+using hash_state64_t = hash_state_t<64>;
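+
+// Usage sketch (illustrative only): the streaming state yields the same digest
+// as a one-shot xxh::xxhash<64> over the concatenated input.
+//
+//   xxh::hash_state64_t state;                     // seed defaults to 0
+//   state.update(chunk_a.data(), chunk_a.size());  // raw pointer + length
+//   state.update(chunk_b);                         // or any std::vector<T>
+//   xxh::hash_t<64> h = state.digest();            // digest() leaves the state intact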
+
+/* *************************************
+ * Hash streaming - xxhash3
+ ***************************************/
+
+template <size_t bit_mode>
+class alignas(64) hash3_state_t {
+  constexpr static int internal_buffer_size = 256;
+  constexpr static int internal_buffer_stripes = (internal_buffer_size / detail3::stripe_len);
+  constexpr static detail3::acc_width accWidth =
+      (bit_mode == 64) ? detail3::acc_width::acc_64bits : detail3::acc_width::acc_128bits;
+
+  alignas(64) uint64_t acc[8];
+  alignas(64) uint8_t
+      customSecret[detail3::secret_default_size]; /* used to store a custom secret generated from
+                                                     the seed. Makes state larger. Design might
+                                                     change */
+  alignas(64) uint8_t buffer[internal_buffer_size];
+  uint32_t bufferedSize = 0;
+  uint32_t nbStripesPerBlock = 0;
+  uint32_t nbStripesSoFar = 0;
+  uint32_t secretLimit = 0;
+  uint32_t reserved32 = 0;
+  uint32_t reserved32_2 = 0;
+  uint64_t totalLen = 0;
+  uint64_t seed = 0;
+  uint64_t reserved64 = 0;
+  const uint8_t* secret =
+      nullptr; /* note : there is some padding after, due to alignment on 64 bytes */
+
+  void consume_stripes(uint64_t* acc,
+                       uint32_t& nbStripesSoFar,
+                       size_t totalStripes,
+                       const uint8_t* input,
+                       detail3::acc_width accWidth) {
+    if (nbStripesPerBlock - nbStripesSoFar <= totalStripes) /* need a scrambling operation */
+    {
+      size_t const nbStripes = nbStripesPerBlock - nbStripesSoFar;
+
+      detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate),
+                          nbStripes, accWidth);
+      detail3::scramble_acc(acc, secret + secretLimit);
+      detail3::accumulate(acc, input + nbStripes * detail3::stripe_len, secret,
+                          totalStripes - nbStripes, accWidth);
+      nbStripesSoFar = (uint32_t)(totalStripes - nbStripes);
+    } else {
+      detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate),
+                          totalStripes, accWidth);
+      nbStripesSoFar += (uint32_t)totalStripes;
+    }
+  }
+
+  void update_impl(const void* input_, size_t len) {
+    const uint8_t* input = static_cast<const uint8_t*>(input_);
+    const uint8_t* const bEnd = input + len;
+
+    totalLen += len;
+
+    if (bufferedSize + len <= internal_buffer_size) { /* fill in tmp buffer */
+      memcpy(buffer + bufferedSize, input, len);
+      bufferedSize += (uint32_t)len;
+      return;
+    }
+    /* input now > XXH3_INTERNALBUFFER_SIZE */
+
+    if (bufferedSize > 0) { /* some input within internal buffer: fill then consume it */
+      size_t const loadSize = internal_buffer_size - bufferedSize;
+
+      memcpy(buffer + bufferedSize, input, loadSize);
+      input += loadSize;
+      consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, buffer, accWidth);
+      bufferedSize = 0;
+    }
+
+    /* consume input by full buffer quantities */
+    if (input + internal_buffer_size <= bEnd) {
+      const uint8_t* const limit = bEnd - internal_buffer_size;
+
+      do {
+        consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, input, accWidth);
+        input += internal_buffer_size;
+      } while (input <= limit);
+    }
+
+    if (input < bEnd) { /* some remaining input: buffer it */
+      memcpy(buffer, input, (size_t)(bEnd - input));
+      bufferedSize = (uint32_t)(bEnd - input);
+    }
+  }
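+
+  // Flow summary: update_impl() stages input in the 256-byte internal buffer
+  // and runs stripe accumulation only on full buffers; digest_long() below
+  // consumes any remaining partial buffer into a local copy of the
+  // accumulators, so the live state can keep ingesting input afterwards.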
+
+  void digest_long(uint64_t* acc_, detail3::acc_width accWidth) {
+    memcpy(acc_, acc, sizeof(acc)); /* digest locally, state remains unaltered, and can continue
+                                       ingesting more input afterwards */
+
+    if (bufferedSize >= detail3::stripe_len) {
+      size_t const totalNbStripes = bufferedSize / detail3::stripe_len;
+      uint32_t nbStripesSoFar = this->nbStripesSoFar;
+
+      consume_stripes(acc_, nbStripesSoFar, totalNbStripes, buffer, accWidth);
+
+      if (bufferedSize % detail3::stripe_len) { /* one last partial stripe */
+        detail3::accumulate_512(acc_, buffer + bufferedSize - detail3::stripe_len,
+                                secret + secretLimit - detail3::secret_lastacc_start, accWidth);
+      }
+    } else {                  /* bufferedSize < STRIPE_LEN */
+      if (bufferedSize > 0) { /* one last stripe */
+        uint8_t lastStripe[detail3::stripe_len];
+        size_t const catchupSize = detail3::stripe_len - bufferedSize;
+        memcpy(lastStripe, buffer + sizeof(buffer) - catchupSize, catchupSize);
+        memcpy(lastStripe + catchupSize, buffer, bufferedSize);
+        detail3::accumulate_512(acc_, lastStripe,
+                                secret + secretLimit - detail3::secret_lastacc_start, accWidth);
+      }
+    }
+  }
+
+ public:
+  hash3_state_t& operator=(hash3_state_t& other) {
+    memcpy(this, &other, sizeof(hash3_state_t));
+    return *this;
+  }
+
+  hash3_state_t(uint64_t seed = 0) {
+    static_assert(!(bit_mode != 128 && bit_mode != 64),
+                  "xxhash3 streaming can only be used in 64 and 128 bit modes.");
+    reset(seed);
+  }
+
+  hash3_state_t(const void* secret, size_t secretSize) {
+    static_assert(!(bit_mode != 128 && bit_mode != 64),
+                  "xxhash3 streaming can only be used in 64 and 128 bit modes.");
+    reset(secret, secretSize);
+  }
+
+  void reset(uint64_t seed = 0) {
+    memset(this, 0, sizeof(*this));
+    memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
+    (*this).seed = seed;
+
+    if (seed == 0) {
+      secret = detail3::default_secret;
+    } else {
+      detail3::init_custom_secret(customSecret, seed);
+      secret = customSecret;
+    }
+
+    secretLimit = (uint32_t)(detail3::secret_default_size - detail3::stripe_len);
+    nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;
+  }
+
+  void reset(const void* secret, size_t secretSize) {
+    memset(this, 0, sizeof(*this));
+    memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
+    seed = 0;
+
+    (*this).secret = (const uint8_t*)secret;
+    secretLimit = (uint32_t)(secretSize - detail3::stripe_len);
+    nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;
+  }
+
+  void update(const void* input, size_t len) {
+    return update_impl(static_cast<const void*>(input), len);
+  }
+
+  template <typename T>
+  void update(const std::basic_string<T>& input) {
+    return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
+  }
+
+  template <typename ContiguousIterator>
+  void update(ContiguousIterator begin, ContiguousIterator end) {
+    using T = typename std::decay_t<decltype(*end)>;
+    return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
+  }
+
+  template <typename T>
+  void update(const std::vector<T>& input) {
+    return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
+  }
+
+  template <typename T, size_t AN>
+  void update(const std::array<T, AN>& input) {
+    return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
+  }
+
+  template <typename T>
+  void update(const std::initializer_list<T>& input) {
+    return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
+  }
+
+  hash_t<bit_mode> digest() {
+    if (totalLen > detail3::midsize_max) {
+      alignas(detail3::acc_align) hash64_t acc[detail3::acc_nb];
+
+      digest_long(acc, accWidth);
+
+      if constexpr (bit_mode == 64) {
+        return detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start,
+                                   (uint64_t)totalLen * detail::PRIME<64>(1));
+      } else {
+        uint64_t const low64 = detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start,
+                                                   (uint64_t)totalLen * detail::PRIME<64>(1));
+        uint64_t const high64 =
+            detail3::merge_accs(acc,
+                                secret + secretLimit + detail3::stripe_len - sizeof(acc) -
+                                    detail3::secret_mergeaccs_start,
+                                ~((uint64_t)totalLen * detail::PRIME<64>(2)));
+
+        return {low64, high64};
+      }
+    } else {
+      return detail3::xxhash3_impl<bit_mode>(buffer, totalLen, seed, secret,
+                                             secretLimit + detail3::stripe_len);
+    }
+  }
+};
+
+using hash3_state64_t = hash3_state_t<64>;
+using hash3_state128_t = hash3_state_t<128>;
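+
+// Usage sketch (illustrative only): streaming xxhash3, matching the one-shot
+// xxh::xxhash3<128> result for the same bytes.
+//
+//   xxh::hash3_state128_t state;           // seed defaults to 0
+//   state.update(buffer);                  // std::vector<T> overload
+//   xxh::hash_t<128> h = state.digest();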
+
+/* *************************************
+ * Canonical representation
+ ***************************************/
+
+template <size_t bit_mode>
+struct canonical_t {
+  std::array<uint8_t, bit_mode / 8> digest{0};
+
+  canonical_t(hash_t<bit_mode> hash) {
+    if constexpr (bit_mode < 128) {
+      if (mem_ops::is_little_endian()) {
+        hash = bit_ops::swap<bit_mode>(hash);
+      }
+
+      memcpy(digest.data(), &hash, sizeof(canonical_t));
+    } else {
+      if (mem_ops::is_little_endian()) {
+        hash.low64 = bit_ops::swap<64>(hash.low64);
+        hash.high64 = bit_ops::swap<64>(hash.high64);
+      }
+
+      memcpy(digest.data(), &hash.high64, sizeof(hash.high64));
+      memcpy(digest.data() + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+    }
+  }
+
+  hash_t<bit_mode> get_hash() const {
+    if constexpr (bit_mode < 128) {
+      return mem_ops::readBE<bit_mode>(&digest);
+    } else {
+      return {mem_ops::readBE<64>(&digest[8]), mem_ops::readBE<64>(&digest)};
+    }
+  }
+};
+
+using canonical32_t = canonical_t<32>;
+using canonical64_t = canonical_t<64>;
+using canonical128_t = canonical_t<128>;
+}  // namespace xxh
diff --git a/vendor.yaml b/vendor.yaml
index d6324d093..b993bb4c9 100644
--- a/vendor.yaml
+++ b/vendor.yaml
@@ -8,3 +8,7 @@ third-party/discord-rpc:
   sha: 963aa9f3e5ce81a4682c6ca3d136cddda614db33
 third-party/fpng:
   sha: bfe5f9c69e93b99b31268c10db8e645c9125a07f
+third-party/CLI11.hpp:
+  git: https://github.com/CLIUtils/CLI11/tree/v2.2.0
+third-party/xxhash.hpp:
+  git: https://github.com/RedSpah/xxhash_cpp/tree/0.7.3