From ad0f94e97382f818c1bde6e22df8fbf4b78e7821 Mon Sep 17 00:00:00 2001 From: Kawe Mazidjatari <48657826+Mauler125@users.noreply.github.com> Date: Sat, 4 Jun 2022 13:55:24 +0200 Subject: [PATCH] Implement data sharing algorithm in VPK packing This reduces the size of the resulting VPK: the data of each entry chunk is hashed with SHA-1 and looked up in a hash map, and a chunk whose data was already written reuses the existing entry instead of being written again. mp_common compresses to 32 MB instead of 37.3 MB (original is 38.5 MB). --- r5dev/mathlib/sha1.cpp | 269 ++++++++++++++++++++++++++ r5dev/mathlib/sha1.h | 57 ++++++ r5dev/public/binstream.cpp | 9 + r5dev/public/include/binstream.h | 2 +- r5dev/vpklib/packedstore.cpp | 57 ++++-- r5dev/vpklib/packedstore.h | 1 + r5dev/vproj/clientsdk.vcxproj | 2 + r5dev/vproj/clientsdk.vcxproj.filters | 6 + r5dev/vproj/dedicated.vcxproj | 2 + r5dev/vproj/dedicated.vcxproj.filters | 6 + r5dev/vproj/gamesdk.vcxproj | 2 + r5dev/vproj/gamesdk.vcxproj.filters | 6 + 12 files changed, 403 insertions(+), 16 deletions(-) create mode 100644 r5dev/mathlib/sha1.cpp create mode 100644 r5dev/mathlib/sha1.h diff --git a/r5dev/mathlib/sha1.cpp b/r5dev/mathlib/sha1.cpp new file mode 100644 index 00000000..a8c871a7 --- /dev/null +++ b/r5dev/mathlib/sha1.cpp @@ -0,0 +1,269 @@ +/* + sha1.cpp - source code of + + ============ + SHA-1 in C++ + ============ + + 100% Public Domain. 
+ + Original C Code + -- Steve Reid + Small changes to fit into bglibs + -- Bruce Guenter + Translation to simpler C++ Code + -- Volker Grabsch +*/ + +#include "core/stdafx.h" +#include "mathlib/sha1.h" + +/* Help macros */ +#define SHA1_ROL(value, bits) (((value) << (bits)) | (((value) & 0xffffffff) >> (32 - (bits)))) +#define SHA1_BLK(i) (block[i&15] = SHA1_ROL(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define SHA1_R0(v,w,x,y,z,i) z += ((w&(x^y))^y) + block[i] + 0x5a827999 + SHA1_ROL(v,5); w=SHA1_ROL(w,30); +#define SHA1_R1(v,w,x,y,z,i) z += ((w&(x^y))^y) + SHA1_BLK(i) + 0x5a827999 + SHA1_ROL(v,5); w=SHA1_ROL(w,30); +#define SHA1_R2(v,w,x,y,z,i) z += (w^x^y) + SHA1_BLK(i) + 0x6ed9eba1 + SHA1_ROL(v,5); w=SHA1_ROL(w,30); +#define SHA1_R3(v,w,x,y,z,i) z += (((w|x)&y)|(w&x)) + SHA1_BLK(i) + 0x8f1bbcdc + SHA1_ROL(v,5); w=SHA1_ROL(w,30); +#define SHA1_R4(v,w,x,y,z,i) z += (w^x^y) + SHA1_BLK(i) + 0xca62c1d6 + SHA1_ROL(v,5); w=SHA1_ROL(w,30); + +SHA1::SHA1() +{ + reset(); +} + + +void SHA1::update(const std::string &s) +{ + std::istringstream is(s); + update(is); +} + + +void SHA1::update(std::istream &is) +{ + std::string rest_of_buffer; + read(is, rest_of_buffer, BLOCK_BYTES - buffer.size()); + buffer += rest_of_buffer; + + while (is) + { + uint32 block[BLOCK_INTS]; + buffer_to_block(buffer, block); + transform(block); + read(is, buffer, BLOCK_BYTES); + } +} + + +/* + * Add padding and return the message digest. 
+ */ + +std::string SHA1::final() +{ + /* Total number of hashed bits */ + uint64 total_bits = (transforms*BLOCK_BYTES + buffer.size()) * 8; + + /* Padding */ + buffer += 0x80; + unsigned int orig_size = buffer.size(); + while (buffer.size() < BLOCK_BYTES) + { + buffer += (char)0x00; + } + + uint32 block[BLOCK_INTS]; + buffer_to_block(buffer, block); + + if (orig_size > BLOCK_BYTES - 8) + { + transform(block); + for (unsigned int i = 0; i < BLOCK_INTS - 2; i++) + { + block[i] = 0; + } + } + + /* Append total_bits, split this uint64 into two uint32 */ + block[BLOCK_INTS - 1] = total_bits; + block[BLOCK_INTS - 2] = (total_bits >> 32); + transform(block); + + /* Hex std::string */ + std::ostringstream result; + for (unsigned int i = 0; i < DIGEST_INTS; i++) + { + result << std::hex << std::setfill('0') << std::setw(8); + result << (digest[i] & 0xffffffff); + } + + /* Reset for next run */ + reset(); + + return result.str(); +} + + +std::string SHA1::from_file(const std::string &filename) +{ + std::ifstream stream(filename.c_str(), std::ios::binary); + SHA1 checksum; + checksum.update(stream); + return checksum.final(); +} + + +void SHA1::reset() +{ + /* SHA1 initialization constants */ + digest[0] = 0x67452301; + digest[1] = 0xefcdab89; + digest[2] = 0x98badcfe; + digest[3] = 0x10325476; + digest[4] = 0xc3d2e1f0; + + /* Reset counters */ + transforms = 0; + buffer = ""; +} + + +/* + * Hash a single 512-bit block. This is the core of the algorithm. + */ + +void SHA1::transform(uint32 block[BLOCK_BYTES]) +{ + /* Copy digest[] to working vars */ + uint32 a = digest[0]; + uint32 b = digest[1]; + uint32 c = digest[2]; + uint32 d = digest[3]; + uint32 e = digest[4]; + + + /* 4 rounds of 20 operations each. Loop unrolled. 
*/ + SHA1_R0(a,b,c,d,e, 0); + SHA1_R0(e,a,b,c,d, 1); + SHA1_R0(d,e,a,b,c, 2); + SHA1_R0(c,d,e,a,b, 3); + SHA1_R0(b,c,d,e,a, 4); + SHA1_R0(a,b,c,d,e, 5); + SHA1_R0(e,a,b,c,d, 6); + SHA1_R0(d,e,a,b,c, 7); + SHA1_R0(c,d,e,a,b, 8); + SHA1_R0(b,c,d,e,a, 9); + SHA1_R0(a,b,c,d,e,10); + SHA1_R0(e,a,b,c,d,11); + SHA1_R0(d,e,a,b,c,12); + SHA1_R0(c,d,e,a,b,13); + SHA1_R0(b,c,d,e,a,14); + SHA1_R0(a,b,c,d,e,15); + SHA1_R1(e,a,b,c,d,16); + SHA1_R1(d,e,a,b,c,17); + SHA1_R1(c,d,e,a,b,18); + SHA1_R1(b,c,d,e,a,19); + SHA1_R2(a,b,c,d,e,20); + SHA1_R2(e,a,b,c,d,21); + SHA1_R2(d,e,a,b,c,22); + SHA1_R2(c,d,e,a,b,23); + SHA1_R2(b,c,d,e,a,24); + SHA1_R2(a,b,c,d,e,25); + SHA1_R2(e,a,b,c,d,26); + SHA1_R2(d,e,a,b,c,27); + SHA1_R2(c,d,e,a,b,28); + SHA1_R2(b,c,d,e,a,29); + SHA1_R2(a,b,c,d,e,30); + SHA1_R2(e,a,b,c,d,31); + SHA1_R2(d,e,a,b,c,32); + SHA1_R2(c,d,e,a,b,33); + SHA1_R2(b,c,d,e,a,34); + SHA1_R2(a,b,c,d,e,35); + SHA1_R2(e,a,b,c,d,36); + SHA1_R2(d,e,a,b,c,37); + SHA1_R2(c,d,e,a,b,38); + SHA1_R2(b,c,d,e,a,39); + SHA1_R3(a,b,c,d,e,40); + SHA1_R3(e,a,b,c,d,41); + SHA1_R3(d,e,a,b,c,42); + SHA1_R3(c,d,e,a,b,43); + SHA1_R3(b,c,d,e,a,44); + SHA1_R3(a,b,c,d,e,45); + SHA1_R3(e,a,b,c,d,46); + SHA1_R3(d,e,a,b,c,47); + SHA1_R3(c,d,e,a,b,48); + SHA1_R3(b,c,d,e,a,49); + SHA1_R3(a,b,c,d,e,50); + SHA1_R3(e,a,b,c,d,51); + SHA1_R3(d,e,a,b,c,52); + SHA1_R3(c,d,e,a,b,53); + SHA1_R3(b,c,d,e,a,54); + SHA1_R3(a,b,c,d,e,55); + SHA1_R3(e,a,b,c,d,56); + SHA1_R3(d,e,a,b,c,57); + SHA1_R3(c,d,e,a,b,58); + SHA1_R3(b,c,d,e,a,59); + SHA1_R4(a,b,c,d,e,60); + SHA1_R4(e,a,b,c,d,61); + SHA1_R4(d,e,a,b,c,62); + SHA1_R4(c,d,e,a,b,63); + SHA1_R4(b,c,d,e,a,64); + SHA1_R4(a,b,c,d,e,65); + SHA1_R4(e,a,b,c,d,66); + SHA1_R4(d,e,a,b,c,67); + SHA1_R4(c,d,e,a,b,68); + SHA1_R4(b,c,d,e,a,69); + SHA1_R4(a,b,c,d,e,70); + SHA1_R4(e,a,b,c,d,71); + SHA1_R4(d,e,a,b,c,72); + SHA1_R4(c,d,e,a,b,73); + SHA1_R4(b,c,d,e,a,74); + SHA1_R4(a,b,c,d,e,75); + SHA1_R4(e,a,b,c,d,76); + SHA1_R4(d,e,a,b,c,77); + SHA1_R4(c,d,e,a,b,78); + 
SHA1_R4(b,c,d,e,a,79); + + /* Add the working vars back into digest[] */ + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + /* Count the number of transformations */ + transforms++; +} + + +void SHA1::buffer_to_block(const std::string &buffer, uint32 block[BLOCK_BYTES]) +{ + /* Convert the std::string (byte buffer) to a uint32 array (MSB) */ + for (unsigned int i = 0; i < BLOCK_INTS; i++) + { + block[i] = (buffer[4*i+3] & 0xff) + | (buffer[4*i+2] & 0xff)<<8 + | (buffer[4*i+1] & 0xff)<<16 + | (buffer[4*i+0] & 0xff)<<24; + } +} + + +void SHA1::read(std::istream &is, std::string &s, int max) +{ + char* sbuf = new char[max]; + is.read(sbuf, max); + s.assign(sbuf, is.gcount()); + delete[] sbuf; +} + + +std::string sha1(const std::string &string) +{ + SHA1 checksum; + checksum.update(string); + return checksum.final(); +} \ No newline at end of file diff --git a/r5dev/mathlib/sha1.h b/r5dev/mathlib/sha1.h new file mode 100644 index 00000000..ef5cf152 --- /dev/null +++ b/r5dev/mathlib/sha1.h @@ -0,0 +1,57 @@ +/* + sha1.h - header of + + ============ + SHA-1 in C++ + ============ + + 100% Public Domain. 
+ + Original C Code + -- Steve Reid + Small changes to fit into bglibs + -- Bruce Guenter + Translation to simpler C++ Code + -- Volker Grabsch +*/ + +#ifndef SHA1_HPP +#define SHA1_HPP + + +#include +#include + +class SHA1 +{ +public: + SHA1(); + void update(const std::string &s); + void update(std::istream &is); + std::string final(); + static std::string from_file(const std::string &filename); + +private: + typedef unsigned long int uint32; /* just needs to be at least 32bit */ + typedef unsigned long long uint64; /* just needs to be at least 64bit */ + + static const unsigned int DIGEST_INTS = 5; /* number of 32bit integers per SHA1 digest */ + static const unsigned int BLOCK_INTS = 16; /* number of 32bit integers per SHA1 block */ + static const unsigned int BLOCK_BYTES = BLOCK_INTS * 4; + + uint32 digest[DIGEST_INTS]; + std::string buffer; + uint64 transforms; + + void reset(); + void transform(uint32 block[BLOCK_BYTES]); + + static void buffer_to_block(const std::string &buffer, uint32 block[BLOCK_BYTES]); + static void read(std::istream &is, std::string &s, int max); +}; + +std::string sha1(const std::string &string); + + + +#endif /* SHA1_HPP */ \ No newline at end of file diff --git a/r5dev/public/binstream.cpp b/r5dev/public/binstream.cpp index c6ed6e39..77f1abf4 100644 --- a/r5dev/public/binstream.cpp +++ b/r5dev/public/binstream.cpp @@ -103,6 +103,15 @@ void CIOStream::Close() } } +//----------------------------------------------------------------------------- +// Purpose: flushes the ofstream +//----------------------------------------------------------------------------- +void CIOStream::Flush() +{ + if (IsWritable()) + m_oStream.flush(); +} + //----------------------------------------------------------------------------- // Purpose: gets the possition of the current character in the stream //----------------------------------------------------------------------------- diff --git a/r5dev/public/include/binstream.h b/r5dev/public/include/binstream.h 
index a4d39f29..e2834214 100644 --- a/r5dev/public/include/binstream.h +++ b/r5dev/public/include/binstream.h @@ -16,6 +16,7 @@ public: bool Open(const string& svFileFullPath, Mode_t eMode); void Close(); + void Flush(); size_t GetPosition(); void SetPosition(int64_t nOffset); @@ -86,7 +87,6 @@ public: return; m_oStream.write(reinterpret_cast(tValue), nSize); - m_oStream.flush(); } void WriteString(string svInput); diff --git a/r5dev/vpklib/packedstore.cpp b/r5dev/vpklib/packedstore.cpp index a7d1f0e7..2fbe5603 100644 --- a/r5dev/vpklib/packedstore.cpp +++ b/r5dev/vpklib/packedstore.cpp @@ -11,6 +11,7 @@ #include "tier1/cvar.h" #include "mathlib/adler32.h" #include "mathlib/crc32.h" +#include "mathlib/sha1.h" #include "vpklib/packedstore.h" //----------------------------------------------------------------------------- @@ -300,10 +301,11 @@ void CPackedStore::BuildManifest(const vector& vBlock, const st { jEntry[vBlock[i].m_svBlockPath] = { - { "preloadData", vBlock[i].m_nPreloadData}, - { "entryFlags", vBlock[i].m_vvEntries[0].m_nEntryFlags}, - { "textureFlags", vBlock[i].m_vvEntries[0].m_nTextureFlags}, - { "useCompression", vBlock[i].m_vvEntries[0].m_nCompressedSize != vBlock[i].m_vvEntries[0].m_nUncompressedSize} + { "preloadData", vBlock[i].m_nPreloadData }, + { "entryFlags", vBlock[i].m_vvEntries[0].m_nEntryFlags }, + { "textureFlags", vBlock[i].m_vvEntries[0].m_nTextureFlags }, + { "useCompression", vBlock[i].m_vvEntries[0].m_nCompressedSize != vBlock[i].m_vvEntries[0].m_nUncompressedSize }, + { "useDataSharing", true } }; } @@ -378,10 +380,12 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const CIOStream reader(vPaths[i], CIOStream::Mode_t::READ); if (reader.IsReadable()) { - uint16_t nPreloadData = 0i16; - uint32_t nEntryFlags = static_cast(EPackedEntryFlags::ENTRY_VISIBLE) | static_cast(EPackedEntryFlags::ENTRY_CACHE); + uint16_t nPreloadData = 0i16; + uint32_t nEntryFlags = static_cast(EPackedEntryFlags::ENTRY_VISIBLE) | 
static_cast(EPackedEntryFlags::ENTRY_CACHE); uint16_t nTextureFlags = static_cast(EPackedTextureFlags::TEXTURE_DEFAULT); // !TODO: Reverse these. - bool bUseCompression = true; + bool bUseCompression = true; + bool bUseDataSharing = true; + string svEntryHash; if (!jManifest.is_null()) { @@ -394,6 +398,7 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const nEntryFlags = jEntry.at("entryFlags").get(); nTextureFlags = jEntry.at("textureFlags").get(); bUseCompression = jEntry.at("useCompression").get(); + bUseDataSharing = jEntry.at("useDataSharing").get(); } } catch (const std::exception& ex) @@ -406,7 +411,10 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const for (size_t j = 0; j < vEntryBlocks[i].m_vvEntries.size(); j++) { uint8_t* pSrc = new uint8_t[vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize]; - uint8_t* pDest = new uint8_t[COMP_MAX];; + uint8_t* pDest = new uint8_t[COMP_MAX]; + + bool bShared = false; + bool bCompressed = bUseCompression; reader.Read(*pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize); vEntryBlocks[i].m_vvEntries[j].m_nArchiveOffset = writer.GetPosition(); @@ -420,25 +428,43 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const Warning(eDLL_T::FS, "'lzham::lzham_lib_compress_memory' returned with status '%d' (entry will be packed without compression).\n", m_lzCompStatus); vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize = vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize; - writer.Write(pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize); - } - else - { - writer.Write(pDest, vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize); + memmove(pDest, pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize); } } else // Write data uncompressed. 
{ - writer.Write(pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize); + vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize = vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize; + memmove(pDest, pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize); } vEntryBlocks[i].m_vvEntries[j].m_bIsCompressed = vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize != vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize; + if (bUseDataSharing) + { + svEntryHash = sha1(string(reinterpret_cast(pDest), vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize)); + + if (auto it{ m_mEntryHasMap.find(svEntryHash) }; it != std::end(m_mEntryHasMap)) + { + vEntryBlocks[i].m_vvEntries[j] = it->second; + bShared = true; + } + else + { + m_mEntryHasMap.insert({ svEntryHash, vEntryBlocks[i].m_vvEntries[j] }); + bShared = false; + } + } + if (!bShared) + { + writer.Write(pDest, vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize); + } + delete[] pDest; delete[] pSrc; } } } + m_mEntryHasMap.clear(); VPKDir_t vDir = VPKDir_t(); vDir.Build(svPathOut + vPair.m_svDirectoryName, vEntryBlocks); } @@ -511,11 +537,12 @@ void CPackedStore::UnpackAll(const VPKDir_t& vpkDir, const string& svPathOut) if (m_nEntryCount == vBlock.m_vvEntries.size()) // Only validate after last entry in block had been written. { + m_nEntryCount = 0; m_nCrc32_Internal = vBlock.m_nCrc32; + oStream.Flush(); ValidateCRC32PostDecomp(svFilePath); //ValidateAdler32PostDecomp(svFilePath); - m_nEntryCount = 0; } }escape:; } diff --git a/r5dev/vpklib/packedstore.h b/r5dev/vpklib/packedstore.h index 13e642f1..25bb9737 100644 --- a/r5dev/vpklib/packedstore.h +++ b/r5dev/vpklib/packedstore.h @@ -119,6 +119,7 @@ class CPackedStore lzham_compress_status_t m_lzCompStatus {}; // LZham compression status. lzham_decompress_params m_lzDecompParams {}; // LZham decompression parameters. lzham_decompress_status_t m_lzDecompStatus {}; // LZham decompression status. 
+ std::unordered_map m_mEntryHasMap{}; public: void InitLzCompParams(void); diff --git a/r5dev/vproj/clientsdk.vcxproj b/r5dev/vproj/clientsdk.vcxproj index e0623e85..a81fafeb 100644 --- a/r5dev/vproj/clientsdk.vcxproj +++ b/r5dev/vproj/clientsdk.vcxproj @@ -60,6 +60,7 @@ + @@ -189,6 +190,7 @@ + diff --git a/r5dev/vproj/clientsdk.vcxproj.filters b/r5dev/vproj/clientsdk.vcxproj.filters index 04003d47..09db07e5 100644 --- a/r5dev/vproj/clientsdk.vcxproj.filters +++ b/r5dev/vproj/clientsdk.vcxproj.filters @@ -504,6 +504,9 @@ sdk\squirrel + + sdk\mathlib + @@ -1508,6 +1511,9 @@ sdk\squirrel + + sdk\mathlib + diff --git a/r5dev/vproj/dedicated.vcxproj b/r5dev/vproj/dedicated.vcxproj index f9ee8536..4e6e5534 100644 --- a/r5dev/vproj/dedicated.vcxproj +++ b/r5dev/vproj/dedicated.vcxproj @@ -190,6 +190,7 @@ + @@ -478,6 +479,7 @@ + diff --git a/r5dev/vproj/dedicated.vcxproj.filters b/r5dev/vproj/dedicated.vcxproj.filters index 35c87b66..652a8af2 100644 --- a/r5dev/vproj/dedicated.vcxproj.filters +++ b/r5dev/vproj/dedicated.vcxproj.filters @@ -1119,6 +1119,9 @@ sdk\squirrel + + sdk\mathlib + @@ -1373,6 +1376,9 @@ sdk\squirrel + + sdk\mathlib + diff --git a/r5dev/vproj/gamesdk.vcxproj b/r5dev/vproj/gamesdk.vcxproj index 1548a655..96922216 100644 --- a/r5dev/vproj/gamesdk.vcxproj +++ b/r5dev/vproj/gamesdk.vcxproj @@ -66,6 +66,7 @@ + @@ -205,6 +206,7 @@ + diff --git a/r5dev/vproj/gamesdk.vcxproj.filters b/r5dev/vproj/gamesdk.vcxproj.filters index 0856daea..9c7830a7 100644 --- a/r5dev/vproj/gamesdk.vcxproj.filters +++ b/r5dev/vproj/gamesdk.vcxproj.filters @@ -534,6 +534,9 @@ sdk\squirrel + + sdk\mathlib + @@ -1571,6 +1574,9 @@ sdk\squirrel + + sdk\mathlib +