Implement data sharing algorithm in VPK packing

This reduces the size of the resulting VPK: each packed entry block is hashed and looked up in a hash map, so identical blocks are written only once.
mp_common now packs to 32 MB instead of 37.3 MB (the original is 38.5 MB).
This commit is contained in:
Kawe Mazidjatari 2022-06-04 13:55:24 +02:00
parent 3dd96964e3
commit ad0f94e973
12 changed files with 403 additions and 16 deletions

269
r5dev/mathlib/sha1.cpp Normal file
View File

@ -0,0 +1,269 @@
/*
sha1.cpp - source code of
============
SHA-1 in C++
============
100% Public Domain.
Original C Code
-- Steve Reid <steve@edmweb.com>
Small changes to fit into bglibs
-- Bruce Guenter <bruce@untroubled.org>
Translation to simpler C++ Code
-- Volker Grabsch <vog@notjusthosting.com>
*/
#include "core/stdafx.h"
#include "mathlib/sha1.h"
/* Help macros */
/* NOTE: SHA1_BLK and the SHA1_R* macros expand to code that uses a variable
   named `block` from the scope in which they are expanded. */
/* Rotate `value` left by `bits` within 32 bits. Only the right-shifted half is
   masked to 32 bits, so when the integer type is wider than 32 bits the result
   may carry extra bits above bit 31; only the low 32 bits are meaningful. */
#define SHA1_ROL(value, bits) (((value) << (bits)) | (((value) & 0xffffffff) >> (32 - (bits))))
/* Compute the next message-schedule word in place. `block` is used as a
   16-entry ring (indices taken &15): the entries at i+13, i+8, i+2 and i
   are XORed, rotated left by one and stored back into entry i. */
#define SHA1_BLK(i) (block[i&15] = SHA1_ROL(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i&15],1))
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
/* Every round macro adds a mix of (w,x,y), a message word, a round constant and
   ROL(v,5) into z, then rotates w left by 30. Each one expands to TWO statements
   ending in ';', so it must not be used as the unbraced body of an if/else/loop.
   R0 reads block[i] directly; R1..R4 obtain the word through SHA1_BLK(i). */
#define SHA1_R0(v,w,x,y,z,i) z += ((w&(x^y))^y) + block[i] + 0x5a827999 + SHA1_ROL(v,5); w=SHA1_ROL(w,30);
#define SHA1_R1(v,w,x,y,z,i) z += ((w&(x^y))^y) + SHA1_BLK(i) + 0x5a827999 + SHA1_ROL(v,5); w=SHA1_ROL(w,30);
#define SHA1_R2(v,w,x,y,z,i) z += (w^x^y) + SHA1_BLK(i) + 0x6ed9eba1 + SHA1_ROL(v,5); w=SHA1_ROL(w,30);
#define SHA1_R3(v,w,x,y,z,i) z += (((w|x)&y)|(w&x)) + SHA1_BLK(i) + 0x8f1bbcdc + SHA1_ROL(v,5); w=SHA1_ROL(w,30);
#define SHA1_R4(v,w,x,y,z,i) z += (w^x^y) + SHA1_BLK(i) + 0xca62c1d6 + SHA1_ROL(v,5); w=SHA1_ROL(w,30);
/*
 * Construct a hasher in its initial state (same state as after reset()).
 */
SHA1::SHA1()
{
    this->reset();
}
void SHA1::update(const std::string &s)
{
std::istringstream is(s);
update(is);
}
void SHA1::update(std::istream &is)
{
std::string rest_of_buffer;
read(is, rest_of_buffer, BLOCK_BYTES - buffer.size());
buffer += rest_of_buffer;
while (is)
{
uint32 block[BLOCK_INTS];
buffer_to_block(buffer, block);
transform(block);
read(is, buffer, BLOCK_BYTES);
}
}
/*
* Add padding and return the message digest.
*/
std::string SHA1::final()
{
/* Total number of hashed bits */
uint64 total_bits = (transforms*BLOCK_BYTES + buffer.size()) * 8;
/* Padding */
buffer += 0x80;
unsigned int orig_size = buffer.size();
while (buffer.size() < BLOCK_BYTES)
{
buffer += (char)0x00;
}
uint32 block[BLOCK_INTS];
buffer_to_block(buffer, block);
if (orig_size > BLOCK_BYTES - 8)
{
transform(block);
for (unsigned int i = 0; i < BLOCK_INTS - 2; i++)
{
block[i] = 0;
}
}
/* Append total_bits, split this uint64 into two uint32 */
block[BLOCK_INTS - 1] = total_bits;
block[BLOCK_INTS - 2] = (total_bits >> 32);
transform(block);
/* Hex std::string */
std::ostringstream result;
for (unsigned int i = 0; i < DIGEST_INTS; i++)
{
result << std::hex << std::setfill('0') << std::setw(8);
result << (digest[i] & 0xffffffff);
}
/* Reset for next run */
reset();
return result.str();
}
std::string SHA1::from_file(const std::string &filename)
{
std::ifstream stream(filename.c_str(), std::ios::binary);
SHA1 checksum;
checksum.update(stream);
return checksum.final();
}
void SHA1::reset()
{
/* SHA1 initialization constants */
digest[0] = 0x67452301;
digest[1] = 0xefcdab89;
digest[2] = 0x98badcfe;
digest[3] = 0x10325476;
digest[4] = 0xc3d2e1f0;
/* Reset counters */
transforms = 0;
buffer = "";
}
/*
* Hash a single 512-bit block. This is the core of the algorithm.
*
* block holds the 16 words of the block and is modified in place: from
* round 16 onwards SHA1_BLK overwrites its entries with the expanded message
* schedule, so the caller's array contents are not preserved.
* Folds the result into digest[] and increments transforms.
*
* NOTE(review): the parameter is spelled block[BLOCK_BYTES], but the callers
* in this file pass arrays of BLOCK_INTS words. The bound of an array
* parameter is ignored by the compiler, and the macros only index with
* i < 16 or (i & 15), so this is misleading rather than an overrun.
*/
void SHA1::transform(uint32 block[BLOCK_BYTES])
{
/* Copy digest[] to working vars */
uint32 a = digest[0];
uint32 b = digest[1];
uint32 c = digest[2];
uint32 d = digest[3];
uint32 e = digest[4];
/* 4 rounds of 20 operations each. Loop unrolled. */
/* The argument order rotates one position per step (a,b,c,d,e -> e,a,b,c,d
   -> ...) instead of shuffling the five working variables after each step. */
/* Steps 0-15: SHA1_R0 reads the message words straight from block[i] */
SHA1_R0(a,b,c,d,e, 0);
SHA1_R0(e,a,b,c,d, 1);
SHA1_R0(d,e,a,b,c, 2);
SHA1_R0(c,d,e,a,b, 3);
SHA1_R0(b,c,d,e,a, 4);
SHA1_R0(a,b,c,d,e, 5);
SHA1_R0(e,a,b,c,d, 6);
SHA1_R0(d,e,a,b,c, 7);
SHA1_R0(c,d,e,a,b, 8);
SHA1_R0(b,c,d,e,a, 9);
SHA1_R0(a,b,c,d,e,10);
SHA1_R0(e,a,b,c,d,11);
SHA1_R0(d,e,a,b,c,12);
SHA1_R0(c,d,e,a,b,13);
SHA1_R0(b,c,d,e,a,14);
SHA1_R0(a,b,c,d,e,15);
/* Steps 16-19: same mixing and constant as R0, word now comes from SHA1_BLK */
SHA1_R1(e,a,b,c,d,16);
SHA1_R1(d,e,a,b,c,17);
SHA1_R1(c,d,e,a,b,18);
SHA1_R1(b,c,d,e,a,19);
/* Steps 20-39: SHA1_R2 */
SHA1_R2(a,b,c,d,e,20);
SHA1_R2(e,a,b,c,d,21);
SHA1_R2(d,e,a,b,c,22);
SHA1_R2(c,d,e,a,b,23);
SHA1_R2(b,c,d,e,a,24);
SHA1_R2(a,b,c,d,e,25);
SHA1_R2(e,a,b,c,d,26);
SHA1_R2(d,e,a,b,c,27);
SHA1_R2(c,d,e,a,b,28);
SHA1_R2(b,c,d,e,a,29);
SHA1_R2(a,b,c,d,e,30);
SHA1_R2(e,a,b,c,d,31);
SHA1_R2(d,e,a,b,c,32);
SHA1_R2(c,d,e,a,b,33);
SHA1_R2(b,c,d,e,a,34);
SHA1_R2(a,b,c,d,e,35);
SHA1_R2(e,a,b,c,d,36);
SHA1_R2(d,e,a,b,c,37);
SHA1_R2(c,d,e,a,b,38);
SHA1_R2(b,c,d,e,a,39);
/* Steps 40-59: SHA1_R3 */
SHA1_R3(a,b,c,d,e,40);
SHA1_R3(e,a,b,c,d,41);
SHA1_R3(d,e,a,b,c,42);
SHA1_R3(c,d,e,a,b,43);
SHA1_R3(b,c,d,e,a,44);
SHA1_R3(a,b,c,d,e,45);
SHA1_R3(e,a,b,c,d,46);
SHA1_R3(d,e,a,b,c,47);
SHA1_R3(c,d,e,a,b,48);
SHA1_R3(b,c,d,e,a,49);
SHA1_R3(a,b,c,d,e,50);
SHA1_R3(e,a,b,c,d,51);
SHA1_R3(d,e,a,b,c,52);
SHA1_R3(c,d,e,a,b,53);
SHA1_R3(b,c,d,e,a,54);
SHA1_R3(a,b,c,d,e,55);
SHA1_R3(e,a,b,c,d,56);
SHA1_R3(d,e,a,b,c,57);
SHA1_R3(c,d,e,a,b,58);
SHA1_R3(b,c,d,e,a,59);
/* Steps 60-79: SHA1_R4 */
SHA1_R4(a,b,c,d,e,60);
SHA1_R4(e,a,b,c,d,61);
SHA1_R4(d,e,a,b,c,62);
SHA1_R4(c,d,e,a,b,63);
SHA1_R4(b,c,d,e,a,64);
SHA1_R4(a,b,c,d,e,65);
SHA1_R4(e,a,b,c,d,66);
SHA1_R4(d,e,a,b,c,67);
SHA1_R4(c,d,e,a,b,68);
SHA1_R4(b,c,d,e,a,69);
SHA1_R4(a,b,c,d,e,70);
SHA1_R4(e,a,b,c,d,71);
SHA1_R4(d,e,a,b,c,72);
SHA1_R4(c,d,e,a,b,73);
SHA1_R4(b,c,d,e,a,74);
SHA1_R4(a,b,c,d,e,75);
SHA1_R4(e,a,b,c,d,76);
SHA1_R4(d,e,a,b,c,77);
SHA1_R4(c,d,e,a,b,78);
SHA1_R4(b,c,d,e,a,79);
/* Add the working vars back into digest[] */
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
/* Count the number of transformations */
/* final() uses this count to compute the total message length */
transforms++;
}
void SHA1::buffer_to_block(const std::string &buffer, uint32 block[BLOCK_BYTES])
{
/* Convert the std::string (byte buffer) to a uint32 array (MSB) */
for (unsigned int i = 0; i < BLOCK_INTS; i++)
{
block[i] = (buffer[4*i+3] & 0xff)
| (buffer[4*i+2] & 0xff)<<8
| (buffer[4*i+1] & 0xff)<<16
| (buffer[4*i+0] & 0xff)<<24;
}
}
/*
 * Read up to max bytes from is and store exactly the bytes obtained in s.
 *
 * is:  source stream; after the call its state tells whether all max bytes
 *      were delivered (a short read sets failbit, as with istream::read).
 * s:   replaced with the bytes read; left untouched if an exception escapes.
 * max: number of bytes requested; a value <= 0 requests nothing.
 *
 * The scratch storage is a std::string rather than a raw new[]/delete[] pair,
 * so it is released even when the read or the allocation throws.
 */
void SHA1::read(std::istream &is, std::string &s, int max)
{
    const std::streamsize wanted = (max > 0) ? max : 0;

    std::string chunk(static_cast<std::string::size_type>(wanted), '\0');
    is.read(&chunk[0], wanted);

    /* Keep only what the stream actually delivered */
    chunk.resize(static_cast<std::string::size_type>(is.gcount()));
    s.swap(chunk);
}
std::string sha1(const std::string &string)
{
SHA1 checksum;
checksum.update(string);
return checksum.final();
}

57
r5dev/mathlib/sha1.h Normal file
View File

@ -0,0 +1,57 @@
/*
sha1.h - header of
============
SHA-1 in C++
============
100% Public Domain.
Original C Code
-- Steve Reid <steve@edmweb.com>
Small changes to fit into bglibs
-- Bruce Guenter <bruce@untroubled.org>
Translation to simpler C++ Code
-- Volker Grabsch <vog@notjusthosting.com>
*/
#ifndef SHA1_HPP
#define SHA1_HPP
#include <iostream>
#include <string>
/*
 * Incremental SHA-1 hasher.
 *
 * Feed data with one or more update() calls, then call final() to obtain the
 * digest as a hexadecimal string. final() resets the object, so the same
 * instance can be reused for another message.
 */
class SHA1
{
public:
    /* Creates a hasher in its initial (reset) state */
    SHA1();

    /* Appends the bytes of s to the message being hashed */
    void update(const std::string &s);

    /* Appends everything that can be read from is to the message being hashed */
    void update(std::istream &is);

    /* Pads the message, returns the digest as 40 hex characters and resets */
    std::string final();

    /* Hashes the contents of the named file (read in binary mode) */
    static std::string from_file(const std::string &filename);

private:
    typedef unsigned long int uint32;   /* just needs to be at least 32bit */
    typedef unsigned long long uint64;  /* just needs to be at least 64bit */

    static const unsigned int DIGEST_INTS = 5;  /* number of 32bit integers per SHA1 digest */
    static const unsigned int BLOCK_INTS = 16;  /* number of 32bit integers per SHA1 block */
    static const unsigned int BLOCK_BYTES = BLOCK_INTS * 4;

    uint32 digest[DIGEST_INTS];  /* running hash state */
    std::string buffer;          /* bytes received but not yet hashed (less than one block) */
    uint64 transforms;           /* number of blocks hashed so far */

    /* Restores the initial hash state and clears buffer and transforms */
    void reset();

    /* Hashes one block of BLOCK_INTS words into digest; block is modified */
    void transform(uint32 block[BLOCK_INTS]);

    /* Packs BLOCK_BYTES bytes of buffer into BLOCK_INTS words, MSB first */
    static void buffer_to_block(const std::string &buffer, uint32 block[BLOCK_INTS]);

    /* Reads up to max bytes from is and stores the bytes obtained in s */
    static void read(std::istream &is, std::string &s, int max);
};
/* Convenience wrapper: returns the hex SHA-1 digest of the bytes in string */
std::string sha1(const std::string &string);
#endif /* SHA1_HPP */

View File

@ -103,6 +103,15 @@ void CIOStream::Close()
}
}
//-----------------------------------------------------------------------------
// Purpose: flushes the ofstream; does nothing when the stream is not writable
//-----------------------------------------------------------------------------
void CIOStream::Flush()
{
	if (!IsWritable())
	{
		return;
	}

	m_oStream.flush();
}
//-----------------------------------------------------------------------------
// Purpose: gets the position of the current character in the stream
//-----------------------------------------------------------------------------

View File

@ -16,6 +16,7 @@ public:
bool Open(const string& svFileFullPath, Mode_t eMode);
void Close();
void Flush();
size_t GetPosition();
void SetPosition(int64_t nOffset);
@ -86,7 +87,6 @@ public:
return;
m_oStream.write(reinterpret_cast<const char*>(tValue), nSize);
m_oStream.flush();
}
void WriteString(string svInput);

View File

@ -11,6 +11,7 @@
#include "tier1/cvar.h"
#include "mathlib/adler32.h"
#include "mathlib/crc32.h"
#include "mathlib/sha1.h"
#include "vpklib/packedstore.h"
//-----------------------------------------------------------------------------
@ -300,10 +301,11 @@ void CPackedStore::BuildManifest(const vector<VPKEntryBlock_t>& vBlock, const st
{
jEntry[vBlock[i].m_svBlockPath] =
{
{ "preloadData", vBlock[i].m_nPreloadData},
{ "entryFlags", vBlock[i].m_vvEntries[0].m_nEntryFlags},
{ "textureFlags", vBlock[i].m_vvEntries[0].m_nTextureFlags},
{ "useCompression", vBlock[i].m_vvEntries[0].m_nCompressedSize != vBlock[i].m_vvEntries[0].m_nUncompressedSize}
{ "preloadData", vBlock[i].m_nPreloadData },
{ "entryFlags", vBlock[i].m_vvEntries[0].m_nEntryFlags },
{ "textureFlags", vBlock[i].m_vvEntries[0].m_nTextureFlags },
{ "useCompression", vBlock[i].m_vvEntries[0].m_nCompressedSize != vBlock[i].m_vvEntries[0].m_nUncompressedSize },
{ "useDataSharing", true }
};
}
@ -378,10 +380,12 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const
CIOStream reader(vPaths[i], CIOStream::Mode_t::READ);
if (reader.IsReadable())
{
uint16_t nPreloadData = 0i16;
uint32_t nEntryFlags = static_cast<uint32_t>(EPackedEntryFlags::ENTRY_VISIBLE) | static_cast<uint32_t>(EPackedEntryFlags::ENTRY_CACHE);
uint16_t nPreloadData = 0i16;
uint32_t nEntryFlags = static_cast<uint32_t>(EPackedEntryFlags::ENTRY_VISIBLE) | static_cast<uint32_t>(EPackedEntryFlags::ENTRY_CACHE);
uint16_t nTextureFlags = static_cast<short>(EPackedTextureFlags::TEXTURE_DEFAULT); // !TODO: Reverse these.
bool bUseCompression = true;
bool bUseCompression = true;
bool bUseDataSharing = true;
string svEntryHash;
if (!jManifest.is_null())
{
@ -394,6 +398,7 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const
nEntryFlags = jEntry.at("entryFlags").get<uint32_t>();
nTextureFlags = jEntry.at("textureFlags").get<uint16_t>();
bUseCompression = jEntry.at("useCompression").get<bool>();
bUseDataSharing = jEntry.at("useDataSharing").get<bool>();
}
}
catch (const std::exception& ex)
@ -406,7 +411,10 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const
for (size_t j = 0; j < vEntryBlocks[i].m_vvEntries.size(); j++)
{
uint8_t* pSrc = new uint8_t[vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize];
uint8_t* pDest = new uint8_t[COMP_MAX];;
uint8_t* pDest = new uint8_t[COMP_MAX];
bool bShared = false;
bool bCompressed = bUseCompression;
reader.Read(*pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize);
vEntryBlocks[i].m_vvEntries[j].m_nArchiveOffset = writer.GetPosition();
@ -420,25 +428,43 @@ void CPackedStore::PackAll(const VPKPair_t& vPair, const string& svPathIn, const
Warning(eDLL_T::FS, "'lzham::lzham_lib_compress_memory' returned with status '%d' (entry will be packed without compression).\n", m_lzCompStatus);
vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize = vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize;
writer.Write(pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize);
}
else
{
writer.Write(pDest, vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize);
memmove(pDest, pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize);
}
}
else // Write data uncompressed.
{
writer.Write(pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize);
vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize = vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize;
memmove(pDest, pSrc, vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize);
}
vEntryBlocks[i].m_vvEntries[j].m_bIsCompressed = vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize != vEntryBlocks[i].m_vvEntries[j].m_nUncompressedSize;
if (bUseDataSharing)
{
svEntryHash = sha1(string(reinterpret_cast<char*>(pDest), vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize));
if (auto it{ m_mEntryHasMap.find(svEntryHash) }; it != std::end(m_mEntryHasMap))
{
vEntryBlocks[i].m_vvEntries[j] = it->second;
bShared = true;
}
else
{
m_mEntryHasMap.insert({ svEntryHash, vEntryBlocks[i].m_vvEntries[j] });
bShared = false;
}
}
if (!bShared)
{
writer.Write(pDest, vEntryBlocks[i].m_vvEntries[j].m_nCompressedSize);
}
delete[] pDest;
delete[] pSrc;
}
}
}
m_mEntryHasMap.clear();
VPKDir_t vDir = VPKDir_t();
vDir.Build(svPathOut + vPair.m_svDirectoryName, vEntryBlocks);
}
@ -511,11 +537,12 @@ void CPackedStore::UnpackAll(const VPKDir_t& vpkDir, const string& svPathOut)
if (m_nEntryCount == vBlock.m_vvEntries.size()) // Only validate after last entry in block had been written.
{
m_nEntryCount = 0;
m_nCrc32_Internal = vBlock.m_nCrc32;
oStream.Flush();
ValidateCRC32PostDecomp(svFilePath);
//ValidateAdler32PostDecomp(svFilePath);
m_nEntryCount = 0;
}
}escape:;
}

View File

@ -119,6 +119,7 @@ class CPackedStore
lzham_compress_status_t m_lzCompStatus {}; // LZham compression status.
lzham_decompress_params m_lzDecompParams {}; // LZham decompression parameters.
lzham_decompress_status_t m_lzDecompStatus {}; // LZham decompression status.
std::unordered_map<string, VPKEntryDescriptor_t> m_mEntryHasMap{};
public:
void InitLzCompParams(void);

View File

@ -60,6 +60,7 @@
<ClCompile Include="..\mathlib\bits.cpp" />
<ClCompile Include="..\mathlib\crc32.cpp" />
<ClCompile Include="..\mathlib\IceKey.cpp" />
<ClCompile Include="..\mathlib\sha1.cpp" />
<ClCompile Include="..\mathlib\sha256.cpp" />
<ClCompile Include="..\networksystem\pylon.cpp" />
<ClCompile Include="..\networksystem\r5net.cpp" />
@ -189,6 +190,7 @@
<ClInclude Include="..\mathlib\crc32.h" />
<ClInclude Include="..\mathlib\IceKey.H" />
<ClInclude Include="..\mathlib\parallel_for.h" />
<ClInclude Include="..\mathlib\sha1.h" />
<ClInclude Include="..\mathlib\sha256.h" />
<ClInclude Include="..\mathlib\swap.h" />
<ClInclude Include="..\mathlib\vector.h" />

View File

@ -504,6 +504,9 @@
<ClCompile Include="..\squirrel\sqscript.cpp">
<Filter>sdk\squirrel</Filter>
</ClCompile>
<ClCompile Include="..\mathlib\sha1.cpp">
<Filter>sdk\mathlib</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\cdll_engine_int.h">
@ -1508,6 +1511,9 @@
<ClInclude Include="..\squirrel\sqscript.h">
<Filter>sdk\squirrel</Filter>
</ClInclude>
<ClInclude Include="..\mathlib\sha1.h">
<Filter>sdk\mathlib</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Image Include="..\shared\resource\lockedserver.png">

View File

@ -190,6 +190,7 @@
<ClInclude Include="..\mathlib\crc32.h" />
<ClInclude Include="..\mathlib\IceKey.H" />
<ClInclude Include="..\mathlib\parallel_for.h" />
<ClInclude Include="..\mathlib\sha1.h" />
<ClInclude Include="..\mathlib\sha256.h" />
<ClInclude Include="..\mathlib\swap.h" />
<ClInclude Include="..\mathlib\vector.h" />
@ -478,6 +479,7 @@
<ClCompile Include="..\mathlib\bits.cpp" />
<ClCompile Include="..\mathlib\crc32.cpp" />
<ClCompile Include="..\mathlib\IceKey.cpp" />
<ClCompile Include="..\mathlib\sha1.cpp" />
<ClCompile Include="..\mathlib\sha256.cpp" />
<ClCompile Include="..\networksystem\pylon.cpp" />
<ClCompile Include="..\networksystem\r5net.cpp" />

View File

@ -1119,6 +1119,9 @@
<ClInclude Include="..\squirrel\sqscript.h">
<Filter>sdk\squirrel</Filter>
</ClInclude>
<ClInclude Include="..\mathlib\sha1.h">
<Filter>sdk\mathlib</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\common\opcodes.cpp">
@ -1373,6 +1376,9 @@
<ClCompile Include="..\squirrel\sqscript.cpp">
<Filter>sdk\squirrel</Filter>
</ClCompile>
<ClCompile Include="..\mathlib\sha1.cpp">
<Filter>sdk\mathlib</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\Dedicated.def" />

View File

@ -66,6 +66,7 @@
<ClCompile Include="..\mathlib\bits.cpp" />
<ClCompile Include="..\mathlib\crc32.cpp" />
<ClCompile Include="..\mathlib\IceKey.cpp" />
<ClCompile Include="..\mathlib\sha1.cpp" />
<ClCompile Include="..\mathlib\sha256.cpp" />
<ClCompile Include="..\networksystem\pylon.cpp" />
<ClCompile Include="..\networksystem\r5net.cpp" />
@ -205,6 +206,7 @@
<ClInclude Include="..\mathlib\crc32.h" />
<ClInclude Include="..\mathlib\IceKey.H" />
<ClInclude Include="..\mathlib\parallel_for.h" />
<ClInclude Include="..\mathlib\sha1.h" />
<ClInclude Include="..\mathlib\sha256.h" />
<ClInclude Include="..\mathlib\swap.h" />
<ClInclude Include="..\mathlib\vector.h" />

View File

@ -534,6 +534,9 @@
<ClCompile Include="..\squirrel\sqscript.cpp">
<Filter>sdk\squirrel</Filter>
</ClCompile>
<ClCompile Include="..\mathlib\sha1.cpp">
<Filter>sdk\mathlib</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\cdll_engine_int.h">
@ -1571,6 +1574,9 @@
<ClInclude Include="..\squirrel\sqscript.h">
<Filter>sdk\squirrel</Filter>
</ClInclude>
<ClInclude Include="..\mathlib\sha1.h">
<Filter>sdk\mathlib</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Image Include="..\shared\resource\lockedserver.png">