RTech: cleanup ZStd pak decoder and fix bugs

* Pak_InitDecoder() now takes the output buf and mask as parameters.
* Pak_InitDecoder() checks if the provided masks + 1 (i.e. the buffer sizes) are a power of 2 (required).
* Pak_ComputeRingBufferFrame() now uses the bit mask instead of modulo to determine # bytes used.
* Fixed a bug where PakDecoder_t::bufferSizeNeeded could be bigger than the file stream, causing a deadlock.
This commit is contained in:
Kawe Mazidjatari 2024-01-28 01:57:11 +01:00
parent 5ea886ab65
commit 1c2b02b972
5 changed files with 293 additions and 276 deletions

View File

@ -563,7 +563,7 @@ void Pak_Decompress_f(const CCommand& args)
const bool usesCustomCompression = pHeader->flags & PAK_HEADER_FLAGS_ZSTREAM;
PakDecoder_t decoder{};
const uint64_t nDecompSize = Pak_InitDecoder(&decoder, pPakBuf, UINT64_MAX, nFileSize, NULL, sizeof(PakFileHeader_t), usesCustomCompression);
const uint64_t nDecompSize = Pak_InitDecoder(&decoder, pPakBuf, nullptr, UINT64_MAX, UINT64_MAX, nFileSize, NULL, sizeof(PakFileHeader_t), usesCustomCompression);
if (nDecompSize != pHeader->decompressedSize)
{

View File

@ -363,7 +363,7 @@ struct PakDecoder_t
uint64_t outputInvMask;
uint32_t headerOffset;
uint32_t dword44;
uint32_t padding; // unused data, available for other stuff
uint64_t inBufBytePos;
uint64_t outBufBytePos;
@ -377,7 +377,11 @@ struct PakDecoder_t
uint32_t dword6C;
uint64_t qword70;
size_t compressedStreamSize;
union
{
size_t compressedStreamSize;
size_t frameHeaderSize;
};
union
{
@ -386,6 +390,12 @@ struct PakDecoder_t
};
};
// describes a span of data inside a ring buffer, as filled in by
// Pak_ComputeRingBufferFrame()
struct PakRingBufferFrame_t
{
// seek position masked with the ring buffer mask (seekPos & bufMask)
size_t bufIndex;
// length of the frame: the remaining data (dataLen - seekPos), clamped to
// the number of bytes between bufIndex and the end of the ring buffer
size_t frameLen;
};
struct PakFile_t;
class PakLoadedInfo_t

View File

@ -130,16 +130,122 @@ static const unsigned char /*141313180*/ s_defaultDecoderLUT[] =
0x4C, 0x39, 0x56, 0x75, 0x42, 0x52, 0x65, 0x75, 0x70, 0x35, 0x31, 0x77, 0x4C, 0x51, 0x64, 0x61,
};
//-----------------------------------------------------------------------------
// tells whether the output buffer length is sufficient to keep decoding; this
// is the case when outLen covers the whole decompressed size, or when it
// reaches past the current output position with its outputInvMask bits cleared
// by at least outputInvMask + 1 bytes
//-----------------------------------------------------------------------------
bool Pak_HasEnoughDecodeBufferAvailable(PakDecoder_t* const decoder, const size_t outLen)
{
    // a buffer that covers the entire decompressed size is always sufficient
    if (outLen >= decoder->decompSize)
        return true;

    // current output position with the bits of outputInvMask cleared
    const uint64_t maskedOutPos = (decoder->outBufBytePos & ~decoder->outputInvMask);

    return outLen >= decoder->outputInvMask + (maskedOutPos + 1);
}
//-----------------------------------------------------------------------------
// tells whether enough source data has been streamed in to decode the next
// block; the decoder tracks the required amount in bufferSizeNeeded
//-----------------------------------------------------------------------------
bool Pak_HasEnoughStreamedDataForDecode(PakDecoder_t* const decoder, const size_t inLen)
{
    // fewer bytes streamed than the decoder needs to decode the current
    // block buffer contiguously; cannot decode yet
    if (inLen < decoder->bufferSizeNeeded)
        return false;

    return true;
}
//-----------------------------------------------------------------------------
// gets the frame for the data in the ring buffer; the returned frame always
// ends either at the end of the ring buffer, or at the end of the data itself,
// whichever comes first
//
// bufMask - ring buffer mask (buffer size - 1)
// seekPos - absolute position in the data stream
// dataLen - total length of the data stream
//
// returns the index into the ring buffer (seekPos & bufMask) together with the
// number of bytes that are available contiguously from that index
//
// NOTE(review): assumes seekPos <= dataLen, otherwise (dataLen - seekPos)
// wraps around; confirm that all callers guarantee this
//-----------------------------------------------------------------------------
PakRingBufferFrame_t Pak_ComputeRingBufferFrame(const uint64_t bufMask, const size_t seekPos, const size_t dataLen)
{
    PakRingBufferFrame_t ring;
    ring.bufIndex = seekPos & bufMask;

    // the number of bytes from bufIndex up to the end of the ring buffer,
    // minus one; the "+1" is deliberately deferred, because bufMask+1 wraps
    // around to 0 when a full-range mask (UINT64_MAX) is used for contiguous
    // buffers, which previously clamped the frame length to 0 at index 0
    const size_t availMinusOne = bufMask - ring.bufIndex;
    const size_t dataRemaining = dataLen - seekPos;

    // the last part of the data might be smaller than the remainder of the
    // ring buffer; clamp it
    if (dataRemaining != 0 && dataRemaining - 1 > availMinusOne)
        ring.frameLen = availMinusOne + 1; // cannot wrap: availMinusOne < dataRemaining-1
    else
        ring.frameLen = dataRemaining;

    return ring;
}
//-----------------------------------------------------------------------------
// initializes the RTech decoder
//
// parses the bit-packed frame header found at (dataOffset + headerSize) in the
// input buffer and seeds the decoder state (sizes, masks, bit reader) from it
//
// decoder    - decoder context that receives the parsed state
// fileBuffer - base address of the input buffer
// inputMask  - mask applied to every input position before indexing fileBuffer
// dataSize   - used as bufferSizeNeeded when the parsed input mask is all ones
// dataOffset - added to headerSize to locate the frame header; also added onto
//              bufferSizeNeeded and qword70
// headerSize - added to dataOffset to locate the frame header
//
// returns decoder->decompSize as parsed from the frame header
//
// NOTE(review): relies on MSVC extensions (i64/ui64 suffixes, __int8/__int64)
// and reads through _QWORD casts (presumably 64-bit) at arbitrary byte
// positions of the input buffer
//-----------------------------------------------------------------------------
size_t Pak_RStreamDecoderInit(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize)
{
// first word of the frame header
uint64_t frameHeader = *(_QWORD*)((inputMask & (dataOffset + headerSize)) + fileBuffer);
// the low 6 bits hold the number of bits used to store the decompressed size
const int decompressedSizeBits = frameHeader & 0x3F;
frameHeader >>= 6;
// decompressed size: the next decompressedSizeBits bits of the header, with
// an additional 1 bit set directly above them
decoder->decompSize = (1i64 << decompressedSizeBits) | frameHeader & ((1i64 << decompressedSizeBits) - 1);
// position of the word following the first header word; it supplies the bits
// that are placed above what is left of the first word
const uint64_t bytePos = dataOffset + headerSize + 8;
const int64_t currByteLow = *(_QWORD*)((inputMask & bytePos) + fileBuffer) << (64 - ((unsigned __int8)decompressedSizeBits + 6));
// advance by the whole bytes covered by the (6 + decompressedSizeBits) bits
// consumed so far
decoder->inBufBytePos = bytePos + ((unsigned __int64)(unsigned int)(decompressedSizeBits + 6) >> 3);
// leftover bits of the partially consumed byte, plus the 13 bits that get
// shifted out of currByte further below
const uint32_t bitPosFinal = ((decompressedSizeBits + 6) & 7) + 13;
const uint64_t currByte = (0xFFFFFFFFFFFFFFFFui64 >> ((decompressedSizeBits + 6) & 7)) & ((frameHeader >> decompressedSizeBits) | currByteLow);
// bits 0..5 of currByte give the width of the input mask in bits, in the
// range 1..64 (a stored value of 0 yields 64)
const uint32_t currbits = (((_BYTE)currByte - 1) & 0x3F) + 1;
// mask with the low 'currbits' bits set
const uint64_t invMaskIn = 0xFFFFFFFFFFFFFFFFui64 >> (64 - (unsigned __int8)currbits);
decoder->inputInvMask = invMaskIn;
// bits 6..11 of currByte give the width of the output mask, decoded the same
// way as the input mask width
const uint64_t invMaskOut = 0xFFFFFFFFFFFFFFFFui64 >> (64 - ((((currByte >> 6) - 1) & 0x3F) + 1));
decoder->outputInvMask = invMaskOut;
// drop the 13 consumed bits and refill the upper bits from the input buffer
const uint64_t finalByteFull = (currByte >> 13) | (*(_QWORD*)((inputMask & decoder->inBufBytePos) + fileBuffer) << (64 - (unsigned __int8)bitPosFinal));
const uint32_t finalBitOffset = bitPosFinal & 7;
decoder->inBufBytePos += bitPosFinal >> 3;
const uint64_t finalByte = (0xFFFFFFFFFFFFFFFFui64 >> finalBitOffset) & finalByteFull;
if (decoder->inputInvMask == 0xFFFFFFFFFFFFFFFFui64)
{
// all-ones input mask: no size field follows, the provided data size is what
// the decoder needs
decoder->headerOffset = 0;
decoder->bufferSizeNeeded = dataSize;
}
else
{
// a size field of (currbits >> 3) + 1 bytes follows at the current input
// position; it is read as the initial bufferSizeNeeded and then skipped
const uint64_t finalPos = inputMask & decoder->inBufBytePos;
decoder->headerOffset = (currbits >> 3) + 1;
decoder->inBufBytePos += (currbits >> 3) + 1;
decoder->bufferSizeNeeded = *(_QWORD*)(finalPos + fileBuffer) & ((1i64 << (8 * ((unsigned __int8)(currbits >> 3) + 1))) - 1);;
}
// the needed size is offset by dataOffset
decoder->bufferSizeNeeded += dataOffset;
// initial bit reader state for the decode function
decoder->currentByte = finalByte;
decoder->currentBit = finalBitOffset;
// NOTE(review): the exact meaning of qword70 and dword6C is not evident from
// this function; they are only seeded here
decoder->qword70 = decoder->inputInvMask + dataOffset - 6;
decoder->dword6C = 0;
// by default the first stream spans the whole compressed/decompressed data
decoder->compressedStreamSize = decoder->bufferSizeNeeded;
decoder->decompressedStreamSize = decoder->decompSize;
// NOTE(review): the first operand is always true (the masked value lies in
// 0..63 and can never equal -1), so only the size comparison decides here
if ((((unsigned __int8)(currByte >> 6) - 1) & 0x3F) != -1i64 && decoder->decompSize - 1 > decoder->outputInvMask)
{
// the decompressed data is larger than (outputInvMask + 1) bytes: limit the
// first decompressed stream to that size, and take headerOffset off the
// compressed stream size
const uint64_t streamCompressedSize = decoder->bufferSizeNeeded - decoder->headerOffset;
decoder->compressedStreamSize = streamCompressedSize;
decoder->decompressedStreamSize = decoder->outputInvMask + 1;
}
return decoder->decompSize;
}
//-----------------------------------------------------------------------------
// decodes the RTech data stream up to available buffer or data
//-----------------------------------------------------------------------------
bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen)
{
bool result; // al
uint64_t m_decompBytePosition; // r15
uint8_t* m_outputBuf; // r11
uint32_t m_currentBit; // ebp
uint64_t m_currentByte; // rsi
uint64_t m_fileBytePosition; // rdi
uint64_t outBufBytePos; // r15
uint8_t* outputBuf; // r11
uint32_t currentBit; // ebp
uint64_t currentByte; // rsi
uint64_t inBufBytePos; // rdi
size_t qword70; // r12
const uint8_t* m_inputBuf; // r13
const uint8_t* inputBuf; // r13
uint32_t dword6C; // ecx
uint64_t v13; // rsi
unsigned __int64 i; // rax
@ -152,15 +258,15 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
int v21; // ecx
unsigned __int64 v22; // r11
int v23; // edx
uint64_t m_outputMask; // rax
uint64_t outputMask; // rax
int v25; // r8d
unsigned int v26; // r13d
uint64_t v27; // r10
uint8_t* v28; // rax
uint8_t* v29; // r10
size_t m_decompSize; // r9
uint64_t m_inputInvMask; // r10
uint64_t m_headerOffset; // r8
size_t decompSize; // r9
uint64_t inputInvMask; // r10
uint64_t headerOffset; // r8
uint64_t v33; // rax
uint64_t v34; // rax
uint64_t v35; // rax
@ -202,49 +308,49 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
uint32_t v71; // [rsp+60h] [rbp+8h]
const uint8_t* v74; // [rsp+78h] [rbp+20h]
m_decompBytePosition = decoder->outBufBytePos;
outBufBytePos = decoder->outBufBytePos;
m_outputBuf = decoder->outputBuf;
m_currentBit = decoder->currentBit;
m_currentByte = decoder->currentByte;
m_fileBytePosition = decoder->inBufBytePos;
outputBuf = decoder->outputBuf;
currentBit = decoder->currentBit;
currentByte = decoder->currentByte;
inBufBytePos = decoder->inBufBytePos;
qword70 = decoder->qword70;
m_inputBuf = decoder->inputBuf;
inputBuf = decoder->inputBuf;
if (decoder->compressedStreamSize < qword70)
qword70 = decoder->compressedStreamSize;
dword6C = decoder->dword6C;
v74 = m_inputBuf;
v70 = m_outputBuf;
v74 = inputBuf;
v70 = outputBuf;
v71 = dword6C;
if (!m_currentBit)
if (!currentBit)
goto LABEL_11;
v13 = (*(_QWORD*)&m_inputBuf[m_fileBytePosition & decoder->inputMask] << (64 - (unsigned __int8)m_currentBit)) | m_currentByte;
for (i = m_currentBit; ; i = m_currentBit)
v13 = (*(_QWORD*)&inputBuf[inBufBytePos & decoder->inputMask] << (64 - (unsigned __int8)currentBit)) | currentByte;
for (i = currentBit; ; i = currentBit)
{
m_currentBit &= 7u;
m_fileBytePosition += i >> 3;
currentBit &= 7u;
inBufBytePos += i >> 3;
dword6C = v71;
m_currentByte = (0xFFFFFFFFFFFFFFFFui64 >> m_currentBit) & v13;
currentByte = (0xFFFFFFFFFFFFFFFFui64 >> currentBit) & v13;
LABEL_11:
v15 = (unsigned __int64)dword6C << 8;
v16 = dword6C;
v17 = s_defaultDecoderLUT[(unsigned __int8)m_currentByte + 512 + v15];
v18 = (unsigned __int8)m_currentByte + v15;
m_currentBit += v17;
v19 = m_currentByte >> v17;
v17 = s_defaultDecoderLUT[(unsigned __int8)currentByte + 512 + v15];
v18 = (unsigned __int8)currentByte + v15;
currentBit += v17;
v19 = currentByte >> v17;
v20 = (unsigned int)(char)s_defaultDecoderLUT[v18];
if ((s_defaultDecoderLUT[v18] & 0x80u) != 0)
{
v56 = -(int)v20;
v57 = &m_inputBuf[m_fileBytePosition & decoder->inputMask];
v57 = &inputBuf[inBufBytePos & decoder->inputMask];
v71 = 1;
v58 = &m_outputBuf[m_decompBytePosition & decoder->outputMask];
v58 = &outputBuf[outBufBytePos & decoder->outputMask];
if (v56 == s_defaultDecoderLUT[v16 + 1248])
{
if ((~m_fileBytePosition & decoder->inputInvMask) < 0xF || (decoder->outputInvMask & ~m_decompBytePosition) < 0xF || decoder->decompSize - m_decompBytePosition < 0x10)
if ((~inBufBytePos & decoder->inputInvMask) < 0xF || (decoder->outputInvMask & ~outBufBytePos) < 0xF || decoder->decompSize - outBufBytePos < 0x10)
v56 = 1;
v59 = v19;
v60 = v19 >> 3;
@ -259,11 +365,11 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
{
v62 = v60 >> 4;
v65 = v60 & 0xF;
m_currentBit += 4;
currentBit += 4;
v63 = *(_DWORD*)&s_defaultDecoderLUT[4 * v65 + 1152];
v64 = s_defaultDecoderLUT[v65 + 1216];
}
m_currentBit += v64 + 3;
currentBit += v64 + 3;
v19 = v62 >> v64;
v66 = v63 + (v62 & ((1 << v64) - 1)) + v56;
for (j = v66 >> 3; j; --j)
@ -287,15 +393,15 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
}
if ((v66 & 1) != 0)
*v58 = *v57;
m_fileBytePosition += v66;
m_decompBytePosition += v66;
inBufBytePos += v66;
outBufBytePos += v66;
}
else
{
*(_QWORD*)v58 = *(_QWORD*)v57;
*((_QWORD*)v58 + 1) = *((_QWORD*)v57 + 1);
m_fileBytePosition += v56;
m_decompBytePosition += v56;
inBufBytePos += v56;
outBufBytePos += v56;
}
}
else
@ -304,13 +410,13 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
v71 = 0;
v22 = ((unsigned __int64)(unsigned int)v19 >> (((unsigned int)(v21 - 31) >> 3) & 6)) & 0x3F;
v23 = 1 << (v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4)));
m_currentBit += (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4));
m_outputMask = decoder->outputMask;
currentBit += (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4));
outputMask = decoder->outputMask;
v25 = 16 * (v23 + ((v23 - 1) & (v19 >> ((((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088]))));
v19 >>= (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4));
v26 = v25 + s_defaultDecoderLUT[v22 + 1024] - 16;
v27 = m_outputMask & (m_decompBytePosition - v26);
v28 = &v70[m_decompBytePosition & m_outputMask];
v27 = outputMask & (outBufBytePos - v26);
v28 = &v70[outBufBytePos & outputMask];
v29 = &v70[v27];
if ((_DWORD)v20 == 17)
{
@ -325,26 +431,26 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
}
else
{
m_currentBit += 4;
currentBit += 4;
v46 = v41 & 0xF;
v43 = v41 >> 4;
v44 = *(_DWORD*)&s_defaultDecoderLUT[4 * v46 + 1152];
v45 = s_defaultDecoderLUT[v46 + 1216];
if (v74 && m_currentBit + v45 >= 61)
if (v74 && currentBit + v45 >= 61)
{
v47 = m_fileBytePosition++ & decoder->inputMask;
v43 |= (unsigned __int64)v74[v47] << (61 - (unsigned __int8)m_currentBit);
m_currentBit -= 8;
v47 = inBufBytePos++ & decoder->inputMask;
v43 |= (unsigned __int64)v74[v47] << (61 - (unsigned __int8)currentBit);
currentBit -= 8;
}
}
m_currentBit += v45 + 3;
currentBit += v45 + 3;
v19 = v43 >> v45;
v48 = ((unsigned int)v43 & ((1 << v45) - 1)) + v44 + 17;
m_decompBytePosition += v48;
outBufBytePos += v48;
if (v26 < 8)
{
v50 = v48 - 13;
m_decompBytePosition -= 13i64;
outBufBytePos -= 13i64;
if (v26 == 1)
{
v51 = *v29;
@ -378,59 +484,59 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
}
else
{
m_decompBytePosition += v20;
outBufBytePos += v20;
*(_QWORD*)v28 = *(_QWORD*)v29;
*((_QWORD*)v28 + 1) = *((_QWORD*)v29 + 1);
}
m_inputBuf = v74;
inputBuf = v74;
}
if (m_fileBytePosition >= qword70)
if (inBufBytePos >= qword70)
break;
LABEL_29:
m_outputBuf = v70;
v13 = (*(_QWORD*)&m_inputBuf[m_fileBytePosition & decoder->inputMask] << (64 - (unsigned __int8)m_currentBit)) | v19;
outputBuf = v70;
v13 = (*(_QWORD*)&inputBuf[inBufBytePos & decoder->inputMask] << (64 - (unsigned __int8)currentBit)) | v19;
}
if (m_decompBytePosition != decoder->decompressedStreamSize)
if (outBufBytePos != decoder->decompressedStreamSize)
goto LABEL_25;
m_decompSize = decoder->decompSize;
if (m_decompBytePosition == m_decompSize)
decompSize = decoder->decompSize;
if (outBufBytePos == decompSize)
{
result = true;
goto LABEL_69;
}
m_inputInvMask = decoder->inputInvMask;
m_headerOffset = decoder->headerOffset;
v33 = m_inputInvMask & -(__int64)m_fileBytePosition;
inputInvMask = decoder->inputInvMask;
headerOffset = decoder->headerOffset;
v33 = inputInvMask & -(__int64)inBufBytePos;
v19 >>= 1;
++m_currentBit;
if (m_headerOffset > v33)
++currentBit;
if (headerOffset > v33)
{
m_fileBytePosition += v33;
inBufBytePos += v33;
v34 = decoder->qword70;
if (m_fileBytePosition > v34)
decoder->qword70 = m_inputInvMask + v34 + 1;
if (inBufBytePos > v34)
decoder->qword70 = inputInvMask + v34 + 1;
}
v35 = m_fileBytePosition & decoder->inputMask;
m_fileBytePosition += m_headerOffset;
v36 = m_decompBytePosition + decoder->outputInvMask + 1;
v37 = *(_QWORD*)&m_inputBuf[v35] & ((1i64 << (8 * (unsigned __int8)m_headerOffset)) - 1);
v35 = inBufBytePos & decoder->inputMask;
inBufBytePos += headerOffset;
v36 = outBufBytePos + decoder->outputInvMask + 1;
v37 = *(_QWORD*)&inputBuf[v35] & ((1i64 << (8 * (unsigned __int8)headerOffset)) - 1);
v38 = v37 + decoder->bufferSizeNeeded;
v39 = v37 + decoder->compressedStreamSize;
decoder->bufferSizeNeeded = v38;
decoder->compressedStreamSize = v39;
if (v36 >= m_decompSize)
if (v36 >= decompSize)
{
v36 = m_decompSize;
decoder->compressedStreamSize = m_headerOffset + v39;
v36 = decompSize;
decoder->compressedStreamSize = headerOffset + v39;
}
decoder->decompressedStreamSize = v36;
if (inLen >= v38 && outLen >= v36)
{
LABEL_25:
qword70 = decoder->qword70;
if (m_fileBytePosition >= qword70)
if (inBufBytePos >= qword70)
{
m_fileBytePosition = ~decoder->inputInvMask & (m_fileBytePosition + 7);
inBufBytePos = ~decoder->inputInvMask & (inBufBytePos + 7);
qword70 += decoder->inputInvMask + 1;
decoder->qword70 = qword70;
}
@ -439,67 +545,77 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
goto LABEL_29;
}
v69 = decoder->qword70;
if (m_fileBytePosition >= v69)
if (inBufBytePos >= v69)
{
m_fileBytePosition = ~m_inputInvMask & (m_fileBytePosition + 7);
decoder->qword70 = v69 + m_inputInvMask + 1;
inBufBytePos = ~inputInvMask & (inBufBytePos + 7);
decoder->qword70 = v69 + inputInvMask + 1;
}
decoder->dword6C = v71;
result = false;
decoder->currentByte = v19;
decoder->currentBit = m_currentBit;
decoder->currentBit = currentBit;
LABEL_69:
decoder->outBufBytePos = m_decompBytePosition;
decoder->inBufBytePos = m_fileBytePosition;
decoder->outBufBytePos = outBufBytePos;
decoder->inBufBytePos = inBufBytePos;
return result;
}
//-----------------------------------------------------------------------------
// checks if we have enough output buffer room to decode the data stream
// initializes the ZStd decoder
//-----------------------------------------------------------------------------
bool Pak_HasEnoughDecodeBufferLeft(PakDecoder_t* const decoder, const size_t outLen)
size_t Pak_ZStreamDecoderInit(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize)
{
const uint64_t bytesWritten = (decoder->outBufBytePos & ~decoder->outputInvMask);
return (outLen >= decoder->outputInvMask + (bytesWritten + 1) || outLen >= decoder->decompSize);
// NOTE: on original paks, this data is passed out of the frame header,
// but for ZStd encoded paks we are always limiting this to the ring
// buffer size
decoder->inputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK;
decoder->outputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK;
ZSTD_DStream* const dctx = ZSTD_createDStream();
assert(dctx);
// failure
if (!dctx)
return NULL;
decoder->zstreamContext = dctx;
// this points to the first byte of the frame header, takes dataOffset
// into account which is the offset in the ring buffer to the patched
// data as we parse it contiguously after the base pak data, which
// might have ended somewhere in the middle of the ring buffer
const uint8_t* const frameHeaderData = (inputMask & (dataOffset + headerSize)) + fileBuffer;
// this is the offset to the ZStd header in the input buffer
decoder->headerOffset = static_cast<uint32_t>(frameHeaderData - fileBuffer);
if (ZSTD_getFrameHeader(&dctx->fParams, frameHeaderData, dataSize) != 0)
{
ZSTD_freeDStream(decoder->zstreamContext);
decoder->zstreamContext = nullptr;
return NULL; // content size error
}
// ideally the frame header of the block gets parsed first, the length
// thereof is returned by initDStream and thus being processed first
// before moving on to actual data
decoder->frameHeaderSize = ZSTD_initDStream(dctx);
// we need at least this many bytes of streamed data to process the frame
// header of the compressed block
decoder->bufferSizeNeeded = decoder->headerOffset + decoder->frameHeaderSize;
// must include header size
decoder->decompSize = dctx->fParams.frameContentSize + headerSize;
return decoder->decompSize;
}
//-----------------------------------------------------------------------------
// checks if we have enough source data streamed to decode the next block
// decodes the ZStd data stream up to available buffer or data, whichever ends
// first (determined by Pak_ComputeRingBufferFrame())
//-----------------------------------------------------------------------------
bool Pak_HasEnoughStreamedDataForDecode(PakDecoder_t* const decoder, const size_t inLen, const bool useZStream)
{
// the decoder needs at least this many bytes to decode the current block
// buffer contiguously
return (inLen >= decoder->bufferSizeNeeded);
}
struct PakRingBufferFrame_t
{
uint64_t bufIndex;
uint64_t frameLen;
};
//-----------------------------------------------------------------------------
// gets the frame for the data in the ring buffer, the frame returned is always
// either the end of the ring buffer, or the end of the data itself
//-----------------------------------------------------------------------------
PakRingBufferFrame_t Pak_ComputeRingBufferFrame(const uint64_t bufMask, const uint64_t seekPos, const uint64_t dataLen)
{
PakRingBufferFrame_t ring;
ring.bufIndex = seekPos & bufMask;
const size_t bufSize = bufMask + 1;
const size_t bytesUsed = ring.bufIndex % bufSize;
const size_t totalAvail = bufSize - bytesUsed;
// the last part of the data might be smaller than the remainder of the ring
// buffer; clamp it
ring.frameLen = Min(dataLen - seekPos, totalAvail);
return ring;
}
bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen)
{
// must have a zstream decoder at this point, and input seek pos may not
@ -533,9 +649,21 @@ bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
return false;
}
// the first call to this function expects a buffer with at least the size
// of the pak file header + the ZStd frame header; after this call we
// subtract the frame header size from the needed buffer size again, as it
// only has to be accounted for once
if (decoder->frameHeaderSize)
{
decoder->bufferSizeNeeded -= decoder->frameHeaderSize;
decoder->frameHeaderSize = 0;
}
// on the next call, we need at least this amount of data streamed in order
// to decode the rest of the pak file, as this is where reading has stopped
decoder->bufferSizeNeeded = inBuffer.pos;
// this value may equal the currently streamed input size, as it's possible
// this function is getting called to flush the remaining decoded data into
// the out buffer, which got truncated on the prior call due to wrapping
decoder->bufferSizeNeeded += inBuffer.pos;
// advance buffer io positions, required so the main parser could already
// start parsing the headers while the rest is getting decoded still
@ -555,170 +683,52 @@ bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si
}
//-----------------------------------------------------------------------------
// initialize the pak decoder context
// initializes the decoder
//-----------------------------------------------------------------------------
size_t Pak_InitDefaultDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize)
size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const inputBuf, uint8_t* const outputBuf,
const uint64_t inputMask, const uint64_t outputMask, const size_t dataSize, const size_t dataOffset,
const size_t headerSize, const bool useZStream)
{
uint64_t v8; // r9
unsigned __int64 v9; // r11
unsigned __int64 v10; // r8
int v11; // er8
__int64 v12; // rbx
unsigned int v13; // ebp
unsigned __int64 v14; // rbx
uint64_t v15; // rax
unsigned int v16; // er9
unsigned __int64 v17; // r12
unsigned __int64 v18; // r11
unsigned __int64 v19; // r10
unsigned __int64 v20; // rax
int v21; // ebp
unsigned __int64 v22; // r10
unsigned int v23; // er9
uint64_t v24; // rax
__int64 v25; // rsi
uint64_t v26; // rdx
size_t result; // rax
uint64_t v28; // rdx
// buffer size must be power of two as we index into buffers using a bit
// mask rather than modulo, the mask provided must be bufferSize-1
assert(IsPowerOfTwo(inputMask + 1));
assert(IsPowerOfTwo(outputMask + 1));
v8 = dataOffset + headerSize + 8;
v9 = *(_QWORD*)((inputMask & (dataOffset + headerSize)) + fileBuffer);
v10 = v9;
v9 >>= 6;
v11 = v10 & 0x3F;
decoder->decompSize = (1i64 << v11) | v9 & ((1i64 << v11) - 1);
v12 = *(_QWORD*)((inputMask & v8) + fileBuffer) << (64 - ((unsigned __int8)v11 + 6));
decoder->inBufBytePos = v8 + ((unsigned __int64)(unsigned int)(v11 + 6) >> 3);
v13 = ((v11 + 6) & 7) + 13;
v14 = (0xFFFFFFFFFFFFFFFFui64 >> ((v11 + 6) & 7)) & ((v9 >> v11) | v12);
v15 = inputMask & decoder->inBufBytePos;
v16 = (((_BYTE)v14 - 1) & 0x3F) + 1;
v17 = 0xFFFFFFFFFFFFFFFFui64 >> (64 - (unsigned __int8)v16);
decoder->inputInvMask = v17;
v18 = 0xFFFFFFFFFFFFFFFFui64 >> (64 - ((((v14 >> 6) - 1) & 0x3F) + 1));
decoder->outputInvMask = v18;
v19 = (v14 >> 13) | (*(_QWORD*)(v15 + fileBuffer) << (64 - (unsigned __int8)v13));
v20 = v13;
v21 = v13 & 7;
decoder->inBufBytePos += v20 >> 3;
v22 = (0xFFFFFFFFFFFFFFFFui64 >> v21) & v19;
if (v17 == -1i64)
{
decoder->headerOffset = 0;
decoder->bufferSizeNeeded = dataSize;
}
else
{
v23 = v16 >> 3;
v24 = inputMask & decoder->inBufBytePos;
decoder->headerOffset = v23 + 1;
v25 = *(_QWORD*)(v24 + fileBuffer) & ((1i64 << (8 * ((unsigned __int8)v23 + 1))) - 1);
decoder->inBufBytePos += v23 + 1;
decoder->bufferSizeNeeded = v25;
}
decoder->bufferSizeNeeded += dataOffset;
v26 = decoder->bufferSizeNeeded;
decoder->currentByte = v22;
decoder->currentBit = v21;
decoder->qword70 = v17 + dataOffset - 6;
result = decoder->decompSize;
decoder->dword6C = 0;
decoder->compressedStreamSize = v26;
decoder->decompressedStreamSize = result;
if ((((unsigned __int8)(v14 >> 6) - 1) & 0x3F) != -1i64 && result - 1 > v18)
{
v28 = v26 - decoder->headerOffset;
decoder->decompressedStreamSize = v18 + 1;
decoder->compressedStreamSize = v28;
}
return result;
}
size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize, const bool useZStream)
{
decoder->inputBuf = fileBuffer;
decoder->outputBuf = nullptr;
// the absolute start address of the input and output buffers
decoder->inputBuf = inputBuf;
decoder->outputBuf = outputBuf;
// the actual file size, which consists of dataOffset (anything up to the
// frame header, like the file header) and the actual encoded data itself
decoder->fileSize = dataOffset + dataSize;
decoder->dword44 = NULL;
decoder->padding = NULL;
// buffer masks, which essentially gets used to index into the input and
// output buffers, similar to 'idx % bufSize', where bufSize = bufMask+1
decoder->inputMask = inputMask;
decoder->outputMask = NULL;
decoder->outputMask = outputMask;
// the current positions in the input and output buffers; if we deal with
// paks that are patched, the buffer positions during the init and decode
// call on subsequent patches may not be at the start of the buffers
decoder->inBufBytePos = dataOffset + headerSize;
decoder->outBufBytePos = headerSize;
if (useZStream)
{
// NOTE: on original paks, this data is passed out of the frame header,
// but for ZStd encoded paks we are always limiting this to the ring
// buffer size
decoder->inputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK;
decoder->outputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK;
return Pak_ZStreamDecoderInit(decoder, inputBuf, inputMask, dataSize, dataOffset, headerSize);
ZSTD_DStream* const dctx = ZSTD_createDStream();
assert(dctx);
if (!dctx)
return NULL;
decoder->zstreamContext = dctx;
// this is the offset to the ZSTD header in the input buffer
decoder->headerOffset = static_cast<uint32_t>(decoder->inBufBytePos);
// this points to the first byte of the frame header, takes dataOffset
// into account which is the offset in the ring buffer to the patched
// data as we parse it contiguously after the base pak data, which
// might have ended somewhere in the middle of the ring buffer
const uint8_t* const frameHeaderData = (inputMask & (dataOffset + headerSize)) + fileBuffer;
if (ZSTD_getFrameHeader(&dctx->fParams, frameHeaderData, dataSize) != 0)
{
if (decoder->zstreamContext)
{
ZSTD_freeDStream(decoder->zstreamContext);
decoder->zstreamContext = nullptr;
}
return NULL; // content size error
}
// ideally the frame header of the block gets parsed first, the length
// thereof is returned by initDStream and thus being processed first
// before moving on to actual data
const size_t frameMetaDataSize = ZSTD_initDStream(dctx);
// we need at least this many bytes of streamed data to process the frame
// header of the compressed block
decoder->bufferSizeNeeded = frameMetaDataSize + decoder->headerOffset;
// must include header size
const uint64_t decompSize = dctx->fParams.frameContentSize + headerSize;
decoder->decompSize = decompSize;
return decompSize;
}
return Pak_InitDefaultDecoder(decoder, fileBuffer, inputMask, dataSize, dataOffset, headerSize);
return Pak_RStreamDecoderInit(decoder, inputBuf, inputMask, dataSize, dataOffset, headerSize);
}
//-----------------------------------------------------------------------------
// decode input pak data
// decodes streamed input pak data
//-----------------------------------------------------------------------------
bool Pak_StreamToBufferDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen, const bool useZStream)
{
if (!Pak_HasEnoughStreamedDataForDecode(decoder, inLen, useZStream))
if (!Pak_HasEnoughStreamedDataForDecode(decoder, inLen))
return false;
if (!Pak_HasEnoughDecodeBufferLeft(decoder, outLen))
if (!Pak_HasEnoughDecodeBufferAvailable(decoder, outLen))
return false;
if (useZStream)

View File

@ -2,11 +2,9 @@
#define RTECH_PAKDECODE_H
#include "rtech/ipakfile.h"
extern size_t Pak_InitDefaultDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize);
extern size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer,
const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize, const bool useCustom);
extern size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const inputBuf, uint8_t* const outputBuf,
const uint64_t inputMask, const uint64_t outputMask, const size_t dataSize, const size_t dataOffset,
const size_t headerSize, const bool useZStream);
extern bool Pak_StreamToBufferDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen, const bool useCustom);

View File

@ -382,8 +382,10 @@ LABEL_18:
decodeContext = &pak->pakDecoder;
decompressedSize = Pak_InitDecoder(&pak->pakDecoder, fileStream->buffer,
PAK_DECODE_IN_RING_BUFFER_MASK, v22->compressedSize - (v22->dataOffset - sizeof(PakFileHeader_t)),
decompressedSize = Pak_InitDecoder(&pak->pakDecoder,
fileStream->buffer, pak->decompBuffer,
PAK_DECODE_IN_RING_BUFFER_MASK, PAK_DECODE_OUT_RING_BUFFER_MASK,
v22->compressedSize - (v22->dataOffset - sizeof(PakFileHeader_t)),
v22->dataOffset - sizeof(PakFileHeader_t), sizeof(PakFileHeader_t), useZStream);
if (decompressedSize != v22->decompressedSize)
@ -392,9 +394,6 @@ LABEL_18:
pak->memoryData.fileName,
decompressedSize,
pak->memoryData.pakHeader.decompressedSize);
pak->pakDecoder.outputBuf = pak->decompBuffer;
pak->pakDecoder.outputMask = PAK_DECODE_OUT_RING_BUFFER_MASK;
}
else
{