From 26ce3c50c23328d5e62dd7f273bb6c880f9b30e9 Mon Sep 17 00:00:00 2001 From: Kawe Mazidjatari <48657826+Mauler125@users.noreply.github.com> Date: Sun, 28 Jan 2024 01:57:11 +0100 Subject: [PATCH] RTech: cleanup ZStd pak decoder and fix bugs * Pak_InitDecoder() now takes the output buf and mask as parameters. * Pak_InitDecoder() checks if provided masks are a power of 2 (required). * Pak_ComputeRingBufferFrame() now uses the bit mask instead of modulo to determine # bytes used. * Fixed a bug where PakDecoder_t::bufferSizeNeeded could be bigger than the file stream, causing a deadlock. --- src/common/callback.cpp | 2 +- src/public/rtech/ipakfile.h | 14 +- src/rtech/pak/pakdecode.cpp | 536 ++++++++++++++++++------------------ src/rtech/pak/pakdecode.h | 8 +- src/rtech/pak/pakparse.cpp | 9 +- 5 files changed, 293 insertions(+), 276 deletions(-) diff --git a/src/common/callback.cpp b/src/common/callback.cpp index b8d1f335..ab749d37 100644 --- a/src/common/callback.cpp +++ b/src/common/callback.cpp @@ -563,7 +563,7 @@ void Pak_Decompress_f(const CCommand& args) const bool usesCustomCompression = pHeader->flags & PAK_HEADER_FLAGS_ZSTREAM; PakDecoder_t decoder{}; - const uint64_t nDecompSize = Pak_InitDecoder(&decoder, pPakBuf, UINT64_MAX, nFileSize, NULL, sizeof(PakFileHeader_t), usesCustomCompression); + const uint64_t nDecompSize = Pak_InitDecoder(&decoder, pPakBuf, nullptr, UINT64_MAX, UINT64_MAX, nFileSize, NULL, sizeof(PakFileHeader_t), usesCustomCompression); if (nDecompSize != pHeader->decompressedSize) { diff --git a/src/public/rtech/ipakfile.h b/src/public/rtech/ipakfile.h index 45080d23..d9b56eab 100644 --- a/src/public/rtech/ipakfile.h +++ b/src/public/rtech/ipakfile.h @@ -363,7 +363,7 @@ struct PakDecoder_t uint64_t outputInvMask; uint32_t headerOffset; - uint32_t dword44; + uint32_t padding; // unused data, available for other stuff uint64_t inBufBytePos; uint64_t outBufBytePos; @@ -377,7 +377,11 @@ struct PakDecoder_t uint32_t dword6C; uint64_t qword70; - size_t compressedStreamSize; + union + { + size_t compressedStreamSize; + size_t frameHeaderSize; + }; union { @@ -386,6 +390,12 @@ struct PakDecoder_t }; }; +struct PakRingBufferFrame_t +{ + size_t bufIndex; + size_t frameLen; +}; + struct PakFile_t; class PakLoadedInfo_t diff --git a/src/rtech/pak/pakdecode.cpp b/src/rtech/pak/pakdecode.cpp index 7d9609ef..2a2b84fc 100644 --- a/src/rtech/pak/pakdecode.cpp +++ b/src/rtech/pak/pakdecode.cpp @@ -130,16 +130,122 @@ static const unsigned char /*141313180*/ s_defaultDecoderLUT[] = 0x4C, 0x39, 0x56, 0x75, 0x42, 0x52, 0x65, 0x75, 0x70, 0x35, 0x31, 0x77, 0x4C, 0x51, 0x64, 0x61, }; +//----------------------------------------------------------------------------- +// checks if we have enough output buffer room to decode the data stream +//----------------------------------------------------------------------------- +bool Pak_HasEnoughDecodeBufferAvailable(PakDecoder_t* const decoder, const size_t outLen) +{ + const uint64_t bytesWritten = (decoder->outBufBytePos & ~decoder->outputInvMask); + return (outLen >= decoder->outputInvMask + (bytesWritten + 1) || outLen >= decoder->decompSize); +} + +//----------------------------------------------------------------------------- +// checks if we have enough source data streamed to decode the next block +//----------------------------------------------------------------------------- +bool Pak_HasEnoughStreamedDataForDecode(PakDecoder_t* const decoder, const size_t inLen) +{ + // the decoder needs at least this many bytes to decode the current block + // buffer contiguously + return (inLen >= decoder->bufferSizeNeeded); +} + +//----------------------------------------------------------------------------- +// gets the frame for the data in the ring buffer, the frame returned is always +// ending to the end of the ring buffer, or the end of the data itself +//----------------------------------------------------------------------------- +PakRingBufferFrame_t Pak_ComputeRingBufferFrame(const uint64_t bufMask, const size_t seekPos, const size_t dataLen) +{ + PakRingBufferFrame_t ring; + ring.bufIndex = seekPos & bufMask; + + // the total amount of bytes used and available in this frame + const size_t bytesUsed = ring.bufIndex & bufMask; + const size_t totalAvail = bufMask+1 - bytesUsed; + + // the last part of the data might be smaller than the remainder of the ring + // buffer; clamp it + ring.frameLen = Min(dataLen - seekPos, totalAvail); + return ring; +} + +//----------------------------------------------------------------------------- +// initializes the RTech decoder +//----------------------------------------------------------------------------- +size_t Pak_RStreamDecoderInit(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, + const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize) +{ + uint64_t frameHeader = *(_QWORD*)((inputMask & (dataOffset + headerSize)) + fileBuffer); + const int decompressedSizeBits = frameHeader & 0x3F; + + frameHeader >>= 6; + decoder->decompSize = (1i64 << decompressedSizeBits) | frameHeader & ((1i64 << decompressedSizeBits) - 1); + + const uint64_t bytePos = dataOffset + headerSize + 8; + const int64_t currByteLow = *(_QWORD*)((inputMask & bytePos) + fileBuffer) << (64 - ((unsigned __int8)decompressedSizeBits + 6)); + + decoder->inBufBytePos = bytePos + ((unsigned __int64)(unsigned int)(decompressedSizeBits + 6) >> 3); + const uint32_t bitPosFinal = ((decompressedSizeBits + 6) & 7) + 13; + + const uint64_t currByte = (0xFFFFFFFFFFFFFFFFui64 >> ((decompressedSizeBits + 6) & 7)) & ((frameHeader >> decompressedSizeBits) | currByteLow); + const uint32_t currbits = (((_BYTE)currByte - 1) & 0x3F) + 1; + + const uint64_t invMaskIn = 0xFFFFFFFFFFFFFFFFui64 >> (64 - (unsigned __int8)currbits); + decoder->inputInvMask = invMaskIn; + + const uint64_t invMaskOut = 0xFFFFFFFFFFFFFFFFui64 >> (64 - ((((currByte >> 6) - 1) & 0x3F) + 1)); + decoder->outputInvMask = invMaskOut; + + const uint64_t finalByteFull = (currByte >> 13) | (*(_QWORD*)((inputMask & decoder->inBufBytePos) + fileBuffer) << (64 - (unsigned __int8)bitPosFinal)); + const uint32_t finalBitOffset = bitPosFinal & 7; + + decoder->inBufBytePos += bitPosFinal >> 3; + const uint64_t finalByte = (0xFFFFFFFFFFFFFFFFui64 >> finalBitOffset) & finalByteFull; + + if (decoder->inputInvMask == 0xFFFFFFFFFFFFFFFFui64) + { + decoder->headerOffset = 0; + decoder->bufferSizeNeeded = dataSize; + } + else + { + const uint64_t finalPos = inputMask & decoder->inBufBytePos; + decoder->headerOffset = (currbits >> 3) + 1; + decoder->inBufBytePos += (currbits >> 3) + 1; + decoder->bufferSizeNeeded = *(_QWORD*)(finalPos + fileBuffer) & ((1i64 << (8 * ((unsigned __int8)(currbits >> 3) + 1))) - 1);; + } + + decoder->bufferSizeNeeded += dataOffset; + decoder->currentByte = finalByte; + decoder->currentBit = finalBitOffset; + decoder->qword70 = decoder->inputInvMask + dataOffset - 6; + decoder->dword6C = 0; + decoder->compressedStreamSize = decoder->bufferSizeNeeded; + decoder->decompressedStreamSize = decoder->decompSize; + + if ((((unsigned __int8)(currByte >> 6) - 1) & 0x3F) != -1i64 && decoder->decompSize - 1 > decoder->outputInvMask) + { + const uint64_t streamCompressedSize = decoder->bufferSizeNeeded - decoder->headerOffset; + decoder->compressedStreamSize = streamCompressedSize; + + decoder->decompressedStreamSize = decoder->outputInvMask + 1; + } + + return decoder->decompSize; +} + +//----------------------------------------------------------------------------- +// decodes the RTech data stream up to available buffer or data +//----------------------------------------------------------------------------- bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen) { bool result; // al - uint64_t m_decompBytePosition; // r15 - uint8_t* m_outputBuf; // r11 - uint32_t m_currentBit; // ebp - uint64_t m_currentByte; // rsi - uint64_t m_fileBytePosition; // rdi + uint64_t outBufBytePos; // r15 + uint8_t* outputBuf; // r11 + uint32_t currentBit; // ebp + uint64_t currentByte; // rsi + uint64_t inBufBytePos; // rdi size_t qword70; // r12 - const uint8_t* m_inputBuf; // r13 + const uint8_t* inputBuf; // r13 uint32_t dword6C; // ecx uint64_t v13; // rsi unsigned __int64 i; // rax @@ -152,15 +258,15 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si int v21; // ecx unsigned __int64 v22; // r11 int v23; // edx - uint64_t m_outputMask; // rax + uint64_t outputMask; // rax int v25; // r8d unsigned int v26; // r13d uint64_t v27; // r10 uint8_t* v28; // rax uint8_t* v29; // r10 - size_t m_decompSize; // r9 - uint64_t m_inputInvMask; // r10 - uint64_t m_headerOffset; // r8 + size_t decompSize; // r9 + uint64_t inputInvMask; // r10 + uint64_t headerOffset; // r8 uint64_t v33; // rax uint64_t v34; // rax uint64_t v35; // rax @@ -202,49 +308,49 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si uint32_t v71; // [rsp+60h] [rbp+8h] const uint8_t* v74; // [rsp+78h] [rbp+20h] - m_decompBytePosition = decoder->outBufBytePos; + outBufBytePos = decoder->outBufBytePos; - m_outputBuf = decoder->outputBuf; - m_currentBit = decoder->currentBit; - m_currentByte = decoder->currentByte; - m_fileBytePosition = decoder->inBufBytePos; + outputBuf = decoder->outputBuf; + currentBit = decoder->currentBit; + currentByte = decoder->currentByte; + inBufBytePos = decoder->inBufBytePos; qword70 = decoder->qword70; - m_inputBuf = decoder->inputBuf; + inputBuf = decoder->inputBuf; if (decoder->compressedStreamSize < qword70) qword70 = decoder->compressedStreamSize; dword6C = decoder->dword6C; - v74 = m_inputBuf; - v70 = m_outputBuf; + v74 = inputBuf; + v70 = outputBuf; v71 = dword6C; - if (!m_currentBit) + if (!currentBit) goto LABEL_11; - v13 = (*(_QWORD*)&m_inputBuf[m_fileBytePosition & decoder->inputMask] << (64 - (unsigned __int8)m_currentBit)) | m_currentByte; - for (i = m_currentBit; ; i = m_currentBit) + v13 = (*(_QWORD*)&inputBuf[inBufBytePos & decoder->inputMask] << (64 - (unsigned __int8)currentBit)) | currentByte; + for (i = currentBit; ; i = currentBit) { - m_currentBit &= 7u; - m_fileBytePosition += i >> 3; + currentBit &= 7u; + inBufBytePos += i >> 3; dword6C = v71; - m_currentByte = (0xFFFFFFFFFFFFFFFFui64 >> m_currentBit) & v13; + currentByte = (0xFFFFFFFFFFFFFFFFui64 >> currentBit) & v13; LABEL_11: v15 = (unsigned __int64)dword6C << 8; v16 = dword6C; - v17 = s_defaultDecoderLUT[(unsigned __int8)m_currentByte + 512 + v15]; - v18 = (unsigned __int8)m_currentByte + v15; - m_currentBit += v17; - v19 = m_currentByte >> v17; + v17 = s_defaultDecoderLUT[(unsigned __int8)currentByte + 512 + v15]; + v18 = (unsigned __int8)currentByte + v15; + currentBit += v17; + v19 = currentByte >> v17; v20 = (unsigned int)(char)s_defaultDecoderLUT[v18]; if ((s_defaultDecoderLUT[v18] & 0x80u) != 0) { v56 = -(int)v20; - v57 = &m_inputBuf[m_fileBytePosition & decoder->inputMask]; + v57 = &inputBuf[inBufBytePos & decoder->inputMask]; v71 = 1; - v58 = &m_outputBuf[m_decompBytePosition & decoder->outputMask]; + v58 = &outputBuf[outBufBytePos & decoder->outputMask]; if (v56 == s_defaultDecoderLUT[v16 + 1248]) { - if ((~m_fileBytePosition & decoder->inputInvMask) < 0xF || (decoder->outputInvMask & ~m_decompBytePosition) < 0xF || decoder->decompSize - m_decompBytePosition < 0x10) + if ((~inBufBytePos & decoder->inputInvMask) < 0xF || (decoder->outputInvMask & ~outBufBytePos) < 0xF || decoder->decompSize - outBufBytePos < 0x10) v56 = 1; v59 = v19; v60 = v19 >> 3; @@ -259,11 +365,11 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si { v62 = v60 >> 4; v65 = v60 & 0xF; - m_currentBit += 4; + currentBit += 4; v63 = *(_DWORD*)&s_defaultDecoderLUT[4 * v65 + 1152]; v64 = s_defaultDecoderLUT[v65 + 1216]; } - m_currentBit += v64 + 3; + currentBit += v64 + 3; v19 = v62 >> v64; v66 = v63 + (v62 & ((1 << v64) - 1)) + v56; for (j = v66 >> 3; j; --j) @@ -287,15 +393,15 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si } if ((v66 & 1) != 0) *v58 = *v57; - m_fileBytePosition += v66; - m_decompBytePosition += v66; + inBufBytePos += v66; + outBufBytePos += v66; } else { *(_QWORD*)v58 = *(_QWORD*)v57; *((_QWORD*)v58 + 1) = *((_QWORD*)v57 + 1); - m_fileBytePosition += v56; - m_decompBytePosition += v56; + inBufBytePos += v56; + outBufBytePos += v56; } } else @@ -304,13 +410,13 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si v71 = 0; v22 = ((unsigned __int64)(unsigned int)v19 >> (((unsigned int)(v21 - 31) >> 3) & 6)) & 0x3F; v23 = 1 << (v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4))); - m_currentBit += (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4)); - m_outputMask = decoder->outputMask; + currentBit += (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4)); + outputMask = decoder->outputMask; v25 = 16 * (v23 + ((v23 - 1) & (v19 >> ((((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088])))); v19 >>= (((unsigned int)(v21 - 31) >> 3) & 6) + s_defaultDecoderLUT[v22 + 1088] + v21 + ((v19 >> 4) & ((24 * (((unsigned int)(v21 - 31) >> 3) & 2)) >> 4)); v26 = v25 + s_defaultDecoderLUT[v22 + 1024] - 16; - v27 = m_outputMask & (m_decompBytePosition - v26); - v28 = &v70[m_decompBytePosition & m_outputMask]; + v27 = outputMask & (outBufBytePos - v26); + v28 = &v70[outBufBytePos & outputMask]; v29 = &v70[v27]; if ((_DWORD)v20 == 17) { @@ -325,26 +431,26 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si } else { - m_currentBit += 4; + currentBit += 4; v46 = v41 & 0xF; v43 = v41 >> 4; v44 = *(_DWORD*)&s_defaultDecoderLUT[4 * v46 + 1152]; v45 = s_defaultDecoderLUT[v46 + 1216]; - if (v74 && m_currentBit + v45 >= 61) + if (v74 && currentBit + v45 >= 61) { - v47 = m_fileBytePosition++ & decoder->inputMask; - v43 |= (unsigned __int64)v74[v47] << (61 - (unsigned __int8)m_currentBit); - m_currentBit -= 8; + v47 = inBufBytePos++ & decoder->inputMask; + v43 |= (unsigned __int64)v74[v47] << (61 - (unsigned __int8)currentBit); + currentBit -= 8; } } - m_currentBit += v45 + 3; + currentBit += v45 + 3; v19 = v43 >> v45; v48 = ((unsigned int)v43 & ((1 << v45) - 1)) + v44 + 17; - m_decompBytePosition += v48; + outBufBytePos += v48; if (v26 < 8) { v50 = v48 - 13; - m_decompBytePosition -= 13i64; + outBufBytePos -= 13i64; if (v26 == 1) { v51 = *v29; @@ -378,59 +484,59 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si } else { - m_decompBytePosition += v20; + outBufBytePos += v20; *(_QWORD*)v28 = *(_QWORD*)v29; *((_QWORD*)v28 + 1) = *((_QWORD*)v29 + 1); } - m_inputBuf = v74; + inputBuf = v74; } - if (m_fileBytePosition >= qword70) + if (inBufBytePos >= qword70) break; LABEL_29: - m_outputBuf = v70; - v13 = (*(_QWORD*)&m_inputBuf[m_fileBytePosition & decoder->inputMask] << (64 - (unsigned __int8)m_currentBit)) | v19; + outputBuf = v70; + v13 = (*(_QWORD*)&inputBuf[inBufBytePos & decoder->inputMask] << (64 - (unsigned __int8)currentBit)) | v19; } - if (m_decompBytePosition != decoder->decompressedStreamSize) + if (outBufBytePos != decoder->decompressedStreamSize) goto LABEL_25; - m_decompSize = decoder->decompSize; - if (m_decompBytePosition == m_decompSize) + decompSize = decoder->decompSize; + if (outBufBytePos == decompSize) { result = true; goto LABEL_69; } - m_inputInvMask = decoder->inputInvMask; - m_headerOffset = decoder->headerOffset; - v33 = m_inputInvMask & -(__int64)m_fileBytePosition; + inputInvMask = decoder->inputInvMask; + headerOffset = decoder->headerOffset; + v33 = inputInvMask & -(__int64)inBufBytePos; v19 >>= 1; - ++m_currentBit; - if (m_headerOffset > v33) + ++currentBit; + if (headerOffset > v33) { - m_fileBytePosition += v33; + inBufBytePos += v33; v34 = decoder->qword70; - if (m_fileBytePosition > v34) - decoder->qword70 = m_inputInvMask + v34 + 1; + if (inBufBytePos > v34) + decoder->qword70 = inputInvMask + v34 + 1; } - v35 = m_fileBytePosition & decoder->inputMask; - m_fileBytePosition += m_headerOffset; - v36 = m_decompBytePosition + decoder->outputInvMask + 1; - v37 = *(_QWORD*)&m_inputBuf[v35] & ((1i64 << (8 * (unsigned __int8)m_headerOffset)) - 1); + v35 = inBufBytePos & decoder->inputMask; + inBufBytePos += headerOffset; + v36 = outBufBytePos + decoder->outputInvMask + 1; + v37 = *(_QWORD*)&inputBuf[v35] & ((1i64 << (8 * (unsigned __int8)headerOffset)) - 1); v38 = v37 + decoder->bufferSizeNeeded; v39 = v37 + decoder->compressedStreamSize; decoder->bufferSizeNeeded = v38; decoder->compressedStreamSize = v39; - if (v36 >= m_decompSize) + if (v36 >= decompSize) { - v36 = m_decompSize; - decoder->compressedStreamSize = m_headerOffset + v39; + v36 = decompSize; + decoder->compressedStreamSize = headerOffset + v39; } decoder->decompressedStreamSize = v36; if (inLen >= v38 && outLen >= v36) { LABEL_25: qword70 = decoder->qword70; - if (m_fileBytePosition >= qword70) + if (inBufBytePos >= qword70) { - m_fileBytePosition = ~decoder->inputInvMask & (m_fileBytePosition + 7); + inBufBytePos = ~decoder->inputInvMask & (inBufBytePos + 7); qword70 += decoder->inputInvMask + 1; decoder->qword70 = qword70; } @@ -439,67 +545,77 @@ bool Pak_RStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si goto LABEL_29; } v69 = decoder->qword70; - if (m_fileBytePosition >= v69) + if (inBufBytePos >= v69) { - m_fileBytePosition = ~m_inputInvMask & (m_fileBytePosition + 7); - decoder->qword70 = v69 + m_inputInvMask + 1; + inBufBytePos = ~inputInvMask & (inBufBytePos + 7); + decoder->qword70 = v69 + inputInvMask + 1; } decoder->dword6C = v71; result = false; decoder->currentByte = v19; - decoder->currentBit = m_currentBit; + decoder->currentBit = currentBit; LABEL_69: - decoder->outBufBytePos = m_decompBytePosition; - decoder->inBufBytePos = m_fileBytePosition; + decoder->outBufBytePos = outBufBytePos; + decoder->inBufBytePos = inBufBytePos; return result; } - //----------------------------------------------------------------------------- -// checks if we have enough output buffer room to decode the data stream +// initializes the ZStd decoder //----------------------------------------------------------------------------- -bool Pak_HasEnoughDecodeBufferLeft(PakDecoder_t* const decoder, const size_t outLen) +size_t Pak_ZStreamDecoderInit(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, + const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize) { - const uint64_t bytesWritten = (decoder->outBufBytePos & ~decoder->outputInvMask); - return (outLen >= decoder->outputInvMask + (bytesWritten + 1) || outLen >= decoder->decompSize); + // NOTE: on original paks, this data is passed out of the frame header, + // but for ZStd encoded paks we are always limiting this to the ring + // buffer size + decoder->inputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK; + decoder->outputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK; + + ZSTD_DStream* const dctx = ZSTD_createDStream(); + assert(dctx); + + // failure + if (!dctx) + return NULL; + + decoder->zstreamContext = dctx; + + // this points to the first byte of the frame header, takes dataOffset + // into account which is the offset in the ring buffer to the patched + // data as we parse it contiguously after the base pak data, which + // might have ended somewhere in the middle of the ring buffer + const uint8_t* const frameHeaderData = (inputMask & (dataOffset + headerSize)) + fileBuffer; + + // this is the offset to the ZStd header in the input buffer + decoder->headerOffset = static_cast(frameHeaderData - fileBuffer); + + if (ZSTD_getFrameHeader(&dctx->fParams, frameHeaderData, dataSize) != 0) + { + ZSTD_freeDStream(decoder->zstreamContext); + decoder->zstreamContext = nullptr; + + return NULL; // content size error + } + + // ideally the frame header of the block gets parsed first, the length + // thereof is returned by initDStream and thus being processed first + // before moving on to actual data + decoder->frameHeaderSize = ZSTD_initDStream(dctx); + + // we need at least this many bytes of streamed data to process the frame + // header of the compressed block + decoder->bufferSizeNeeded = decoder->headerOffset + decoder->frameHeaderSize; + + // must include header size + decoder->decompSize = dctx->fParams.frameContentSize + headerSize; + return decoder->decompSize; } //----------------------------------------------------------------------------- -// checks if we have enough source data streamed to decode the next block +// decodes the ZStd data stream up to available buffer or data, whichever ends +// first (determined by Pak_ComputeRingBufferFrame()) //----------------------------------------------------------------------------- -bool Pak_HasEnoughStreamedDataForDecode(PakDecoder_t* const decoder, const size_t inLen, const bool useZStream) -{ - // the decoder needs at least this many bytes to decode the current block - // buffer contiguously - return (inLen >= decoder->bufferSizeNeeded); -} - -struct PakRingBufferFrame_t -{ - uint64_t bufIndex; - uint64_t frameLen; -}; - -//----------------------------------------------------------------------------- -// gets the frame for the data in the ring buffer, the frame returned is always -// either the end of the ring buffer, or the end of the data itself -//----------------------------------------------------------------------------- -PakRingBufferFrame_t Pak_ComputeRingBufferFrame(const uint64_t bufMask, const uint64_t seekPos, const uint64_t dataLen) -{ - PakRingBufferFrame_t ring; - - ring.bufIndex = seekPos & bufMask; - const size_t bufSize = bufMask + 1; - - const size_t bytesUsed = ring.bufIndex % bufSize; - const size_t totalAvail = bufSize - bytesUsed; - - // the last part of the data might be smaller than the remainder of the ring - // buffer; clamp it - ring.frameLen = Min(dataLen - seekPos, totalAvail); - return ring; -} - bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen) { // must have a zstream decoder at this point, and input seek pos may not @@ -533,9 +649,21 @@ bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si return false; } + // the first call to this function expects a buffer with at least the size + // of the pak file header + the ZStd frame header, after this call we + // should subtract it from the needed buffer as + if (decoder->frameHeaderSize) + { + decoder->bufferSizeNeeded -= decoder->frameHeaderSize; + decoder->frameHeaderSize = 0; + } + // on the next call, we need at least this amount of data streamed in order // to decode the rest of the pak file, as this is where reading has stopped - decoder->bufferSizeNeeded = inBuffer.pos; + // this value may equal the currently streamed input size, as its possible + // this function is getting called to flush the remainder decoded data into + // the out buffer which got truncated off on the call prior due to wrapping + decoder->bufferSizeNeeded += inBuffer.pos; // advance buffer io positions, required so the main parser could already // start parsing the headers while the rest is getting decoded still @@ -555,170 +683,52 @@ bool Pak_ZStreamDecode(PakDecoder_t* const decoder, const size_t inLen, const si } //----------------------------------------------------------------------------- -// initialize the pak decoder context +// initializes the decoder //----------------------------------------------------------------------------- -size_t Pak_InitDefaultDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, - const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize) +size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const inputBuf, uint8_t* const outputBuf, + const uint64_t inputMask, const uint64_t outputMask, const size_t dataSize, const size_t dataOffset, + const size_t headerSize, const bool useZStream) { - uint64_t v8; // r9 - unsigned __int64 v9; // r11 - unsigned __int64 v10; // r8 - int v11; // er8 - __int64 v12; // rbx - unsigned int v13; // ebp - unsigned __int64 v14; // rbx - uint64_t v15; // rax - unsigned int v16; // er9 - unsigned __int64 v17; // r12 - unsigned __int64 v18; // r11 - unsigned __int64 v19; // r10 - unsigned __int64 v20; // rax - int v21; // ebp - unsigned __int64 v22; // r10 - unsigned int v23; // er9 - uint64_t v24; // rax - __int64 v25; // rsi - uint64_t v26; // rdx - size_t result; // rax - uint64_t v28; // rdx + // buffer size must be power of two as we index into buffers using a bit + // mask rather than modulo, the mask provided must be bufferSize-1 + assert(IsPowerOfTwo(inputMask + 1)); + assert(IsPowerOfTwo(outputMask + 1)); - v8 = dataOffset + headerSize + 8; - v9 = *(_QWORD*)((inputMask & (dataOffset + headerSize)) + fileBuffer); - - v10 = v9; - v9 >>= 6; - v11 = v10 & 0x3F; - decoder->decompSize = (1i64 << v11) | v9 & ((1i64 << v11) - 1); - v12 = *(_QWORD*)((inputMask & v8) + fileBuffer) << (64 - ((unsigned __int8)v11 + 6)); - decoder->inBufBytePos = v8 + ((unsigned __int64)(unsigned int)(v11 + 6) >> 3); - v13 = ((v11 + 6) & 7) + 13; - v14 = (0xFFFFFFFFFFFFFFFFui64 >> ((v11 + 6) & 7)) & ((v9 >> v11) | v12); - v15 = inputMask & decoder->inBufBytePos; - v16 = (((_BYTE)v14 - 1) & 0x3F) + 1; - v17 = 0xFFFFFFFFFFFFFFFFui64 >> (64 - (unsigned __int8)v16); - decoder->inputInvMask = v17; - v18 = 0xFFFFFFFFFFFFFFFFui64 >> (64 - ((((v14 >> 6) - 1) & 0x3F) + 1)); - decoder->outputInvMask = v18; - v19 = (v14 >> 13) | (*(_QWORD*)(v15 + fileBuffer) << (64 - (unsigned __int8)v13)); - v20 = v13; - v21 = v13 & 7; - decoder->inBufBytePos += v20 >> 3; - v22 = (0xFFFFFFFFFFFFFFFFui64 >> v21) & v19; - - if (v17 == -1i64) - { - decoder->headerOffset = 0; - decoder->bufferSizeNeeded = dataSize; - } - else - { - v23 = v16 >> 3; - v24 = inputMask & decoder->inBufBytePos; - decoder->headerOffset = v23 + 1; - v25 = *(_QWORD*)(v24 + fileBuffer) & ((1i64 << (8 * ((unsigned __int8)v23 + 1))) - 1); - decoder->inBufBytePos += v23 + 1; - decoder->bufferSizeNeeded = v25; - } - - decoder->bufferSizeNeeded += dataOffset; - v26 = decoder->bufferSizeNeeded; - decoder->currentByte = v22; - decoder->currentBit = v21; - decoder->qword70 = v17 + dataOffset - 6; - result = decoder->decompSize; - decoder->dword6C = 0; - decoder->compressedStreamSize = v26; - decoder->decompressedStreamSize = result; - - if ((((unsigned __int8)(v14 >> 6) - 1) & 0x3F) != -1i64 && result - 1 > v18) - { - v28 = v26 - decoder->headerOffset; - decoder->decompressedStreamSize = v18 + 1; - decoder->compressedStreamSize = v28; - } - - return result; -} - -size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, - const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize, const bool useZStream) -{ - decoder->inputBuf = fileBuffer; - decoder->outputBuf = nullptr; + // the absolute start address of the input and output buffers + decoder->inputBuf = inputBuf; + decoder->outputBuf = outputBuf; + // the actual file size, which consists of dataOffset (anything up to the + // frame header, like the file header) and the actual encoded data itself decoder->fileSize = dataOffset + dataSize; - decoder->dword44 = NULL; + decoder->padding = NULL; + // buffer masks, which essentially gets used to index into the input and + // output buffers, similar to 'idx % bufSize', where bufSize = bufMask+1 decoder->inputMask = inputMask; - decoder->outputMask = NULL; + decoder->outputMask = outputMask; + // the current positions in the input and output buffers; if we deal with + // paks that are patched, the buffer positions during the init and decode + // call on subsequent patches may not be at the start of the buffers decoder->inBufBytePos = dataOffset + headerSize; decoder->outBufBytePos = headerSize; if (useZStream) - { - // NOTE: on original paks, this data is passed out of the frame header, - // but for ZStd encoded paks we are always limiting this to the ring - // buffer size - decoder->inputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK; - decoder->outputInvMask = PAK_DECODE_OUT_RING_BUFFER_MASK; + return Pak_ZStreamDecoderInit(decoder, inputBuf, inputMask, dataSize, dataOffset, headerSize); - ZSTD_DStream* const dctx = ZSTD_createDStream(); - assert(dctx); - - if (!dctx) - return NULL; - - decoder->zstreamContext = dctx; - - // this is the offset to the ZSTD header in the input buffer - decoder->headerOffset = static_cast(decoder->inBufBytePos); - - // this points to the first byte of the frame header, takes dataOffset - // into account which is the offset in the ring buffer to the patched - // data as we parse it contiguously after the base pak data, which - // might have ended somewhere in the middle of the ring buffer - const uint8_t* const frameHeaderData = (inputMask & (dataOffset + headerSize)) + fileBuffer; - - if (ZSTD_getFrameHeader(&dctx->fParams, frameHeaderData, dataSize) != 0) - { - if (decoder->zstreamContext) - { - ZSTD_freeDStream(decoder->zstreamContext); - decoder->zstreamContext = nullptr; - } - - return NULL; // content size error - } - - // ideally the frame header of the block gets parsed first, the length - // thereof is returned by initDStream and thus being processed first - // before moving on to actual data - const size_t frameMetaDataSize = ZSTD_initDStream(dctx); - - // we need at least this many bytes of streamed data to process the frame - // header of the compressed block - decoder->bufferSizeNeeded = frameMetaDataSize + decoder->headerOffset; - - // must include header size - const uint64_t decompSize = dctx->fParams.frameContentSize + headerSize; - - decoder->decompSize = decompSize; - return decompSize; - } - - return Pak_InitDefaultDecoder(decoder, fileBuffer, inputMask, dataSize, dataOffset, headerSize); + return Pak_RStreamDecoderInit(decoder, inputBuf, inputMask, dataSize, dataOffset, headerSize); } //----------------------------------------------------------------------------- -// decode input pak data +// decodes streamed input pak data //----------------------------------------------------------------------------- bool Pak_StreamToBufferDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen, const bool useZStream) { - if (!Pak_HasEnoughStreamedDataForDecode(decoder, inLen, useZStream)) + if (!Pak_HasEnoughStreamedDataForDecode(decoder, inLen)) return false; - if (!Pak_HasEnoughDecodeBufferLeft(decoder, outLen)) + if (!Pak_HasEnoughDecodeBufferAvailable(decoder, outLen)) return false; if (useZStream) diff --git a/src/rtech/pak/pakdecode.h b/src/rtech/pak/pakdecode.h index f54175d4..f6fda351 100644 --- a/src/rtech/pak/pakdecode.h +++ b/src/rtech/pak/pakdecode.h @@ -2,11 +2,9 @@ #define RTECH_PAKDECODE_H #include "rtech/ipakfile.h" -extern size_t Pak_InitDefaultDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, - const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize); - -extern size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const fileBuffer, - const uint64_t inputMask, const size_t dataSize, const size_t dataOffset, const size_t headerSize, const bool useCustom); +extern size_t Pak_InitDecoder(PakDecoder_t* const decoder, const uint8_t* const inputBuf, uint8_t* const outputBuf, + const uint64_t inputMask, const uint64_t outputMask, const size_t dataSize, const size_t dataOffset, + const size_t headerSize, const bool useZStream); extern bool Pak_StreamToBufferDecode(PakDecoder_t* const decoder, const size_t inLen, const size_t outLen, const bool useCustom); diff --git a/src/rtech/pak/pakparse.cpp b/src/rtech/pak/pakparse.cpp index 20591a9d..ba1d8c7f 100644 --- a/src/rtech/pak/pakparse.cpp +++ b/src/rtech/pak/pakparse.cpp @@ -382,8 +382,10 @@ LABEL_18: decodeContext = &pak->pakDecoder; - decompressedSize = Pak_InitDecoder(&pak->pakDecoder, fileStream->buffer, - PAK_DECODE_IN_RING_BUFFER_MASK, v22->compressedSize - (v22->dataOffset - sizeof(PakFileHeader_t)), + decompressedSize = Pak_InitDecoder(&pak->pakDecoder, + fileStream->buffer, pak->decompBuffer, + PAK_DECODE_IN_RING_BUFFER_MASK, PAK_DECODE_OUT_RING_BUFFER_MASK, + v22->compressedSize - (v22->dataOffset - sizeof(PakFileHeader_t)), v22->dataOffset - sizeof(PakFileHeader_t), sizeof(PakFileHeader_t), useZStream); if (decompressedSize != v22->decompressedSize) @@ -392,9 +394,6 @@ LABEL_18: pak->memoryData.fileName, decompressedSize, pak->memoryData.pakHeader.decompressedSize); - - pak->pakDecoder.outputBuf = pak->decompBuffer; - pak->pakDecoder.outputMask = PAK_DECODE_OUT_RING_BUFFER_MASK; } else {