diff --git a/r5dev/core/stdafx.h b/r5dev/core/stdafx.h index 1637be75..ee52eac0 100644 --- a/r5dev/core/stdafx.h +++ b/r5dev/core/stdafx.h @@ -36,6 +36,11 @@ #include "thirdparty/imgui/include/imgui_impl_win32.h" #endif // !DEDICATED +#if !defined(SDKLAUNCHER) +#include "thirdparty/lzham/include/lzham_types.h" +#include "thirdparty/lzham/include/lzham.h" +#endif // !SDKLAUNCHER + #include "thirdparty/spdlog/include/spdlog.h" #include "thirdparty/spdlog/include/sinks/basic_file_sink.h" #include "thirdparty/spdlog/include/sinks/stdout_sinks.h" @@ -57,4 +62,4 @@ namespace MODULE g_mRadAudioDecoderDll = MODULE("binkawin64.dll"); MODULE g_mRadAudioSystemDll = MODULE("mileswin64.dll"); } -#endif // SDKLAUNCHER +#endif // !SDKLAUNCHER diff --git a/r5dev/dedicated.vcxproj b/r5dev/dedicated.vcxproj index 30414412..017a8898 100644 --- a/r5dev/dedicated.vcxproj +++ b/r5dev/dedicated.vcxproj @@ -135,7 +135,7 @@ Console true - lzhamlib_x64D.lib;lzhamcomp_x64D.lib;lzhamdecomp_x64D.lib;d3d11.lib;bcrypt.lib;%(AdditionalDependencies) + d3d11.lib;bcrypt.lib;%(AdditionalDependencies) del "..\..\..\$(ProjectName)" && copy /Y "$(TargetPath)" "..\..\..\ @@ -162,7 +162,7 @@ true true true - lzhamlib_x64.lib;lzhamcomp_x64.lib;lzhamdecomp_x64.lib;d3d11.lib;bcrypt.lib;%(AdditionalDependencies) + d3d11.lib;bcrypt.lib;%(AdditionalDependencies) del "..\..\..\$(ProjectName)" && copy /Y "$(TargetPath)" "..\..\..\ @@ -215,7 +215,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -360,6 +387,10 @@ + + NotUsing + NotUsing + NotUsing NotUsing @@ -372,6 +403,86 @@ NotUsing NotUsing + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + diff --git a/r5dev/dedicated.vcxproj.filters b/r5dev/dedicated.vcxproj.filters index 3bee9887..c110b5a1 100644 --- a/r5dev/dedicated.vcxproj.filters +++ b/r5dev/dedicated.vcxproj.filters @@ -97,6 +97,18 @@ {8288ba1a-7609-42ef-af3b-850727635a99} + + {8736d047-b4af-4c17-99ee-454cc96ec1ba} + + + {e84ad150-2358-4146-971a-02c5f045437c} + + + {eb98cd2b-4508-43a0-95e1-feacc7c83a8d} + + + {463e0739-1e5f-47a0-94d1-6cf5b6bf3ea6} + @@ -540,12 +552,6 @@ sdk\public\include - - thirdparty\lzham\include - - - thirdparty\lzham\include - sdk\mathlib @@ -564,6 +570,93 @@ windows + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\lzhamcomp\include + + + thirdparty\lzham\lzhamcomp\include + + + 
thirdparty\lzham\lzhamdecomp\include + + + thirdparty\lzham\lzhamdecomp\include + @@ -701,6 +794,69 @@ thirdparty\detours + + thirdparty\detours + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamdecomp + + + thirdparty\lzham\lzhamdecomp + diff --git a/r5dev/r5dev.vcxproj b/r5dev/r5dev.vcxproj index dcab8624..8d942be3 100644 --- a/r5dev/r5dev.vcxproj +++ b/r5dev/r5dev.vcxproj @@ -58,6 +58,10 @@ + + NotUsing + NotUsing + NotUsing NotUsing @@ -106,6 +110,86 @@ NotUsing NotUsing + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + @@ -192,7 +276,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -452,7 +563,7 @@ true false r5dev.def - lzhamlib_x64D.lib;lzhamcomp_x64D.lib;lzhamdecomp_x64D.lib;d3d11.lib;bcrypt.lib;%(AdditionalDependencies) + d3d11.lib;bcrypt.lib;%(AdditionalDependencies) copy /Y "$(TargetPath)" "..\..\..\bin\$(TargetFileName)" @@ -488,7 +599,7 @@ true false r5dev.def - lzhamlib_x64.lib;lzhamcomp_x64.lib;lzhamdecomp_x64.lib;d3d11.lib;bcrypt.lib;%(AdditionalDependencies) + d3d11.lib;bcrypt.lib;%(AdditionalDependencies) copy /Y "$(TargetPath)" "..\..\..\$(TargetFileName)" && del "..\..\..\r5apexsdkd64.dll" && rename "..\..\..\$(TargetFileName)" "r5apexsdkd64.dll" diff --git a/r5dev/r5dev.vcxproj.filters b/r5dev/r5dev.vcxproj.filters index 4bd5cd1c..818da70f 100644 --- a/r5dev/r5dev.vcxproj.filters +++ b/r5dev/r5dev.vcxproj.filters @@ -127,6 +127,18 @@ {f52dfb17-f5bd-4258-91a2-500587bee708} + + {f450ee50-7010-49e2-9f91-05a74fcb6a8b} + + + {11645361-fd70-462f-ab8b-8a78283a5fc7} + + + {785353c2-6417-4213-b55f-3007a0b79801} + + + {5beb12b5-0422-4337-9be6-2e6c0a05a69b} + @@ -321,6 +333,69 @@ thirdparty\detours + + thirdparty\detours + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamcomp + + + thirdparty\lzham\lzhamdecomp + + + thirdparty\lzham\lzhamdecomp + @@ -836,12 +911,6 @@ sdk\public\include - - thirdparty\lzham\include - - - thirdparty\lzham\include - sdk\mathlib @@ -872,6 +941,93 @@ sdk\milessdk + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + 
thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\include + + + thirdparty\lzham\lzhamcomp\include + + + thirdparty\lzham\lzhamcomp\include + + + thirdparty\lzham\lzhamdecomp\include + + + thirdparty\lzham\lzhamdecomp\include + diff --git a/r5dev/sdklauncher.vcxproj b/r5dev/sdklauncher.vcxproj index 99e6e9fb..d5260590 100644 --- a/r5dev/sdklauncher.vcxproj +++ b/r5dev/sdklauncher.vcxproj @@ -140,7 +140,7 @@ Console true - detours.lib;%(AdditionalDependencies) + %(AdditionalDependencies) del "..\..\..\r5reloaded.exe" && copy /Y "$(TargetPath)" "..\..\..\$(TargetFileName)" @@ -174,7 +174,7 @@ true true true - detours.lib;%(AdditionalDependencies) + %(AdditionalDependencies) del "..\..\..\r5reloaded.exe" && copy /Y "$(TargetPath)" "..\..\..\$(TargetFileName)" @@ -186,6 +186,22 @@ Create + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + @@ -198,6 +214,9 @@ + + + diff --git a/r5dev/sdklauncher.vcxproj.filters b/r5dev/sdklauncher.vcxproj.filters index 5a7f2de0..70394cc2 100644 --- a/r5dev/sdklauncher.vcxproj.filters +++ b/r5dev/sdklauncher.vcxproj.filters @@ -13,6 +13,12 @@ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + {82b18787-373d-42ce-8d8d-1e3adba8d3a0} + + + {dc968871-7ca2-452b-a5b1-350a12dd54aa} + @@ -21,6 +27,18 @@ Source Files + + Detours Files + + + Detours Files + + + Detours Files + + + Detours Files + @@ -37,6 +55,15 @@ Header Files + + Detours Files\include + + + Detours Files\include + + + Detours Files\include + diff --git a/r5dev/thirdparty/detours/src/creatwth.cpp b/r5dev/thirdparty/detours/src/creatwth.cpp new file mode 100644 index 00000000..f6720d7b --- /dev/null +++ b/r5dev/thirdparty/detours/src/creatwth.cpp @@ -0,0 +1,1783 @@ +////////////////////////////////////////////////////////////////////////////// +// +// Create a process with a DLL (creatwth.cpp of detours.lib) +// +// Microsoft Research Detours Package, Version 4.0.1 +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// + +// #define DETOUR_DEBUG 1 +#define DETOURS_INTERNAL +#include "../include/detours.h" +#include + +#if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH +#error detours.h version mismatch +#endif + +#define IMPORT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT] +#define BOUND_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT] +#define CLR_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR] +#define IAT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT] + +////////////////////////////////////////////////////////////////////////////// +// +const GUID DETOUR_EXE_HELPER_GUID = { /* ea0251b9-5cde-41b5-98d0-2af4a26b0fee */ + 0xea0251b9, 0x5cde, 0x41b5, + { 0x98, 0xd0, 0x2a, 0xf4, 0xa2, 0x6b, 0x0f, 0xee }}; + +////////////////////////////////////////////////////////////////////////////// +// +// Enumerate through modules in the target process. 
+// +static PVOID LoadNtHeaderFromProcess(_In_ HANDLE hProcess, + _In_ HMODULE hModule, + _Out_ PIMAGE_NT_HEADERS32 pNtHeader) +{ + ZeroMemory(pNtHeader, sizeof(*pNtHeader)); + PBYTE pbModule = (PBYTE)hModule; + + if (pbModule == NULL) { + SetLastError(ERROR_INVALID_PARAMETER); + return NULL; + } + + MEMORY_BASIC_INFORMATION mbi; + ZeroMemory(&mbi, sizeof(mbi)); + + if (VirtualQueryEx(hProcess, hModule, &mbi, sizeof(mbi)) == 0) { + return NULL; + } + + IMAGE_DOS_HEADER idh; + if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) { + DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %lu\n", + pbModule, pbModule + sizeof(idh), GetLastError())); + return NULL; + } + + if (idh.e_magic != IMAGE_DOS_SIGNATURE || + (DWORD)idh.e_lfanew > mbi.RegionSize || + (DWORD)idh.e_lfanew < sizeof(idh)) { + + SetLastError(ERROR_BAD_EXE_FORMAT); + return NULL; + } + + if (!ReadProcessMemory(hProcess, pbModule + idh.e_lfanew, + pNtHeader, sizeof(*pNtHeader), NULL)) { + DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p:%p) failed: %lu\n", + pbModule + idh.e_lfanew, + pbModule + idh.e_lfanew + sizeof(*pNtHeader), + pbModule, + GetLastError())); + return NULL; + } + + if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) { + SetLastError(ERROR_BAD_EXE_FORMAT); + return NULL; + } + + return pbModule + idh.e_lfanew; +} + +static HMODULE EnumerateModulesInProcess(_In_ HANDLE hProcess, + _In_opt_ HMODULE hModuleLast, + _Out_ PIMAGE_NT_HEADERS32 pNtHeader, + _Out_opt_ PVOID *pRemoteNtHeader) +{ + ZeroMemory(pNtHeader, sizeof(*pNtHeader)); + if (pRemoteNtHeader) { + *pRemoteNtHeader = NULL; + } + + PBYTE pbLast = (PBYTE)hModuleLast + MM_ALLOCATION_GRANULARITY; + + MEMORY_BASIC_INFORMATION mbi; + ZeroMemory(&mbi, sizeof(mbi)); + + // Find the next memory region that contains a mapped PE image. + // + + for (;; pbLast = (PBYTE)mbi.BaseAddress + mbi.RegionSize) { + if (VirtualQueryEx(hProcess, (PVOID)pbLast, &mbi, sizeof(mbi)) == 0) { + break; + } + + // Usermode address space has such an unaligned region size always at the + // end and only at the end. + // + if ((mbi.RegionSize & 0xfff) == 0xfff) { + break; + } + if (((PBYTE)mbi.BaseAddress + mbi.RegionSize) < pbLast) { + break; + } + + // Skip uncommitted regions and guard pages. + // + if ((mbi.State != MEM_COMMIT) || + ((mbi.Protect & 0xff) == PAGE_NOACCESS) || + (mbi.Protect & PAGE_GUARD)) { + continue; + } + + PVOID remoteHeader + = LoadNtHeaderFromProcess(hProcess, (HMODULE)pbLast, pNtHeader); + if (remoteHeader) { + if (pRemoteNtHeader) { + *pRemoteNtHeader = remoteHeader; + } + + return (HMODULE)pbLast; + } + } + return NULL; +} + +////////////////////////////////////////////////////////////////////////////// +// +// Find payloads in target process. 
+//
+
+static PVOID FindDetourSectionInRemoteModule(_In_ HANDLE hProcess,
+                                             _In_ HMODULE hModule,
+                                             _In_ const IMAGE_NT_HEADERS32 *pNtHeader,
+                                             _In_ PVOID pRemoteNtHeader)
+{
+    if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return NULL;
+    }
+
+    PIMAGE_SECTION_HEADER pRemoteSectionHeaders
+        = (PIMAGE_SECTION_HEADER)((PBYTE)pRemoteNtHeader +
+                                  sizeof(pNtHeader->Signature) +
+                                  sizeof(pNtHeader->FileHeader) +
+                                  pNtHeader->FileHeader.SizeOfOptionalHeader);
+
+    IMAGE_SECTION_HEADER header;
+    for (DWORD n = 0; n < pNtHeader->FileHeader.NumberOfSections; ++n) {
+        if (!ReadProcessMemory(hProcess, pRemoteSectionHeaders + n, &header, sizeof(header), NULL)) {
+            DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p) failed: %lu\n",
+                          pRemoteSectionHeaders + n,
+                          (PBYTE)(pRemoteSectionHeaders + n) + sizeof(header),
+                          GetLastError()));
+
+            return NULL;
+        }
+
+        if (strcmp((PCHAR)header.Name, ".detour") == 0) {
+            if (header.VirtualAddress == 0 ||
+                header.SizeOfRawData == 0) {
+
+                break;
+            }
+
+            SetLastError(NO_ERROR);
+            return (PBYTE)hModule + header.VirtualAddress;
+        }
+    }
+
+    SetLastError(ERROR_EXE_MARKED_INVALID);
+    return NULL;
+}
+
+static PVOID FindPayloadInRemoteDetourSection(_In_ HANDLE hProcess,
+                                              _In_ REFGUID rguid,
+                                              _Out_opt_ DWORD *pcbData,
+                                              _In_ PVOID pvRemoteDetoursSection)
+{
+    if (pcbData) {
+        *pcbData = 0;
+    }
+
+    PBYTE pbData = (PBYTE)pvRemoteDetoursSection;
+
+    DETOUR_SECTION_HEADER header;
+    if (!ReadProcessMemory(hProcess, pbData, &header, sizeof(header), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(dsh@%p..%p) failed: %lu\n",
+                      pbData,
+                      pbData + sizeof(header),
+                      GetLastError()));
+        return NULL;
+    }
+
+    if (header.cbHeaderSize < sizeof(DETOUR_SECTION_HEADER) ||
+        header.nSignature != DETOUR_SECTION_HEADER_SIGNATURE) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return NULL;
+    }
+
+    if (header.nDataOffset == 0) {
+        header.nDataOffset = header.cbHeaderSize;
+    }
+
+    for (PVOID pvSection = pbData + header.nDataOffset; pvSection < pbData + header.cbDataSize;) {
+        DETOUR_SECTION_RECORD section;
+        if (!ReadProcessMemory(hProcess, pvSection, &section, sizeof(section), NULL)) {
+            DETOUR_TRACE(("ReadProcessMemory(dsr@%p..%p) failed: %lu\n",
+                          pvSection,
+                          (PBYTE)pvSection + sizeof(section),
+                          GetLastError()));
+            return NULL;
+        }
+
+        if (DetourAreSameGuid(section.guid, rguid)) {
+            if (pcbData) {
+                *pcbData = section.cbBytes - sizeof(section);
+            }
+            SetLastError(NO_ERROR);
+            return (DETOUR_SECTION_RECORD *)pvSection + 1;
+        }
+
+        pvSection = (PBYTE)pvSection + section.cbBytes;
+    }
+
+    return NULL;
+}
+
+_Success_(return != NULL)
+PVOID WINAPI DetourFindRemotePayload(_In_ HANDLE hProcess,
+                                     _In_ REFGUID rguid,
+                                     _Out_opt_ DWORD *pcbData)
+{
+    if (hProcess == NULL) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return NULL;
+    }
+
+    IMAGE_NT_HEADERS32 header;
+    PVOID pvRemoteHeader;
+    for (HMODULE hMod = NULL; (hMod = EnumerateModulesInProcess(hProcess, hMod, &header, &pvRemoteHeader)) != NULL;) {
+        PVOID pvData = FindDetourSectionInRemoteModule(hProcess, hMod, &header, pvRemoteHeader);
+        if (pvData != NULL) {
+            pvData = FindPayloadInRemoteDetourSection(hProcess, rguid, pcbData, pvData);
+            if (pvData != NULL) {
+                return pvData;
+            }
+        }
+    }
+
+    SetLastError(ERROR_MOD_NOT_FOUND);
+    return NULL;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Find a region of memory in which we can create a replacement import table.
+// +static PBYTE FindAndAllocateNearBase(HANDLE hProcess, PBYTE pbModule, PBYTE pbBase, DWORD cbAlloc) +{ + MEMORY_BASIC_INFORMATION mbi; + ZeroMemory(&mbi, sizeof(mbi)); + + PBYTE pbLast = pbBase; + for (;; pbLast = (PBYTE)mbi.BaseAddress + mbi.RegionSize) { + + ZeroMemory(&mbi, sizeof(mbi)); + if (VirtualQueryEx(hProcess, (PVOID)pbLast, &mbi, sizeof(mbi)) == 0) { + if (GetLastError() == ERROR_INVALID_PARAMETER) { + break; + } + DETOUR_TRACE(("VirtualQueryEx(%p) failed: %lu\n", + pbLast, GetLastError())); + break; + } + // Usermode address space has such an unaligned region size always at the + // end and only at the end. + // + if ((mbi.RegionSize & 0xfff) == 0xfff) { + break; + } + + // Skip anything other than a pure free region. + // + if (mbi.State != MEM_FREE) { + continue; + } + + // Use the max of mbi.BaseAddress and pbBase, in case mbi.BaseAddress < pbBase. + PBYTE pbAddress = (PBYTE)mbi.BaseAddress > pbBase ? (PBYTE)mbi.BaseAddress : pbBase; + + // Round pbAddress up to the nearest MM allocation boundary. + const DWORD_PTR mmGranularityMinusOne = (DWORD_PTR)(MM_ALLOCATION_GRANULARITY -1); + pbAddress = (PBYTE)(((DWORD_PTR)pbAddress + mmGranularityMinusOne) & ~mmGranularityMinusOne); + +#ifdef _WIN64 + // The offset from pbModule to any replacement import must fit into 32 bits. + // For simplicity, we check that the offset to the last byte fits into 32 bits, + // instead of the largest offset we'll actually use. The values are very similar. + const size_t GB4 = ((((size_t)1) << 32) - 1); + if ((size_t)(pbAddress + cbAlloc - 1 - pbModule) > GB4) { + DETOUR_TRACE(("FindAndAllocateNearBase(1) failing due to distance >4GB %p\n", pbAddress)); + return NULL; + } +#else + UNREFERENCED_PARAMETER(pbModule); +#endif + + DETOUR_TRACE(("Free region %p..%p\n", + mbi.BaseAddress, + (PBYTE)mbi.BaseAddress + mbi.RegionSize)); + + for (; pbAddress < (PBYTE)mbi.BaseAddress + mbi.RegionSize; pbAddress += MM_ALLOCATION_GRANULARITY) { + PBYTE pbAlloc = (PBYTE)VirtualAllocEx(hProcess, pbAddress, cbAlloc, + MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (pbAlloc == NULL) { + DETOUR_TRACE(("VirtualAllocEx(%p) failed: %lu\n", pbAddress, GetLastError())); + continue; + } +#ifdef _WIN64 + // The offset from pbModule to any replacement import must fit into 32 bits. + if ((size_t)(pbAddress + cbAlloc - 1 - pbModule) > GB4) { + DETOUR_TRACE(("FindAndAllocateNearBase(2) failing due to distance >4GB %p\n", pbAddress)); + return NULL; + } +#endif + DETOUR_TRACE(("[%p..%p] Allocated for import table.\n", + pbAlloc, pbAlloc + cbAlloc)); + return pbAlloc; + } + } + return NULL; +} + +static inline DWORD PadToDword(DWORD dw) +{ + return (dw + 3) & ~3u; +} + +static inline DWORD PadToDwordPtr(DWORD dw) +{ + return (dw + 7) & ~7u; +} + +static inline HRESULT ReplaceOptionalSizeA(_Inout_z_count_(cchDest) LPSTR pszDest, + _In_ size_t cchDest, + _In_z_ LPCSTR pszSize) +{ + if (cchDest == 0 || pszDest == NULL || pszSize == NULL || + pszSize[0] == '\0' || pszSize[1] == '\0' || pszSize[2] != '\0') { + + // can not write into empty buffer or with string other than two chars. + return ERROR_INVALID_PARAMETER; + } + + for (; cchDest >= 2; cchDest--, pszDest++) { + if (pszDest[0] == '?' && pszDest[1] == '?') { + pszDest[0] = pszSize[0]; + pszDest[1] = pszSize[1]; + break; + } + } + + return S_OK; +} + +static BOOL RecordExeRestore(HANDLE hProcess, HMODULE hModule, DETOUR_EXE_RESTORE& der) +{ + // Save the various headers for DetourRestoreAfterWith. 
+ ZeroMemory(&der, sizeof(der)); + der.cb = sizeof(der); + + der.pidh = (PBYTE)hModule; + der.cbidh = sizeof(der.idh); + if (!ReadProcessMemory(hProcess, der.pidh, &der.idh, sizeof(der.idh), NULL)) { + DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %lu\n", + der.pidh, der.pidh + der.cbidh, GetLastError())); + return FALSE; + } + DETOUR_TRACE(("IDH: %p..%p\n", der.pidh, der.pidh + der.cbidh)); + + // We read the NT header in two passes to get the full size. + // First we read just the Signature and FileHeader. + der.pinh = der.pidh + der.idh.e_lfanew; + der.cbinh = FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader); + if (!ReadProcessMemory(hProcess, der.pinh, &der.inh, der.cbinh, NULL)) { + DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %lu\n", + der.pinh, der.pinh + der.cbinh, GetLastError())); + return FALSE; + } + + // Second we read the OptionalHeader and Section headers. + der.cbinh = (FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) + + der.inh.FileHeader.SizeOfOptionalHeader + + der.inh.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER)); + + if (der.cbinh > sizeof(der.raw)) { + return FALSE; + } + + if (!ReadProcessMemory(hProcess, der.pinh, &der.inh, der.cbinh, NULL)) { + DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %lu\n", + der.pinh, der.pinh + der.cbinh, GetLastError())); + return FALSE; + } + DETOUR_TRACE(("INH: %p..%p\n", der.pinh, der.pinh + der.cbinh)); + + // Third, we read the CLR header + + if (der.inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) { + if (der.inh32.CLR_DIRECTORY.VirtualAddress != 0 && + der.inh32.CLR_DIRECTORY.Size != 0) { + + DETOUR_TRACE(("CLR32.VirtAddr=%08lx, CLR.Size=%lu\n", + der.inh32.CLR_DIRECTORY.VirtualAddress, + der.inh32.CLR_DIRECTORY.Size)); + + der.pclr = ((PBYTE)hModule) + der.inh32.CLR_DIRECTORY.VirtualAddress; + } + } + else if (der.inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) { + if (der.inh64.CLR_DIRECTORY.VirtualAddress != 0 && + der.inh64.CLR_DIRECTORY.Size != 0) { + + DETOUR_TRACE(("CLR64.VirtAddr=%08lx, CLR.Size=%lu\n", + der.inh64.CLR_DIRECTORY.VirtualAddress, + der.inh64.CLR_DIRECTORY.Size)); + + der.pclr = ((PBYTE)hModule) + der.inh64.CLR_DIRECTORY.VirtualAddress; + } + } + + if (der.pclr != 0) { + der.cbclr = sizeof(der.clr); + if (!ReadProcessMemory(hProcess, der.pclr, &der.clr, der.cbclr, NULL)) { + DETOUR_TRACE(("ReadProcessMemory(clr@%p..%p) failed: %lu\n", + der.pclr, der.pclr + der.cbclr, GetLastError())); + return FALSE; + } + DETOUR_TRACE(("CLR: %p..%p\n", der.pclr, der.pclr + der.cbclr)); + } + + return TRUE; +} + +////////////////////////////////////////////////////////////////////////////// +// +#if DETOURS_32BIT +#define DWORD_XX DWORD32 +#define IMAGE_NT_HEADERS_XX IMAGE_NT_HEADERS32 +#define IMAGE_NT_OPTIONAL_HDR_MAGIC_XX IMAGE_NT_OPTIONAL_HDR32_MAGIC +#define IMAGE_ORDINAL_FLAG_XX IMAGE_ORDINAL_FLAG32 +#define IMAGE_THUNK_DATAXX IMAGE_THUNK_DATA32 +#define UPDATE_IMPORTS_XX UpdateImports32 +#define DETOURS_BITS_XX 32 +#include "uimports.cpp" +#undef DETOUR_EXE_RESTORE_FIELD_XX +#undef DWORD_XX +#undef IMAGE_NT_HEADERS_XX +#undef IMAGE_NT_OPTIONAL_HDR_MAGIC_XX +#undef IMAGE_ORDINAL_FLAG_XX +#undef UPDATE_IMPORTS_XX +#endif // DETOURS_32BIT + +#if DETOURS_64BIT +#define DWORD_XX DWORD64 +#define IMAGE_NT_HEADERS_XX IMAGE_NT_HEADERS64 +#define IMAGE_NT_OPTIONAL_HDR_MAGIC_XX IMAGE_NT_OPTIONAL_HDR64_MAGIC +#define IMAGE_ORDINAL_FLAG_XX IMAGE_ORDINAL_FLAG64 +#define IMAGE_THUNK_DATAXX IMAGE_THUNK_DATA64 +#define UPDATE_IMPORTS_XX UpdateImports64 +#define DETOURS_BITS_XX 64 
+#include "uimports.cpp"
+#undef DETOUR_EXE_RESTORE_FIELD_XX
+#undef DWORD_XX
+#undef IMAGE_NT_HEADERS_XX
+#undef IMAGE_NT_OPTIONAL_HDR_MAGIC_XX
+#undef IMAGE_ORDINAL_FLAG_XX
+#undef UPDATE_IMPORTS_XX
+#endif // DETOURS_64BIT
+
+//////////////////////////////////////////////////////////////////////////////
+//
+#if DETOURS_64BIT
+
+C_ASSERT(sizeof(IMAGE_NT_HEADERS64) == sizeof(IMAGE_NT_HEADERS32) + 16);
+
+static BOOL UpdateFrom32To64(HANDLE hProcess, HMODULE hModule, WORD machine,
+                             DETOUR_EXE_RESTORE& der)
+{
+    IMAGE_DOS_HEADER idh;
+    IMAGE_NT_HEADERS32 inh32;
+    IMAGE_NT_HEADERS64 inh64;
+    IMAGE_SECTION_HEADER sects[32];
+    PBYTE pbModule = (PBYTE)hModule;
+    DWORD n;
+
+    ZeroMemory(&inh32, sizeof(inh32));
+    ZeroMemory(&inh64, sizeof(inh64));
+    ZeroMemory(sects, sizeof(sects));
+
+    DETOUR_TRACE(("UpdateFrom32To64(%04x)\n", machine));
+    //////////////////////////////////////////////////////// Read old headers.
+    //
+    if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %lu\n",
+                      pbModule, pbModule + sizeof(idh), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p)\n",
+                  pbModule, pbModule + sizeof(idh)));
+
+    PBYTE pnh = pbModule + idh.e_lfanew;
+    if (!ReadProcessMemory(hProcess, pnh, &inh32, sizeof(inh32), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %lu\n",
+                      pnh, pnh + sizeof(inh32), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p)\n", pnh, pnh + sizeof(inh32)));
+
+    if (inh32.FileHeader.NumberOfSections > (sizeof(sects)/sizeof(sects[0]))) {
+        return FALSE;
+    }
+
+    PBYTE psects = pnh +
+                   FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) +
+                   inh32.FileHeader.SizeOfOptionalHeader;
+    ULONG cb = inh32.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER);
+    if (!ReadProcessMemory(hProcess, psects, &sects, cb, NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p) failed: %lu\n",
+                      psects, psects + cb, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p)\n", psects, psects + cb));
+
+    ////////////////////////////////////////////////////////// Convert header.
+ // + inh64.Signature = inh32.Signature; + inh64.FileHeader = inh32.FileHeader; + inh64.FileHeader.Machine = machine; + inh64.FileHeader.SizeOfOptionalHeader = sizeof(IMAGE_OPTIONAL_HEADER64); + + inh64.OptionalHeader.Magic = IMAGE_NT_OPTIONAL_HDR64_MAGIC; + inh64.OptionalHeader.MajorLinkerVersion = inh32.OptionalHeader.MajorLinkerVersion; + inh64.OptionalHeader.MinorLinkerVersion = inh32.OptionalHeader.MinorLinkerVersion; + inh64.OptionalHeader.SizeOfCode = inh32.OptionalHeader.SizeOfCode; + inh64.OptionalHeader.SizeOfInitializedData = inh32.OptionalHeader.SizeOfInitializedData; + inh64.OptionalHeader.SizeOfUninitializedData = inh32.OptionalHeader.SizeOfUninitializedData; + inh64.OptionalHeader.AddressOfEntryPoint = inh32.OptionalHeader.AddressOfEntryPoint; + inh64.OptionalHeader.BaseOfCode = inh32.OptionalHeader.BaseOfCode; + inh64.OptionalHeader.ImageBase = inh32.OptionalHeader.ImageBase; + inh64.OptionalHeader.SectionAlignment = inh32.OptionalHeader.SectionAlignment; + inh64.OptionalHeader.FileAlignment = inh32.OptionalHeader.FileAlignment; + inh64.OptionalHeader.MajorOperatingSystemVersion + = inh32.OptionalHeader.MajorOperatingSystemVersion; + inh64.OptionalHeader.MinorOperatingSystemVersion + = inh32.OptionalHeader.MinorOperatingSystemVersion; + inh64.OptionalHeader.MajorImageVersion = inh32.OptionalHeader.MajorImageVersion; + inh64.OptionalHeader.MinorImageVersion = inh32.OptionalHeader.MinorImageVersion; + inh64.OptionalHeader.MajorSubsystemVersion = inh32.OptionalHeader.MajorSubsystemVersion; + inh64.OptionalHeader.MinorSubsystemVersion = inh32.OptionalHeader.MinorSubsystemVersion; + inh64.OptionalHeader.Win32VersionValue = inh32.OptionalHeader.Win32VersionValue; + inh64.OptionalHeader.SizeOfImage = inh32.OptionalHeader.SizeOfImage; + inh64.OptionalHeader.SizeOfHeaders = inh32.OptionalHeader.SizeOfHeaders; + inh64.OptionalHeader.CheckSum = inh32.OptionalHeader.CheckSum; + inh64.OptionalHeader.Subsystem = inh32.OptionalHeader.Subsystem; + inh64.OptionalHeader.DllCharacteristics = inh32.OptionalHeader.DllCharacteristics; + inh64.OptionalHeader.SizeOfStackReserve = inh32.OptionalHeader.SizeOfStackReserve; + inh64.OptionalHeader.SizeOfStackCommit = inh32.OptionalHeader.SizeOfStackCommit; + inh64.OptionalHeader.SizeOfHeapReserve = inh32.OptionalHeader.SizeOfHeapReserve; + inh64.OptionalHeader.SizeOfHeapCommit = inh32.OptionalHeader.SizeOfHeapCommit; + inh64.OptionalHeader.LoaderFlags = inh32.OptionalHeader.LoaderFlags; + inh64.OptionalHeader.NumberOfRvaAndSizes = inh32.OptionalHeader.NumberOfRvaAndSizes; + for (n = 0; n < IMAGE_NUMBEROF_DIRECTORY_ENTRIES; n++) { + inh64.OptionalHeader.DataDirectory[n] = inh32.OptionalHeader.DataDirectory[n]; + } + + /////////////////////////////////////////////////////// Write new headers. 
+    //
+    DWORD dwProtect = 0;
+    if (!DetourVirtualProtectSameExecuteEx(hProcess, pbModule, inh64.OptionalHeader.SizeOfHeaders,
+                                           PAGE_EXECUTE_READWRITE, &dwProtect)) {
+        return FALSE;
+    }
+
+    if (!WriteProcessMemory(hProcess, pnh, &inh64, sizeof(inh64), NULL)) {
+        DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p) failed: %lu\n",
+                      pnh, pnh + sizeof(inh64), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p)\n", pnh, pnh + sizeof(inh64)));
+
+    psects = pnh +
+             FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) +
+             inh64.FileHeader.SizeOfOptionalHeader;
+    cb = inh64.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER);
+    if (!WriteProcessMemory(hProcess, psects, &sects, cb, NULL)) {
+        DETOUR_TRACE(("WriteProcessMemory(ish@%p..%p) failed: %lu\n",
+                      psects, psects + cb, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(ish@%p..%p)\n", psects, psects + cb));
+
+    // Record the updated headers.
+    if (!RecordExeRestore(hProcess, hModule, der)) {
+        return FALSE;
+    }
+
+    // Remove the import table.
+    if (der.pclr != NULL && (der.clr.Flags & COMIMAGE_FLAGS_ILONLY)) {
+        inh64.IMPORT_DIRECTORY.VirtualAddress = 0;
+        inh64.IMPORT_DIRECTORY.Size = 0;
+
+        if (!WriteProcessMemory(hProcess, pnh, &inh64, sizeof(inh64), NULL)) {
+            DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p) failed: %lu\n",
+                          pnh, pnh + sizeof(inh64), GetLastError()));
+            return FALSE;
+        }
+    }
+
+    DWORD dwOld = 0;
+    if (!VirtualProtectEx(hProcess, pbModule, inh64.OptionalHeader.SizeOfHeaders,
+                          dwProtect, &dwOld)) {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+#endif // DETOURS_64BIT
+
+typedef BOOL(WINAPI *LPFN_ISWOW64PROCESS)(HANDLE, PBOOL);
+
+static BOOL IsWow64ProcessHelper(HANDLE hProcess,
+                                 PBOOL Wow64Process)
+{
+#ifdef _X86_
+    if (Wow64Process == NULL) {
+        return FALSE;
+    }
+
+    // IsWow64Process is not available on all supported versions of Windows.
+    //
+    HMODULE hKernel32 = LoadLibraryW(L"KERNEL32.DLL");
+    if (hKernel32 == NULL) {
+        DETOUR_TRACE(("LoadLibraryW failed: %lu\n", GetLastError()));
+        return FALSE;
+    }
+
+    LPFN_ISWOW64PROCESS pfnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(
+        hKernel32, "IsWow64Process");
+
+    if (pfnIsWow64Process == NULL) {
+        DETOUR_TRACE(("GetProcAddress failed: %lu\n", GetLastError()));
+        return FALSE;
+    }
+    return pfnIsWow64Process(hProcess, Wow64Process);
+#else
+    return IsWow64Process(hProcess, Wow64Process);
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+BOOL WINAPI DetourUpdateProcessWithDll(_In_ HANDLE hProcess,
+                                       _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                       _In_ DWORD nDlls)
+{
+    // Find the next memory region that contains a mapped PE image.
+    //
+    BOOL bIs32BitProcess;
+    BOOL bIs64BitOS = FALSE;
+    HMODULE hModule = NULL;
+    HMODULE hLast = NULL;
+
+    DETOUR_TRACE(("DetourUpdateProcessWithDll(%p,dlls=%lu)\n", hProcess, nDlls));
+
+    for (;;) {
+        IMAGE_NT_HEADERS32 inh;
+
+        if ((hLast = EnumerateModulesInProcess(hProcess, hLast, &inh, NULL)) == NULL) {
+            break;
+        }
+
+        DETOUR_TRACE(("%p machine=%04x magic=%04x\n",
+                      hLast, inh.FileHeader.Machine, inh.OptionalHeader.Magic));
+
+        if ((inh.FileHeader.Characteristics & IMAGE_FILE_DLL) == 0) {
+            hModule = hLast;
+            DETOUR_TRACE(("%p Found EXE\n", hLast));
+        }
+    }
+
+    if (hModule == NULL) {
+        SetLastError(ERROR_INVALID_OPERATION);
+        return FALSE;
+    }
+
+    // Determine if the target process is 32bit or 64bit. This is a two-step process:
+    //
+    // 1. First, determine if we're running on a 64bit operating system.
+    //    - If we're running 64bit code (i.e.
_WIN64 is defined), this is trivially true. + // - If we're running 32bit code (i.e. _WIN64 is not defined), test if + // we're running under Wow64. If so, it implies that the operating system + // is 64bit. + // +#ifdef _WIN64 + bIs64BitOS = TRUE; +#else + if (!IsWow64ProcessHelper(GetCurrentProcess(), &bIs64BitOS)) { + return FALSE; + } +#endif + + // 2. With the operating system bitness known, we can now consider the target process: + // - If we're running on a 64bit OS, the target process is 32bit in case + // it is running under Wow64. Otherwise, it's 64bit, running natively + // (without Wow64). + // - If we're running on a 32bit OS, the target process must be 32bit, too. + // + if (bIs64BitOS) { + if (!IsWow64ProcessHelper(hProcess, &bIs32BitProcess)) { + return FALSE; + } + } else { + bIs32BitProcess = TRUE; + } + + DETOUR_TRACE((" 32BitProcess=%d\n", bIs32BitProcess)); + + return DetourUpdateProcessWithDllEx(hProcess, + hModule, + bIs32BitProcess, + rlpDlls, + nDlls); +} + +BOOL WINAPI DetourUpdateProcessWithDllEx(_In_ HANDLE hProcess, + _In_ HMODULE hModule, + _In_ BOOL bIs32BitProcess, + _In_reads_(nDlls) LPCSTR *rlpDlls, + _In_ DWORD nDlls) +{ + // Find the next memory region that contains a mapped PE image. + // + BOOL bIs32BitExe = FALSE; + + DETOUR_TRACE(("DetourUpdateProcessWithDllEx(%p,%p,dlls=%lu)\n", hProcess, hModule, nDlls)); + + IMAGE_NT_HEADERS32 inh; + + if (hModule == NULL || !LoadNtHeaderFromProcess(hProcess, hModule, &inh)) { + SetLastError(ERROR_INVALID_OPERATION); + return FALSE; + } + + if (inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC + && inh.FileHeader.Machine != 0) { + + bIs32BitExe = TRUE; + } + + DETOUR_TRACE((" 32BitExe=%d\n", bIs32BitExe)); + + if (hModule == NULL) { + SetLastError(ERROR_INVALID_OPERATION); + return FALSE; + } + + // Save the various headers for DetourRestoreAfterWith. + // + DETOUR_EXE_RESTORE der; + + if (!RecordExeRestore(hProcess, hModule, der)) { + return FALSE; + } + +#if defined(DETOURS_64BIT) + // Try to convert a neutral 32-bit managed binary to a 64-bit managed binary. + if (bIs32BitExe && !bIs32BitProcess) { + if (!der.pclr // Native binary + || (der.clr.Flags & COMIMAGE_FLAGS_ILONLY) == 0 // Or mixed-mode MSIL + || (der.clr.Flags & COMIMAGE_FLAGS_32BITREQUIRED) != 0) { // Or 32BIT Required MSIL + + SetLastError(ERROR_INVALID_HANDLE); + return FALSE; + } + + if (!UpdateFrom32To64(hProcess, hModule, +#if defined(DETOURS_X64) + IMAGE_FILE_MACHINE_AMD64, +#elif defined(DETOURS_IA64) + IMAGE_FILE_MACHINE_IA64, +#elif defined(DETOURS_ARM64) + IMAGE_FILE_MACHINE_ARM64, +#else +#error Must define one of DETOURS_X64 or DETOURS_IA64 or DETOURS_ARM64 on 64-bit. +#endif + der)) { + return FALSE; + } + bIs32BitExe = FALSE; + } +#endif // DETOURS_64BIT + + // Now decide if we can insert the detour. + +#if defined(DETOURS_32BIT) + if (bIs32BitProcess) { + // 32-bit native or 32-bit managed process on any platform. + if (!UpdateImports32(hProcess, hModule, rlpDlls, nDlls)) { + return FALSE; + } + } + else { + // 64-bit native or 64-bit managed process. + // + // Can't detour a 64-bit process with 32-bit code. + // Note: This happens for 32-bit PE binaries containing only + // manage code that have been marked as 64-bit ready. + // + SetLastError(ERROR_INVALID_HANDLE); + return FALSE; + } +#elif defined(DETOURS_64BIT) + if (bIs32BitProcess || bIs32BitExe) { + // Can't detour a 32-bit process with 64-bit code. 
+ SetLastError(ERROR_INVALID_HANDLE); + return FALSE; + } + else { + // 64-bit native or 64-bit managed process on any platform. + if (!UpdateImports64(hProcess, hModule, rlpDlls, nDlls)) { + return FALSE; + } + } +#else +#pragma Must define one of DETOURS_32BIT or DETOURS_64BIT. +#endif // DETOURS_64BIT + + /////////////////////////////////////////////////// Update the CLR header. + // + if (der.pclr != NULL) { + DETOUR_CLR_HEADER clr; + CopyMemory(&clr, &der.clr, sizeof(clr)); + clr.Flags &= ~COMIMAGE_FLAGS_ILONLY; // Clear the IL_ONLY flag. + + DWORD dwProtect; + if (!DetourVirtualProtectSameExecuteEx(hProcess, der.pclr, sizeof(clr), PAGE_READWRITE, &dwProtect)) { + DETOUR_TRACE(("VirtualProtectEx(clr) write failed: %lu\n", GetLastError())); + return FALSE; + } + + if (!WriteProcessMemory(hProcess, der.pclr, &clr, sizeof(clr), NULL)) { + DETOUR_TRACE(("WriteProcessMemory(clr) failed: %lu\n", GetLastError())); + return FALSE; + } + + if (!VirtualProtectEx(hProcess, der.pclr, sizeof(clr), dwProtect, &dwProtect)) { + DETOUR_TRACE(("VirtualProtectEx(clr) restore failed: %lu\n", GetLastError())); + return FALSE; + } + DETOUR_TRACE(("CLR: %p..%p\n", der.pclr, der.pclr + der.cbclr)); + +#if DETOURS_64BIT + if (der.clr.Flags & COMIMAGE_FLAGS_32BITREQUIRED) { // Is the 32BIT Required Flag set? + // X64 never gets here because the process appears as a WOW64 process. + // However, on IA64, it doesn't appear to be a WOW process. + DETOUR_TRACE(("CLR Requires 32-bit\n")); + SetLastError(ERROR_INVALID_HANDLE); + return FALSE; + } +#endif // DETOURS_64BIT + } + + //////////////////////////////// Save the undo data to the target process. + // + if (!DetourCopyPayloadToProcess(hProcess, DETOUR_EXE_RESTORE_GUID, &der, sizeof(der))) { + DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %lu\n", GetLastError())); + return FALSE; + } + return TRUE; +} + +////////////////////////////////////////////////////////////////////////////// +// +BOOL WINAPI DetourCreateProcessWithDllA(_In_opt_ LPCSTR lpApplicationName, + _Inout_opt_ LPSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOA lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ LPCSTR lpDllName, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA) +{ + DWORD dwMyCreationFlags = (dwCreationFlags | CREATE_SUSPENDED); + PROCESS_INFORMATION pi; + BOOL fResult = FALSE; + + if (pfCreateProcessA == NULL) { + pfCreateProcessA = CreateProcessA; + } + + fResult = pfCreateProcessA(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwMyCreationFlags, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + &pi); + + if (lpProcessInformation != NULL) { + CopyMemory(lpProcessInformation, &pi, sizeof(pi)); + } + + if (!fResult) { + return FALSE; + } + + LPCSTR rlpDlls[2]; + DWORD nDlls = 0; + if (lpDllName != NULL) { + rlpDlls[nDlls++] = lpDllName; + } + + if (!DetourUpdateProcessWithDll(pi.hProcess, rlpDlls, nDlls)) { + TerminateProcess(pi.hProcess, ~0u); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(pi.hThread); + } + return TRUE; +} + + +BOOL WINAPI DetourCreateProcessWithDllW(_In_opt_ LPCWSTR lpApplicationName, + _Inout_opt_ LPWSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ 
LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCWSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOW lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ LPCSTR lpDllName, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) +{ + DWORD dwMyCreationFlags = (dwCreationFlags | CREATE_SUSPENDED); + PROCESS_INFORMATION pi; + + if (pfCreateProcessW == NULL) { + pfCreateProcessW = CreateProcessW; + } + + BOOL fResult = pfCreateProcessW(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwMyCreationFlags, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + &pi); + + if (lpProcessInformation) { + CopyMemory(lpProcessInformation, &pi, sizeof(pi)); + } + + if (!fResult) { + return FALSE; + } + + LPCSTR rlpDlls[2]; + DWORD nDlls = 0; + if (lpDllName != NULL) { + rlpDlls[nDlls++] = lpDllName; + } + + if (!DetourUpdateProcessWithDll(pi.hProcess, rlpDlls, nDlls)) { + TerminateProcess(pi.hProcess, ~0u); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(pi.hThread); + } + return TRUE; +} + +BOOL WINAPI DetourCopyPayloadToProcess(_In_ HANDLE hProcess, + _In_ REFGUID rguid, + _In_reads_bytes_(cbData) LPCVOID pvData, + _In_ DWORD cbData) +{ + return DetourCopyPayloadToProcessEx(hProcess, rguid, pvData, cbData) != NULL; +} + +_Success_(return != NULL) +PVOID WINAPI DetourCopyPayloadToProcessEx(_In_ HANDLE hProcess, + _In_ REFGUID rguid, + _In_reads_bytes_(cbData) LPCVOID pvData, + _In_ DWORD cbData) +{ + if (hProcess == NULL) { + SetLastError(ERROR_INVALID_HANDLE); + return NULL; + } + + DWORD cbTotal = (sizeof(IMAGE_DOS_HEADER) + + sizeof(IMAGE_NT_HEADERS) + + sizeof(IMAGE_SECTION_HEADER) + + sizeof(DETOUR_SECTION_HEADER) + + sizeof(DETOUR_SECTION_RECORD) + + cbData); + + PBYTE pbBase = (PBYTE)VirtualAllocEx(hProcess, NULL, cbTotal, + MEM_COMMIT, PAGE_READWRITE); + if (pbBase == NULL) { + DETOUR_TRACE(("VirtualAllocEx(%lu) failed: %lu\n", cbTotal, GetLastError())); + return NULL; + } + + // As you can see in the following code, + // the memory layout of the payload range "[pbBase, pbBase+cbTotal]" is a PE executable file, + // so DetourFreePayload can use "DetourGetContainingModule(Payload pointer)" to get the above "pbBase" pointer, + // pbBase: the memory block allocated by VirtualAllocEx will be released in DetourFreePayload by VirtualFree. 
+ + PBYTE pbTarget = pbBase; + IMAGE_DOS_HEADER idh; + IMAGE_NT_HEADERS inh; + IMAGE_SECTION_HEADER ish; + DETOUR_SECTION_HEADER dsh; + DETOUR_SECTION_RECORD dsr; + SIZE_T cbWrote = 0; + + ZeroMemory(&idh, sizeof(idh)); + idh.e_magic = IMAGE_DOS_SIGNATURE; + idh.e_lfanew = sizeof(idh); + if (!WriteProcessMemory(hProcess, pbTarget, &idh, sizeof(idh), &cbWrote) || + cbWrote != sizeof(idh)) { + DETOUR_TRACE(("WriteProcessMemory(idh) failed: %lu\n", GetLastError())); + return NULL; + } + pbTarget += sizeof(idh); + + ZeroMemory(&inh, sizeof(inh)); + inh.Signature = IMAGE_NT_SIGNATURE; + inh.FileHeader.SizeOfOptionalHeader = sizeof(inh.OptionalHeader); + inh.FileHeader.Characteristics = IMAGE_FILE_DLL; + inh.FileHeader.NumberOfSections = 1; + inh.OptionalHeader.Magic = IMAGE_NT_OPTIONAL_HDR_MAGIC; + if (!WriteProcessMemory(hProcess, pbTarget, &inh, sizeof(inh), &cbWrote) || + cbWrote != sizeof(inh)) { + return NULL; + } + pbTarget += sizeof(inh); + + ZeroMemory(&ish, sizeof(ish)); + memcpy(ish.Name, ".detour", sizeof(ish.Name)); + ish.VirtualAddress = (DWORD)((pbTarget + sizeof(ish)) - pbBase); + ish.SizeOfRawData = (sizeof(DETOUR_SECTION_HEADER) + + sizeof(DETOUR_SECTION_RECORD) + + cbData); + if (!WriteProcessMemory(hProcess, pbTarget, &ish, sizeof(ish), &cbWrote) || + cbWrote != sizeof(ish)) { + return NULL; + } + pbTarget += sizeof(ish); + + ZeroMemory(&dsh, sizeof(dsh)); + dsh.cbHeaderSize = sizeof(dsh); + dsh.nSignature = DETOUR_SECTION_HEADER_SIGNATURE; + dsh.nDataOffset = sizeof(DETOUR_SECTION_HEADER); + dsh.cbDataSize = (sizeof(DETOUR_SECTION_HEADER) + + sizeof(DETOUR_SECTION_RECORD) + + cbData); + if (!WriteProcessMemory(hProcess, pbTarget, &dsh, sizeof(dsh), &cbWrote) || + cbWrote != sizeof(dsh)) { + return NULL; + } + pbTarget += sizeof(dsh); + + ZeroMemory(&dsr, sizeof(dsr)); + dsr.cbBytes = cbData + sizeof(DETOUR_SECTION_RECORD); + dsr.nReserved = 0; + dsr.guid = rguid; + if (!WriteProcessMemory(hProcess, pbTarget, &dsr, sizeof(dsr), &cbWrote) || + cbWrote != sizeof(dsr)) { + return NULL; + } + pbTarget += sizeof(dsr); + + if (!WriteProcessMemory(hProcess, pbTarget, pvData, cbData, &cbWrote) || + cbWrote != cbData) { + return NULL; + } + + DETOUR_TRACE(("Copied %lu byte payload into target process at %p\n", + cbData, pbTarget)); + + SetLastError(NO_ERROR); + return pbTarget; +} + +static BOOL s_fSearchedForHelper = FALSE; +static PDETOUR_EXE_HELPER s_pHelper = NULL; + +VOID CALLBACK DetourFinishHelperProcess(_In_ HWND, + _In_ HINSTANCE, + _In_ LPSTR, + _In_ INT) +{ + LPCSTR * rlpDlls = NULL; + DWORD Result = 9900; + DWORD cOffset = 0; + DWORD cSize = 0; + HANDLE hProcess = NULL; + + if (s_pHelper == NULL) { + DETOUR_TRACE(("DetourFinishHelperProcess called with s_pHelper = NULL.\n")); + Result = 9905; + goto Cleanup; + } + + hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, s_pHelper->pid); + if (hProcess == NULL) { + DETOUR_TRACE(("OpenProcess(pid=%lu) failed: %lu\n", + s_pHelper->pid, GetLastError())); + Result = 9901; + goto Cleanup; + } + + rlpDlls = new NOTHROW LPCSTR [s_pHelper->nDlls]; + cSize = s_pHelper->cb - sizeof(DETOUR_EXE_HELPER); + for (DWORD n = 0; n < s_pHelper->nDlls; n++) { + size_t cchDest = 0; + HRESULT hr = StringCchLengthA(&s_pHelper->rDlls[cOffset], cSize - cOffset, &cchDest); + if (!SUCCEEDED(hr)) { + Result = 9902; + goto Cleanup; + } + + rlpDlls[n] = &s_pHelper->rDlls[cOffset]; + cOffset += (DWORD)cchDest + 1; + } + + if (!DetourUpdateProcessWithDll(hProcess, rlpDlls, s_pHelper->nDlls)) { + DETOUR_TRACE(("DetourUpdateProcessWithDll(pid=%lu) failed: 
%lu\n", + s_pHelper->pid, GetLastError())); + Result = 9903; + goto Cleanup; + } + Result = 0; + + Cleanup: + if (rlpDlls != NULL) { + delete[] rlpDlls; + rlpDlls = NULL; + } + + // Note: s_pHelper is allocated as part of injecting the payload in DetourCopyPayloadToProcess(..), + // it's a fake section and not data allocated by the system PE loader. + + // Delete the payload after execution to release the memory occupied by it + if (s_pHelper != NULL) { + DetourFreePayload(s_pHelper); + s_pHelper = NULL; + } + + ExitProcess(Result); +} + +BOOL WINAPI DetourIsHelperProcess(VOID) +{ + PVOID pvData; + DWORD cbData; + + if (s_fSearchedForHelper) { + return (s_pHelper != NULL); + } + + s_fSearchedForHelper = TRUE; + pvData = DetourFindPayloadEx(DETOUR_EXE_HELPER_GUID, &cbData); + + if (pvData == NULL || cbData < sizeof(DETOUR_EXE_HELPER)) { + return FALSE; + } + + s_pHelper = (PDETOUR_EXE_HELPER)pvData; + if (s_pHelper->cb < sizeof(*s_pHelper)) { + s_pHelper = NULL; + return FALSE; + } + + return TRUE; +} + +static +BOOL WINAPI AllocExeHelper(_Out_ PDETOUR_EXE_HELPER *pHelper, + _In_ DWORD dwTargetPid, + _In_ DWORD nDlls, + _In_reads_(nDlls) LPCSTR *rlpDlls) +{ + PDETOUR_EXE_HELPER Helper = NULL; + BOOL Result = FALSE; + _Field_range_(0, cSize - 4) DWORD cOffset = 0; + DWORD cSize = 4; + + if (pHelper == NULL) { + goto Cleanup; + } + *pHelper = NULL; + + if (nDlls < 1 || nDlls > 4096) { + SetLastError(ERROR_INVALID_PARAMETER); + goto Cleanup; + } + + for (DWORD n = 0; n < nDlls; n++) { + HRESULT hr; + size_t cchDest = 0; + + hr = StringCchLengthA(rlpDlls[n], 4096, &cchDest); + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + cSize += (DWORD)cchDest + 1; + } + + Helper = (PDETOUR_EXE_HELPER) new NOTHROW BYTE[sizeof(DETOUR_EXE_HELPER) + cSize]; + if (Helper == NULL) { + goto Cleanup; + } + + Helper->cb = sizeof(DETOUR_EXE_HELPER) + cSize; + Helper->pid = dwTargetPid; + Helper->nDlls = nDlls; + + for (DWORD n = 0; n < nDlls; n++) { + HRESULT hr; + size_t cchDest = 0; + + if (cOffset > 0x10000 || cSize > 0x10000 || cOffset + 2 >= cSize) { + goto Cleanup; + } + + if (cOffset + 2 >= cSize || cOffset + 65536 < cSize) { + goto Cleanup; + } + + _Analysis_assume_(cOffset + 1 < cSize); + _Analysis_assume_(cOffset < 0x10000); + _Analysis_assume_(cSize < 0x10000); + + PCHAR psz = &Helper->rDlls[cOffset]; + + hr = StringCchCopyA(psz, cSize - cOffset, rlpDlls[n]); + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + +// REVIEW 28020 The expression '1<=_Param_(2)& &_Param_(2)<=2147483647' is not true at this call. +// REVIEW 28313 Analysis will not proceed past this point because of annotation evaluation. The annotation expression *_Param_(3)<_Param_(2)&&*_Param_(3)<=stringLength$(_Param_(1)) cannot be true under any assumptions at this point in the program. +#pragma warning(suppress:28020 28313) + hr = StringCchLengthA(psz, cSize - cOffset, &cchDest); + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + // Replace "32." with "64." or "64." with "32." 
+ + for (DWORD c = (DWORD)cchDest + 1; c > 3; c--) { +#if DETOURS_32BIT + if (psz[c - 3] == '3' && psz[c - 2] == '2' && psz[c - 1] == '.') { + psz[c - 3] = '6'; psz[c - 2] = '4'; + break; + } +#else + if (psz[c - 3] == '6' && psz[c - 2] == '4' && psz[c - 1] == '.') { + psz[c - 3] = '3'; psz[c - 2] = '2'; + break; + } +#endif + } + + cOffset += (DWORD)cchDest + 1; + } + + *pHelper = Helper; + Helper = NULL; + Result = TRUE; + + Cleanup: + if (Helper != NULL) { + delete[] (PBYTE)Helper; + Helper = NULL; + } + return Result; +} + +static +VOID WINAPI FreeExeHelper(PDETOUR_EXE_HELPER *pHelper) +{ + if (*pHelper != NULL) { + delete[] (PBYTE)*pHelper; + *pHelper = NULL; + } +} + +BOOL WINAPI DetourProcessViaHelperA(_In_ DWORD dwTargetPid, + _In_ LPCSTR lpDllName, + _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA) +{ + return DetourProcessViaHelperDllsA(dwTargetPid, 1, &lpDllName, pfCreateProcessA); +} + + +BOOL WINAPI DetourProcessViaHelperDllsA(_In_ DWORD dwTargetPid, + _In_ DWORD nDlls, + _In_reads_(nDlls) LPCSTR *rlpDlls, + _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA) +{ + BOOL Result = FALSE; + PROCESS_INFORMATION pi; + STARTUPINFOA si; + CHAR szExe[MAX_PATH]; + CHAR szCommand[MAX_PATH]; + PDETOUR_EXE_HELPER helper = NULL; + HRESULT hr; + DWORD nLen = GetEnvironmentVariableA("WINDIR", szExe, ARRAYSIZE(szExe)); + + DETOUR_TRACE(("DetourProcessViaHelperDlls(pid=%lu,dlls=%lu)\n", dwTargetPid, nDlls)); + if (nDlls < 1 || nDlls > 4096) { + SetLastError(ERROR_INVALID_PARAMETER); + goto Cleanup; + } + if (!AllocExeHelper(&helper, dwTargetPid, nDlls, rlpDlls)) { + goto Cleanup; + } + + if (nLen == 0 || nLen >= ARRAYSIZE(szExe)) { + goto Cleanup; + } + +#if DETOURS_OPTION_BITS +#if DETOURS_32BIT + hr = StringCchCatA(szExe, ARRAYSIZE(szExe), "\\sysnative\\rundll32.exe"); +#else // !DETOURS_32BIT + hr = StringCchCatA(szExe, ARRAYSIZE(szExe), "\\syswow64\\rundll32.exe"); +#endif // !DETOURS_32BIT +#else // DETOURS_OPTIONS_BITS + hr = StringCchCatA(szExe, ARRAYSIZE(szExe), "\\system32\\rundll32.exe"); +#endif // DETOURS_OPTIONS_BITS + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + //for East Asia languages and so on, like Chinese, print format with "%hs" can not work fine before user call _tsetlocale(LC_ALL,_T(".ACP")); + //so we can't use "%hs" in format string, because the dll that contain this code would inject to any process, even not call _tsetlocale(LC_ALL,_T(".ACP")) before + hr = StringCchPrintfA(szCommand, ARRAYSIZE(szCommand), + "rundll32.exe \"%s\",#1", &helper->rDlls[0]); + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + ZeroMemory(&pi, sizeof(pi)); + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + + DETOUR_TRACE(("DetourProcessViaHelperDlls(\"%hs\", \"%hs\")\n", szExe, szCommand)); + if (pfCreateProcessA(szExe, szCommand, NULL, NULL, FALSE, CREATE_SUSPENDED, + NULL, NULL, &si, &pi)) { + + if (!DetourCopyPayloadToProcess(pi.hProcess, + DETOUR_EXE_HELPER_GUID, + helper, helper->cb)) { + DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %lu\n", GetLastError())); + TerminateProcess(pi.hProcess, ~0u); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + goto Cleanup; + } + + ResumeThread(pi.hThread); + WaitForSingleObject(pi.hProcess, INFINITE); + + DWORD dwResult = 500; + GetExitCodeProcess(pi.hProcess, &dwResult); + + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + if (dwResult != 0) { + DETOUR_TRACE(("Rundll32.exe failed: result=%lu\n", dwResult)); + goto Cleanup; + } + Result = TRUE; + } + else { + DETOUR_TRACE(("CreateProcess failed: %lu\n", GetLastError())); + 
goto Cleanup; + } + + Cleanup: + FreeExeHelper(&helper); + return Result; +} + +BOOL WINAPI DetourProcessViaHelperW(_In_ DWORD dwTargetPid, + _In_ LPCSTR lpDllName, + _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) +{ + return DetourProcessViaHelperDllsW(dwTargetPid, 1, &lpDllName, pfCreateProcessW); +} + +BOOL WINAPI DetourProcessViaHelperDllsW(_In_ DWORD dwTargetPid, + _In_ DWORD nDlls, + _In_reads_(nDlls) LPCSTR *rlpDlls, + _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) +{ + BOOL Result = FALSE; + PROCESS_INFORMATION pi; + STARTUPINFOW si; + WCHAR szExe[MAX_PATH]; + WCHAR szCommand[MAX_PATH]; + PDETOUR_EXE_HELPER helper = NULL; + HRESULT hr; + WCHAR szDllName[MAX_PATH]; + int cchWrittenWideChar; + DWORD nLen = GetEnvironmentVariableW(L"WINDIR", szExe, ARRAYSIZE(szExe)); + + DETOUR_TRACE(("DetourProcessViaHelperDlls(pid=%lu,dlls=%lu)\n", dwTargetPid, nDlls)); + if (nDlls < 1 || nDlls > 4096) { + SetLastError(ERROR_INVALID_PARAMETER); + goto Cleanup; + } + if (!AllocExeHelper(&helper, dwTargetPid, nDlls, rlpDlls)) { + goto Cleanup; + } + + if (nLen == 0 || nLen >= ARRAYSIZE(szExe)) { + goto Cleanup; + } + +#if DETOURS_OPTION_BITS +#if DETOURS_32BIT + hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\sysnative\\rundll32.exe"); +#else // !DETOURS_32BIT + hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\syswow64\\rundll32.exe"); +#endif // !DETOURS_32BIT +#else // DETOURS_OPTIONS_BITS + hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\system32\\rundll32.exe"); +#endif // DETOURS_OPTIONS_BITS + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + //for East Asia languages and so on, like Chinese, print format with "%hs" can not work fine before user call _tsetlocale(LC_ALL,_T(".ACP")); + //so we can't use "%hs" in format string, because the dll that contain this code would inject to any process, even not call _tsetlocale(LC_ALL,_T(".ACP")) before + + cchWrittenWideChar = MultiByteToWideChar(CP_ACP, 0, &helper->rDlls[0], -1, szDllName, ARRAYSIZE(szDllName)); + if (cchWrittenWideChar >= ARRAYSIZE(szDllName) || cchWrittenWideChar <= 0) { + goto Cleanup; + } + hr = StringCchPrintfW(szCommand, ARRAYSIZE(szCommand), + L"rundll32.exe \"%s\",#1", szDllName); + if (!SUCCEEDED(hr)) { + goto Cleanup; + } + + ZeroMemory(&pi, sizeof(pi)); + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + + DETOUR_TRACE(("DetourProcessViaHelperDlls(\"%ls\", \"%ls\")\n", szExe, szCommand)); + if (pfCreateProcessW(szExe, szCommand, NULL, NULL, FALSE, CREATE_SUSPENDED, + NULL, NULL, &si, &pi)) { + + if (!DetourCopyPayloadToProcess(pi.hProcess, + DETOUR_EXE_HELPER_GUID, + helper, helper->cb)) { + DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %lu\n", GetLastError())); + TerminateProcess(pi.hProcess, ~0u); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + goto Cleanup; + } + + ResumeThread(pi.hThread); + WaitForSingleObject(pi.hProcess, INFINITE); + + DWORD dwResult = 500; + GetExitCodeProcess(pi.hProcess, &dwResult); + + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + if (dwResult != 0) { + DETOUR_TRACE(("Rundll32.exe failed: result=%lu\n", dwResult)); + goto Cleanup; + } + Result = TRUE; + } + else { + DETOUR_TRACE(("CreateProcess failed: %lu\n", GetLastError())); + goto Cleanup; + } + + Cleanup: + FreeExeHelper(&helper); + return Result; +} + +BOOL WINAPI DetourCreateProcessWithDllExA(_In_opt_ LPCSTR lpApplicationName, + _Inout_opt_ LPSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ 
DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOA lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ LPCSTR lpDllName, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA) +{ + if (pfCreateProcessA == NULL) { + pfCreateProcessA = CreateProcessA; + } + + PROCESS_INFORMATION backup; + if (lpProcessInformation == NULL) { + lpProcessInformation = &backup; + ZeroMemory(&backup, sizeof(backup)); + } + + if (!pfCreateProcessA(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwCreationFlags | CREATE_SUSPENDED, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + lpProcessInformation)) { + return FALSE; + } + + LPCSTR szDll = lpDllName; + + if (!DetourUpdateProcessWithDll(lpProcessInformation->hProcess, &szDll, 1) && + !DetourProcessViaHelperA(lpProcessInformation->dwProcessId, + lpDllName, + pfCreateProcessA)) { + + TerminateProcess(lpProcessInformation->hProcess, ~0u); + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(lpProcessInformation->hThread); + } + + if (lpProcessInformation == &backup) { + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + } + + return TRUE; +} + +BOOL WINAPI DetourCreateProcessWithDllExW(_In_opt_ LPCWSTR lpApplicationName, + _Inout_opt_ LPWSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCWSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOW lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ LPCSTR lpDllName, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) +{ + if (pfCreateProcessW == NULL) { + pfCreateProcessW = CreateProcessW; + } + + PROCESS_INFORMATION backup; + if (lpProcessInformation == NULL) { + lpProcessInformation = &backup; + ZeroMemory(&backup, sizeof(backup)); + } + + if (!pfCreateProcessW(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwCreationFlags | CREATE_SUSPENDED, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + lpProcessInformation)) { + return FALSE; + } + + + LPCSTR sz = lpDllName; + + if (!DetourUpdateProcessWithDll(lpProcessInformation->hProcess, &sz, 1) && + !DetourProcessViaHelperW(lpProcessInformation->dwProcessId, + lpDllName, + pfCreateProcessW)) { + + TerminateProcess(lpProcessInformation->hProcess, ~0u); + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(lpProcessInformation->hThread); + } + + if (lpProcessInformation == &backup) { + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + } + return TRUE; +} + +BOOL WINAPI DetourCreateProcessWithDllsA(_In_opt_ LPCSTR lpApplicationName, + _Inout_opt_ LPSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOA lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ DWORD nDlls, + _In_reads_(nDlls) LPCSTR 
*rlpDlls, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA) +{ + if (pfCreateProcessA == NULL) { + pfCreateProcessA = CreateProcessA; + } + + PROCESS_INFORMATION backup; + if (lpProcessInformation == NULL) { + lpProcessInformation = &backup; + ZeroMemory(&backup, sizeof(backup)); + } + + if (!pfCreateProcessA(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwCreationFlags | CREATE_SUSPENDED, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + lpProcessInformation)) { + return FALSE; + } + + if (!DetourUpdateProcessWithDll(lpProcessInformation->hProcess, rlpDlls, nDlls) && + !DetourProcessViaHelperDllsA(lpProcessInformation->dwProcessId, + nDlls, + rlpDlls, + pfCreateProcessA)) { + + TerminateProcess(lpProcessInformation->hProcess, ~0u); + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(lpProcessInformation->hThread); + } + + if (lpProcessInformation == &backup) { + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + } + + return TRUE; +} + +BOOL WINAPI DetourCreateProcessWithDllsW(_In_opt_ LPCWSTR lpApplicationName, + _Inout_opt_ LPWSTR lpCommandLine, + _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes, + _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes, + _In_ BOOL bInheritHandles, + _In_ DWORD dwCreationFlags, + _In_opt_ LPVOID lpEnvironment, + _In_opt_ LPCWSTR lpCurrentDirectory, + _In_ LPSTARTUPINFOW lpStartupInfo, + _Out_ LPPROCESS_INFORMATION lpProcessInformation, + _In_ DWORD nDlls, + _In_reads_(nDlls) LPCSTR *rlpDlls, + _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) +{ + if (pfCreateProcessW == NULL) { + pfCreateProcessW = CreateProcessW; + } + + PROCESS_INFORMATION backup; + if (lpProcessInformation == NULL) { + lpProcessInformation = &backup; + ZeroMemory(&backup, sizeof(backup)); + } + + if (!pfCreateProcessW(lpApplicationName, + lpCommandLine, + lpProcessAttributes, + lpThreadAttributes, + bInheritHandles, + dwCreationFlags | CREATE_SUSPENDED, + lpEnvironment, + lpCurrentDirectory, + lpStartupInfo, + lpProcessInformation)) { + return FALSE; + } + + + if (!DetourUpdateProcessWithDll(lpProcessInformation->hProcess, rlpDlls, nDlls) && + !DetourProcessViaHelperDllsW(lpProcessInformation->dwProcessId, + nDlls, + rlpDlls, + pfCreateProcessW)) { + + TerminateProcess(lpProcessInformation->hProcess, ~0u); + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + return FALSE; + } + + if (!(dwCreationFlags & CREATE_SUSPENDED)) { + ResumeThread(lpProcessInformation->hThread); + } + + if (lpProcessInformation == &backup) { + CloseHandle(lpProcessInformation->hProcess); + CloseHandle(lpProcessInformation->hThread); + } + return TRUE; +} + +// +///////////////////////////////////////////////////////////////// End of File. diff --git a/r5dev/thirdparty/detours/src/uimports.cpp b/r5dev/thirdparty/detours/src/uimports.cpp new file mode 100644 index 00000000..cd1fe0f8 --- /dev/null +++ b/r5dev/thirdparty/detours/src/uimports.cpp @@ -0,0 +1,335 @@ +////////////////////////////////////////////////////////////////////////////// +// +// Add DLLs to a module import table (uimports.cpp of detours.lib) +// +// Microsoft Research Detours Package, Version 4.0.1 +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
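// --- Illustrative usage sketch (editorial aside, not part of this patch) ---
// The DetourCreateProcessWithDlls* helpers defined above stand in for a direct
// CreateProcess* call: the child is started suspended, the listed DLLs are added
// to its import table (or injected via the rundll32 helper path), and the primary
// thread is resumed unless the caller itself requested CREATE_SUSPENDED. The DLL
// and target names below are hypothetical.
#if 0   // example only
    PROCESS_INFORMATION pi;
    STARTUPINFOW si = { sizeof(si) };
    LPCSTR rlpDlls[] = { "my_injectee.dll" };   // hypothetical DLL to inject
    WCHAR szCmd[]    = L"target.exe";           // hypothetical target command line

    if (DetourCreateProcessWithDllsW(NULL, szCmd, NULL, NULL, FALSE, 0,
                                     NULL, NULL, &si, &pi,
                                     1, rlpDlls, NULL)) {
        WaitForSingleObject(pi.hProcess, INFINITE);
        CloseHandle(pi.hThread);
        CloseHandle(pi.hProcess);
    }
#endif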
+// +// Note that this file is included into creatwth.cpp one or more times +// (once for each supported module format). +// + +#include "../include/detours.h" + +#if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH +#error detours.h version mismatch +#endif + +// UpdateImports32 aka UpdateImports64 +static BOOL UPDATE_IMPORTS_XX(HANDLE hProcess, + HMODULE hModule, + __in_ecount(nDlls) LPCSTR *plpDlls, + DWORD nDlls) +{ + BOOL fSucceeded = FALSE; + DWORD cbNew = 0; + + BYTE * pbNew = NULL; + DWORD i; + SIZE_T cbRead; + DWORD n; + + PBYTE pbModule = (PBYTE)hModule; + + IMAGE_DOS_HEADER idh; + ZeroMemory(&idh, sizeof(idh)); + if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), &cbRead) + || cbRead < sizeof(idh)) { + + DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %lu\n", + pbModule, pbModule + sizeof(idh), GetLastError())); + + finish: + if (pbNew != NULL) { + delete[] pbNew; + pbNew = NULL; + } + return fSucceeded; + } + + IMAGE_NT_HEADERS_XX inh; + ZeroMemory(&inh, sizeof(inh)); + + if (!ReadProcessMemory(hProcess, pbModule + idh.e_lfanew, &inh, sizeof(inh), &cbRead) + || cbRead < sizeof(inh)) { + DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %lu\n", + pbModule + idh.e_lfanew, + pbModule + idh.e_lfanew + sizeof(inh), + GetLastError())); + goto finish; + } + + if (inh.OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC_XX) { + DETOUR_TRACE(("Wrong size image (%04x != %04x).\n", + inh.OptionalHeader.Magic, IMAGE_NT_OPTIONAL_HDR_MAGIC_XX)); + SetLastError(ERROR_INVALID_BLOCK); + goto finish; + } + + // Zero out the bound table so loader doesn't use it instead of our new table. + inh.BOUND_DIRECTORY.VirtualAddress = 0; + inh.BOUND_DIRECTORY.Size = 0; + + // Find the size of the mapped file. + DWORD dwSec = idh.e_lfanew + + FIELD_OFFSET(IMAGE_NT_HEADERS_XX, OptionalHeader) + + inh.FileHeader.SizeOfOptionalHeader; + + for (i = 0; i < inh.FileHeader.NumberOfSections; i++) { + IMAGE_SECTION_HEADER ish; + ZeroMemory(&ish, sizeof(ish)); + + if (!ReadProcessMemory(hProcess, pbModule + dwSec + sizeof(ish) * i, &ish, + sizeof(ish), &cbRead) + || cbRead < sizeof(ish)) { + + DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p) failed: %lu\n", + pbModule + dwSec + sizeof(ish) * i, + pbModule + dwSec + sizeof(ish) * (i + 1), + GetLastError())); + goto finish; + } + + DETOUR_TRACE(("ish[%lu] : va=%08lx sr=%lu\n", i, ish.VirtualAddress, ish.SizeOfRawData)); + + // If the linker didn't suggest an IAT in the data directories, the + // loader will look for the section of the import directory to be used + // for this instead. Since we put out new IMPORT_DIRECTORY outside any + // section boundary, the loader will not find it. So we provide one + // explicitly to avoid the search. + // + if (inh.IAT_DIRECTORY.VirtualAddress == 0 && + inh.IMPORT_DIRECTORY.VirtualAddress >= ish.VirtualAddress && + inh.IMPORT_DIRECTORY.VirtualAddress < ish.VirtualAddress + ish.SizeOfRawData) { + + inh.IAT_DIRECTORY.VirtualAddress = ish.VirtualAddress; + inh.IAT_DIRECTORY.Size = ish.SizeOfRawData; + } + } + + if (inh.IMPORT_DIRECTORY.VirtualAddress != 0 && inh.IMPORT_DIRECTORY.Size == 0) { + + // Don't worry about changing the PE file, + // because the load information of the original PE header has been saved and will be restored. 
+ // The change here is just for the following code to work normally + + PIMAGE_IMPORT_DESCRIPTOR pImageImport = (PIMAGE_IMPORT_DESCRIPTOR)(pbModule + inh.IMPORT_DIRECTORY.VirtualAddress); + + do { + IMAGE_IMPORT_DESCRIPTOR ImageImport; + if (!ReadProcessMemory(hProcess, pImageImport, &ImageImport, sizeof(ImageImport), NULL)) { + DETOUR_TRACE(("ReadProcessMemory failed: %lu\n", GetLastError())); + goto finish; + } + inh.IMPORT_DIRECTORY.Size += sizeof(IMAGE_IMPORT_DESCRIPTOR); + if (!ImageImport.Name) { + break; + } + ++pImageImport; + } while (TRUE); + + DWORD dwLastError = GetLastError(); + OutputDebugString(TEXT("[This PE file has an import table, but the import table size is marked as 0. This is an error.") + TEXT("If it is not repaired, the launched program will not work properly, Detours has automatically repaired its import table size for you! ! !]\r\n")); + if (GetLastError() != dwLastError) { + SetLastError(dwLastError); + } + } + + DETOUR_TRACE((" Imports: %p..%p\n", + pbModule + inh.IMPORT_DIRECTORY.VirtualAddress, + pbModule + inh.IMPORT_DIRECTORY.VirtualAddress + + inh.IMPORT_DIRECTORY.Size)); + + // Calculate new import directory size. Note that since inh is from another + // process, inh could have been corrupted. We need to protect against + // integer overflow in allocation calculations. + DWORD nOldDlls = inh.IMPORT_DIRECTORY.Size / sizeof(IMAGE_IMPORT_DESCRIPTOR); + DWORD obRem; + if (DWordMult(sizeof(IMAGE_IMPORT_DESCRIPTOR), nDlls, &obRem) != S_OK) { + DETOUR_TRACE(("too many new DLLs.\n")); + goto finish; + } + DWORD obOld; + if (DWordAdd(obRem, sizeof(IMAGE_IMPORT_DESCRIPTOR) * nOldDlls, &obOld) != S_OK) { + DETOUR_TRACE(("DLL entries overflow.\n")); + goto finish; + } + DWORD obTab = PadToDwordPtr(obOld); + // Check for integer overflow. + if (obTab < obOld) { + DETOUR_TRACE(("DLL entries padding overflow.\n")); + goto finish; + } + DWORD stSize; + if (DWordMult(sizeof(DWORD_XX) * 4, nDlls, &stSize) != S_OK) { + DETOUR_TRACE(("String table overflow.\n")); + goto finish; + } + DWORD obDll; + if (DWordAdd(obTab, stSize, &obDll) != S_OK) { + DETOUR_TRACE(("Import table size overflow\n")); + goto finish; + } + DWORD obStr = obDll; + cbNew = obStr; + for (n = 0; n < nDlls; n++) { + if (DWordAdd(cbNew, PadToDword((DWORD)strlen(plpDlls[n]) + 1), &cbNew) != S_OK) { + DETOUR_TRACE(("Overflow adding string table entry\n")); + goto finish; + } + } + pbNew = new BYTE [cbNew]; + if (pbNew == NULL) { + DETOUR_TRACE(("new BYTE [cbNew] failed.\n")); + goto finish; + } + ZeroMemory(pbNew, cbNew); + + PBYTE pbBase = pbModule; + PBYTE pbNext = pbBase + + inh.OptionalHeader.BaseOfCode + + inh.OptionalHeader.SizeOfCode + + inh.OptionalHeader.SizeOfInitializedData + + inh.OptionalHeader.SizeOfUninitializedData; + if (pbBase < pbNext) { + pbBase = pbNext; + } + DETOUR_TRACE(("pbBase = %p\n", pbBase)); + + PBYTE pbNewIid = FindAndAllocateNearBase(hProcess, pbModule, pbBase, cbNew); + if (pbNewIid == NULL) { + DETOUR_TRACE(("FindAndAllocateNearBase failed.\n")); + goto finish; + } + + PIMAGE_IMPORT_DESCRIPTOR piid = (PIMAGE_IMPORT_DESCRIPTOR)pbNew; + IMAGE_THUNK_DATAXX *pt = NULL; + + DWORD obBase = (DWORD)(pbNewIid - pbModule); + DWORD dwProtect = 0; + + if (inh.IMPORT_DIRECTORY.VirtualAddress != 0) { + // Read the old import directory if it exists. 
+ DETOUR_TRACE(("IMPORT_DIRECTORY perms=%lx\n", dwProtect)); + + if (!ReadProcessMemory(hProcess, + pbModule + inh.IMPORT_DIRECTORY.VirtualAddress, + &piid[nDlls], + nOldDlls * sizeof(IMAGE_IMPORT_DESCRIPTOR), &cbRead) + || cbRead < nOldDlls * sizeof(IMAGE_IMPORT_DESCRIPTOR)) { + + DETOUR_TRACE(("ReadProcessMemory(imports) failed: %lu\n", GetLastError())); + goto finish; + } + } + + for (n = 0; n < nDlls; n++) { + HRESULT hrRet = StringCchCopyA((char*)pbNew + obStr, cbNew - obStr, plpDlls[n]); + if (FAILED(hrRet)) { + DETOUR_TRACE(("StringCchCopyA failed: %08lx\n", hrRet)); + goto finish; + } + + // After copying the string, we patch up the size "??" bits if any. + hrRet = ReplaceOptionalSizeA((char*)pbNew + obStr, + cbNew - obStr, + DETOURS_STRINGIFY(DETOURS_BITS_XX)); + if (FAILED(hrRet)) { + DETOUR_TRACE(("ReplaceOptionalSizeA failed: %08lx\n", hrRet)); + goto finish; + } + + DWORD nOffset = obTab + (sizeof(IMAGE_THUNK_DATAXX) * (4 * n)); + piid[n].OriginalFirstThunk = obBase + nOffset; + + // We need 2 thunks for the import table and 2 thunks for the IAT. + // One for an ordinal import and one to mark the end of the list. + pt = ((IMAGE_THUNK_DATAXX*)(pbNew + nOffset)); + pt[0].u1.Ordinal = IMAGE_ORDINAL_FLAG_XX + 1; + pt[1].u1.Ordinal = 0; + + nOffset = obTab + (sizeof(IMAGE_THUNK_DATAXX) * ((4 * n) + 2)); + piid[n].FirstThunk = obBase + nOffset; + pt = ((IMAGE_THUNK_DATAXX*)(pbNew + nOffset)); + pt[0].u1.Ordinal = IMAGE_ORDINAL_FLAG_XX + 1; + pt[1].u1.Ordinal = 0; + piid[n].TimeDateStamp = 0; + piid[n].ForwarderChain = 0; + piid[n].Name = obBase + obStr; + + obStr += PadToDword((DWORD)strlen(plpDlls[n]) + 1); + } + _Analysis_assume_(obStr <= cbNew); + +#if 0 + for (i = 0; i < nDlls + nOldDlls; i++) { + DETOUR_TRACE(("%8d. Look=%08x Time=%08x Fore=%08x Name=%08x Addr=%08x\n", + i, + piid[i].OriginalFirstThunk, + piid[i].TimeDateStamp, + piid[i].ForwarderChain, + piid[i].Name, + piid[i].FirstThunk)); + if (piid[i].OriginalFirstThunk == 0 && piid[i].FirstThunk == 0) { + break; + } + } +#endif + + if (!WriteProcessMemory(hProcess, pbNewIid, pbNew, obStr, NULL)) { + DETOUR_TRACE(("WriteProcessMemory(iid) failed: %lu\n", GetLastError())); + goto finish; + } + + DETOUR_TRACE(("obBaseBef = %08lx..%08lx\n", + inh.IMPORT_DIRECTORY.VirtualAddress, + inh.IMPORT_DIRECTORY.VirtualAddress + inh.IMPORT_DIRECTORY.Size)); + DETOUR_TRACE(("obBaseAft = %08lx..%08lx\n", obBase, obBase + obStr)); + + // In this case the file didn't have an import directory in first place, + // so we couldn't fix the missing IAT above. We still need to explicitly + // provide an IAT to prevent to loader from looking for one. + // + if (inh.IAT_DIRECTORY.VirtualAddress == 0) { + inh.IAT_DIRECTORY.VirtualAddress = obBase; + inh.IAT_DIRECTORY.Size = cbNew; + } + + inh.IMPORT_DIRECTORY.VirtualAddress = obBase; + inh.IMPORT_DIRECTORY.Size = cbNew; + + /////////////////////// Update the NT header for the new import directory. 
+ // + if (!DetourVirtualProtectSameExecuteEx(hProcess, pbModule, inh.OptionalHeader.SizeOfHeaders, + PAGE_EXECUTE_READWRITE, &dwProtect)) { + DETOUR_TRACE(("VirtualProtectEx(inh) write failed: %lu\n", GetLastError())); + goto finish; + } + + inh.OptionalHeader.CheckSum = 0; + + if (!WriteProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) { + DETOUR_TRACE(("WriteProcessMemory(idh) failed: %lu\n", GetLastError())); + goto finish; + } + DETOUR_TRACE(("WriteProcessMemory(idh:%p..%p)\n", pbModule, pbModule + sizeof(idh))); + + if (!WriteProcessMemory(hProcess, pbModule + idh.e_lfanew, &inh, sizeof(inh), NULL)) { + DETOUR_TRACE(("WriteProcessMemory(inh) failed: %lu\n", GetLastError())); + goto finish; + } + DETOUR_TRACE(("WriteProcessMemory(inh:%p..%p)\n", + pbModule + idh.e_lfanew, + pbModule + idh.e_lfanew + sizeof(inh))); + + if (!VirtualProtectEx(hProcess, pbModule, inh.OptionalHeader.SizeOfHeaders, + dwProtect, &dwProtect)) { + DETOUR_TRACE(("VirtualProtectEx(idh) restore failed: %lu\n", GetLastError())); + goto finish; + } + + fSucceeded = TRUE; + goto finish; +} diff --git a/r5dev/thirdparty/lzham/include/lzham_assert.h b/r5dev/thirdparty/lzham/include/lzham_assert.h new file mode 100644 index 00000000..d8a68515 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_assert.h @@ -0,0 +1,40 @@ +// File: lzham_assert.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +const unsigned int LZHAM_FAIL_EXCEPTION_CODE = 256U; +void lzham_enable_fail_exceptions(bool enabled); + +void lzham_assert(const char* pExp, const char* pFile, unsigned line); +void lzham_fail(const char* pExp, const char* pFile, unsigned line); + +#ifdef NDEBUG + #define LZHAM_ASSERT(x) ((void)0) +#else + #define LZHAM_ASSERT(_exp) (void)( (!!(_exp)) || (lzham_assert(#_exp, __FILE__, __LINE__), 0) ) + #define LZHAM_ASSERTS_ENABLED 1 +#endif + +#define LZHAM_VERIFY(_exp) (void)( (!!(_exp)) || (lzham_assert(#_exp, __FILE__, __LINE__), 0) ) + +#define LZHAM_FAIL(msg) do { lzham_fail(#msg, __FILE__, __LINE__); } while(0) + +#define LZHAM_ASSERT_OPEN_RANGE(x, l, h) LZHAM_ASSERT((x >= l) && (x < h)) +#define LZHAM_ASSERT_CLOSED_RANGE(x, l, h) LZHAM_ASSERT((x >= l) && (x <= h)) + +void lzham_trace(const char* pFmt, va_list args); +void lzham_trace(const char* pFmt, ...); + +// Borrowed from boost libraries. 
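// Editorial reference (not part of this patch): upstream LZHAM writes the
// boost-style compile-time check behind LZHAM_ASSUME with explicit template
// parameters, approximately as below; a false predicate instantiates the
// undefined assume_failure<false> specialization and fails to compile.
#if 0   // reference only
template<bool x> struct assume_failure;
template<>       struct assume_failure<true> { enum { blah = 1 }; };
template<int  x> struct assume_try { };
#endif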
+template struct assume_failure; +template <> struct assume_failure { enum { blah = 1 }; }; +template struct assume_try { }; + +#define LZHAM_JOINER_FINAL(a, b) a##b +#define LZHAM_JOINER(a, b) LZHAM_JOINER_FINAL(a, b) +#define LZHAM_JOIN(a, b) LZHAM_JOINER(a, b) +#if defined(__GNUC__) + #define LZHAM_ASSUME(p) typedef assume_try < sizeof(assume_failure< (bool)(p) > ) > LZHAM_JOIN(assume_typedef, __COUNTER__) __attribute__((unused)) +#else + #define LZHAM_ASSUME(p) typedef assume_try < sizeof(assume_failure< (bool)(p) > ) > LZHAM_JOIN(assume_typedef, __COUNTER__) +#endif diff --git a/r5dev/thirdparty/lzham/include/lzham_checksum.h b/r5dev/thirdparty/lzham/include/lzham_checksum.h new file mode 100644 index 00000000..515f3389 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_checksum.h @@ -0,0 +1,13 @@ +// File: lzham_checksum.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + const uint cInitAdler32 = 1U; + uint adler32(const void* pBuf, size_t buflen, uint adler32 = cInitAdler32); + + const uint cInitCRC32 = 0U; + uint crc32(uint crc, const lzham_uint8 *ptr, size_t buf_len); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_config.h b/r5dev/thirdparty/lzham/include/lzham_config.h new file mode 100644 index 00000000..e250c7ce --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_config.h @@ -0,0 +1,23 @@ +// File: lzham_config.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#ifdef _DEBUG + #define LZHAM_BUILD_DEBUG + + #ifndef DEBUG + #define DEBUG + #endif +#else + #define LZHAM_BUILD_RELEASE + + #ifndef NDEBUG + #define NDEBUG + #endif + + #ifdef DEBUG + #error DEBUG cannot be defined in LZHAM_BUILD_RELEASE + #endif +#endif +#define LZHAM_BUFFERED_PRINTF 0 +#define LZHAM_PERF_SECTIONS 0 \ No newline at end of file diff --git a/r5dev/thirdparty/lzham/include/lzham_core.h b/r5dev/thirdparty/lzham/include/lzham_core.h new file mode 100644 index 00000000..3d4f3d3e --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_core.h @@ -0,0 +1,170 @@ +// File: lzham_core.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "core/stdafx.h" + +#if defined(_MSC_VER) + #pragma warning (disable: 4127) // conditional expression is constant +#endif + +#if defined(_XBOX) && !defined(LZHAM_ANSI_CPLUSPLUS) + // X360 + #include + #define _HAS_EXCEPTIONS 0 + #define NOMINMAX + + #define LZHAM_PLATFORM_X360 1 + #define LZHAM_USE_WIN32_API 1 + #define LZHAM_USE_WIN32_ATOMIC_FUNCTIONS 1 + #define LZHAM_64BIT_POINTERS 0 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 1 + #define LZHAM_BIG_ENDIAN_CPU 1 + #define LZHAM_USE_UNALIGNED_INT_LOADS 1 + #define LZHAM_RESTRICT __restrict + #define LZHAM_FORCE_INLINE __forceinline + #define LZHAM_NOTE_UNUSED(x) (void)x + +#elif defined(WIN32) && !defined(LZHAM_ANSI_CPLUSPLUS) + // MSVC or MinGW, x86 or x64, Win32 API's for threading and Win32 Interlocked API's or GCC built-ins for atomic ops. + #ifdef NDEBUG + // Ensure checked iterators are disabled. + #define _SECURE_SCL 0 + #define _HAS_ITERATOR_DEBUGGING 0 + #endif + #ifndef _DLL + // If we're using the DLL form of the run-time libs, we're also going to be enabling exceptions because we'll be building CLR apps. + // Otherwise, we disable exceptions for a small speed boost. 
+ //#define _HAS_EXCEPTIONS 0 + #endif + #define NOMINMAX + + #ifndef _WIN32_WINNT + #define _WIN32_WINNT 0x500 + #endif + + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif + + #include + + #define LZHAM_USE_WIN32_API 1 + + #if defined(__MINGW32__) || defined(__MINGW64__) + #define LZHAM_USE_GCC_ATOMIC_BUILTINS 1 + #else + #define LZHAM_USE_WIN32_ATOMIC_FUNCTIONS 1 + #endif + + #define LZHAM_PLATFORM_PC 1 + + #ifdef _WIN64 + #define LZHAM_PLATFORM_PC_X64 1 + #define LZHAM_64BIT_POINTERS 1 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 1 + #define LZHAM_LITTLE_ENDIAN_CPU 1 + #else + #define LZHAM_PLATFORM_PC_X86 1 + #define LZHAM_64BIT_POINTERS 0 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 0 + #define LZHAM_LITTLE_ENDIAN_CPU 1 + #endif + + #define LZHAM_USE_UNALIGNED_INT_LOADS 1 + #define LZHAM_RESTRICT __restrict + #define LZHAM_FORCE_INLINE __forceinline + + #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) + #define LZHAM_USE_MSVC_INTRINSICS 1 + #endif + + #define LZHAM_NOTE_UNUSED(x) (void)x + +#elif defined(__GNUC__) && !defined(LZHAM_ANSI_CPLUSPLUS) + // GCC x86 or x64, pthreads for threading and GCC built-ins for atomic ops. + #define LZHAM_PLATFORM_PC 1 + + #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) + #define LZHAM_PLATFORM_PC_X64 1 + #define LZHAM_64BIT_POINTERS 1 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 1 + #else + #define LZHAM_PLATFORM_PC_X86 1 + #define LZHAM_64BIT_POINTERS 0 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 0 + #endif + + #define LZHAM_USE_UNALIGNED_INT_LOADS 1 + + #define LZHAM_LITTLE_ENDIAN_CPU 1 + + #define LZHAM_USE_PTHREADS_API 1 + #define LZHAM_USE_GCC_ATOMIC_BUILTINS 1 + + #define LZHAM_RESTRICT + + #if defined(__clang__) + #define LZHAM_FORCE_INLINE inline + #else + #define LZHAM_FORCE_INLINE inline __attribute__((__always_inline__,__gnu_inline__)) + #endif + + #define LZHAM_NOTE_UNUSED(x) (void)x +#else + // Vanilla ANSI-C/C++ + // No threading support, unaligned loads are NOT okay. 
+ #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) + #define LZHAM_64BIT_POINTERS 1 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 1 + #else + #define LZHAM_64BIT_POINTERS 0 + #define LZHAM_CPU_HAS_64BIT_REGISTERS 0 + #endif + + #define LZHAM_USE_UNALIGNED_INT_LOADS 0 + + #if __BIG_ENDIAN__ + #define LZHAM_BIG_ENDIAN_CPU 1 + #else + #define LZHAM_LITTLE_ENDIAN_CPU 1 + #endif + + #define LZHAM_USE_GCC_ATOMIC_BUILTINS 0 + #define LZHAM_USE_WIN32_ATOMIC_FUNCTIONS 0 + + #define LZHAM_RESTRICT + #define LZHAM_FORCE_INLINE inline + + #define LZHAM_NOTE_UNUSED(x) (void)x +#endif + +#if LZHAM_LITTLE_ENDIAN_CPU + const bool c_lzham_little_endian_platform = true; +#else + const bool c_lzham_little_endian_platform = false; +#endif + +const bool c_lzham_big_endian_platform = !c_lzham_little_endian_platform; + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lzham.h" +#include "lzham_config.h" +#include "lzham_types.h" +#include "lzham_assert.h" +#include "lzham_platform.h" + +#include "lzham_helpers.h" +#include "lzham_traits.h" +#include "lzham_mem.h" +#include "lzham_math.h" +#include "lzham_utils.h" +#include "lzham_vector.h" diff --git a/r5dev/thirdparty/lzham/include/lzham_helpers.h b/r5dev/thirdparty/lzham/include/lzham_helpers.h new file mode 100644 index 00000000..11e0a119 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_helpers.h @@ -0,0 +1,54 @@ +// File: lzham_helpers.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#define LZHAM_NO_COPY_OR_ASSIGNMENT_OP(c) c(const c&); c& operator= (const c&); + +namespace lzham +{ + namespace helpers + { + template struct rel_ops + { + friend inline bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend inline bool operator> (const T& x, const T& y) { return (y < x); } + friend inline bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend inline bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + inline void construct_array(T* p, uint n); + + template + inline void construct_array(T* p, uint n, const U& init) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + LZHAM_NOTE_UNUSED(p); + p->~T(); + } + + template + inline void destruct_array(T* p, uint n); + + } // namespace helpers + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_huffman_codes.h b/r5dev/thirdparty/lzham/include/lzham_huffman_codes.h new file mode 100644 index 00000000..caab1a68 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_huffman_codes.h @@ -0,0 +1,14 @@ +// File: lzham_huffman_codes.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + //const uint cHuffmanMaxSupportedSyms = 600; + const uint cHuffmanMaxSupportedSyms = 1024; + + uint get_generate_huffman_codes_table_size(); + + bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_lzbase.h b/r5dev/thirdparty/lzham/include/lzham_lzbase.h new file mode 100644 index 00000000..8904ddd4 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_lzbase.h @@ 
-0,0 +1,45 @@ +// File: lzham_lzbase.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#include "../lzhamdecomp/lzham_lzdecompbase.h" + +//#define LZHAM_LZVERIFY +//#define LZHAM_DISABLE_RAW_BLOCKS + +namespace lzham +{ + struct CLZBase : CLZDecompBase + { + uint8 m_slot_tab0[4096]; + uint8 m_slot_tab1[512]; + uint8 m_slot_tab2[256]; + + void init_slot_tabs(); + + inline void compute_lzx_position_slot(uint dist, uint& slot, uint& ofs) + { + uint s; + if (dist < 0x1000) + s = m_slot_tab0[dist]; + else if (dist < 0x100000) + s = m_slot_tab1[dist >> 11]; + else if (dist < 0x1000000) + s = m_slot_tab2[dist >> 16]; + else if (dist < 0x2000000) + s = 48 + ((dist - 0x1000000) >> 23); + else if (dist < 0x4000000) + s = 50 + ((dist - 0x2000000) >> 24); + else + s = 52 + ((dist - 0x4000000) >> 25); + + ofs = (dist - m_lzx_position_base[s]) & m_lzx_position_extra_mask[s]; + slot = s; + + LZHAM_ASSERT(s < m_num_lzx_slots); + LZHAM_ASSERT((m_lzx_position_base[slot] + ofs) == dist); + LZHAM_ASSERT(ofs < (1U << m_lzx_position_extra_bits[slot])); + } + }; + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_match_accel.h b/r5dev/thirdparty/lzham/include/lzham_match_accel.h new file mode 100644 index 00000000..384ea7dd --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_match_accel.h @@ -0,0 +1,146 @@ +// File: lzham_match_accel.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "lzham_lzbase.h" +#include "lzham_threading.h" + +namespace lzham +{ + const uint cMatchAccelMaxSupportedProbes = 128; + + struct node + { + uint m_left; + uint m_right; + }; + + LZHAM_DEFINE_BITWISE_MOVABLE(node); + +#pragma pack(push, 1) + struct dict_match + { + uint m_dist; + uint16 m_len; + + inline uint get_dist() const { return m_dist & 0x7FFFFFFF; } + inline uint get_len() const { return m_len + 2; } + inline bool is_last() const { return (int)m_dist < 0; } + }; +#pragma pack(pop) + + LZHAM_DEFINE_BITWISE_MOVABLE(dict_match); + + class search_accelerator + { + public: + search_accelerator(); + + // If all_matches is true, the match finder returns all found matches with no filtering. + // Otherwise, the finder will tend to return lists of matches with mostly unique lengths. + // For each length, it will discard matches with worse distances (in the coding sense). 
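   // Editorial worked example (not part of this patch): a packed dict_match with
   // m_dist == 0x80000064 and m_len == 4 decodes as get_dist() == 100,
   // get_len() == 6 and is_last() == true; the high bit of m_dist marks the last
   // entry of a match list, and get_len() adds back the bias of 2 on stored lengths.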
+ bool init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes); + + void reset(); + void flush(); + + inline uint get_max_dict_size() const { return m_max_dict_size; } + inline uint get_max_dict_size_mask() const { return m_max_dict_size_mask; } + inline uint get_cur_dict_size() const { return m_cur_dict_size; } + + inline uint get_lookahead_pos() const { return m_lookahead_pos; } + inline uint get_lookahead_size() const { return m_lookahead_size; } + + inline uint get_char(int delta_pos) const { return m_dict[(m_lookahead_pos + delta_pos) & m_max_dict_size_mask]; } + inline uint get_char(uint cur_dict_pos, int delta_pos) const { return m_dict[(cur_dict_pos + delta_pos) & m_max_dict_size_mask]; } + inline const uint8* get_ptr(uint pos) const { return &m_dict[pos]; } + + uint get_max_helper_threads() const { return m_max_helper_threads; } + + inline uint operator[](uint pos) const { return m_dict[pos]; } + + uint get_max_add_bytes() const; + bool add_bytes_begin(uint num_bytes, const uint8* pBytes); + inline atomic32_t get_num_completed_helper_threads() const { return m_num_completed_helper_threads; } + void add_bytes_end(); + + // Returns the lookahead's raw position/size/dict_size at the time add_bytes_begin() is called. + inline uint get_fill_lookahead_pos() const { return m_fill_lookahead_pos; } + inline uint get_fill_lookahead_size() const { return m_fill_lookahead_size; } + inline uint get_fill_dict_size() const { return m_fill_dict_size; } + + uint get_len2_match(uint lookahead_ofs); + dict_match* find_matches(uint lookahead_ofs, bool spin = true); + + void advance_bytes(uint num_bytes); + + LZHAM_FORCE_INLINE uint get_match_len(uint lookahead_ofs, int dist, uint max_match_len, uint start_match_len = 0) const + { + LZHAM_ASSERT(lookahead_ofs < m_lookahead_size); + LZHAM_ASSERT(start_match_len <= max_match_len); + LZHAM_ASSERT(max_match_len <= (get_lookahead_size() - lookahead_ofs)); + + const int find_dict_size = m_cur_dict_size + lookahead_ofs; + if (dist > find_dict_size) + return 0; + + const uint comp_pos = static_cast((m_lookahead_pos + lookahead_ofs - dist) & m_max_dict_size_mask); + const uint lookahead_pos = (m_lookahead_pos + lookahead_ofs) & m_max_dict_size_mask; + + const uint8* pComp = &m_dict[comp_pos]; + const uint8* pLookahead = &m_dict[lookahead_pos]; + + uint match_len; + for (match_len = start_match_len; match_len < max_match_len; match_len++) + if (pComp[match_len] != pLookahead[match_len]) + break; + + return match_len; + } + + public: + CLZBase* m_pLZBase; + task_pool* m_pTask_pool; + uint m_max_helper_threads; + + uint m_max_dict_size; + uint m_max_dict_size_mask; + + uint m_lookahead_pos; + uint m_lookahead_size; + + uint m_cur_dict_size; + + lzham::vector m_dict; + + enum { cHashSize = 65536 }; + lzham::vector m_hash; + lzham::vector m_nodes; + + lzham::vector m_matches; + lzham::vector m_match_refs; + + lzham::vector m_hash_thread_index; + + enum { cDigramHashSize = 4096 }; + lzham::vector m_digram_hash; + lzham::vector m_digram_next; + + uint m_fill_lookahead_pos; + uint m_fill_lookahead_size; + uint m_fill_dict_size; + + uint m_max_probes; + uint m_max_matches; + + bool m_all_matches; + + volatile atomic32_t m_next_match_ref; + + volatile atomic32_t m_num_completed_helper_threads; + + void find_all_matches_callback(uint64 data, void* pData_ptr); + bool find_all_matches(uint num_bytes); + bool find_len2_matches(); + }; + +} // namespace lzham diff --git 
a/r5dev/thirdparty/lzham/include/lzham_math.h b/r5dev/thirdparty/lzham/include/lzham_math.h new file mode 100644 index 00000000..299f299b --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_math.h @@ -0,0 +1,113 @@ +// File: lzham_math.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#if defined(LZHAM_USE_MSVC_INTRINSICS) && !defined(__MINGW32__) + #include + #if defined(_MSC_VER) + #pragma intrinsic(_BitScanReverse) + #endif +#endif + +namespace lzham +{ + namespace math + { + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template inline T minimum(T a, T b) { return (a < b) ? a : b; } + + template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } + + template inline T maximum(T a, T b) { return (a > b) ? a : b; } + + template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } + + template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } + + inline bool is_power_of_2(uint32 x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64 x) { return x && ((x & (x - 1U)) == 0U); } + + template inline T align_up_pointer(T p, uint alignment) + { + LZHAM_ASSERT(is_power_of_2(alignment)); + ptr_bits_t q = reinterpret_cast(p); + q = (q + alignment - 1) & (~((uint_ptr)alignment - 1)); + return reinterpret_cast(q); + } + + // From "Hackers Delight" + // val remains unchanged if it is already a power of 2. + inline uint32 next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + // val remains unchanged if it is already a power of 2. + inline uint64 next_pow2(uint64 val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; + } + + // Returns the total number of bits needed to encode v. + // This needs to be fast - it's used heavily when determining Polar codelengths. 
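   // Editorial worked example (not part of this patch):
   //   total_bits(0) == 0, total_bits(1) == 1, total_bits(255) == 8, total_bits(256) == 9;
   // equivalently, for v > 0, total_bits(v) == floor_log2i(v) + 1.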
+ inline uint total_bits(uint v) + { + unsigned long l = 0; +#if defined(__MINGW32__) + if (v) + { + l = 32 -__builtin_clz(v); + } +#elif defined(LZHAM_USE_MSVC_INTRINSICS) + if (_BitScanReverse(&l, v)) + { + l++; + } +#else + while (v > 0U) + { + v >>= 1; + l++; + } +#endif + return l; + } + + } + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/include/lzham_mem.h b/r5dev/thirdparty/lzham/include/lzham_mem.h new file mode 100644 index 00000000..d258efff --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_mem.h @@ -0,0 +1,112 @@ +// File: lzham_mem.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + void lzham_mem_init(); + + void* lzham_malloc(size_t size, size_t* pActual_size = NULL); + void* lzham_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + void lzham_free(void* p); + size_t lzham_msize(void* p); + + template + inline T* lzham_new() + { + T* p = static_cast(lzham_malloc(sizeof(T))); + if (!p) return NULL; + if (LZHAM_IS_SCALAR_TYPE(T)) + return p; + return helpers::construct(p); + } + + template + inline T* lzham_new(const A& init0) + { + T* p = static_cast(lzham_malloc(sizeof(T))); + if (!p) return NULL; + return new (static_cast(p)) T(init0); + } + + template + inline T* lzham_new(const A& init0, const B& init1) + { + T* p = static_cast(lzham_malloc(sizeof(T))); + if (!p) return NULL; + return new (static_cast(p)) T(init0, init1); + } + + template + inline T* lzham_new(const A& init0, const B& init1, const C& init2) + { + T* p = static_cast(lzham_malloc(sizeof(T))); + if (!p) return NULL; + return new (static_cast(p)) T(init0, init1, init2); + } + + template + inline T* lzham_new(const A& init0, const B& init1, const C& init2, const D& init3) + { + T* p = static_cast(lzham_malloc(sizeof(T))); + if (!p) return NULL; + return new (static_cast(p)) T(init0, init1, init2, init3); + } + + template + inline T* lzham_new_array(uint32 num) + { + if (!num) num = 1; + + uint8* q = static_cast(lzham_malloc(LZHAM_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); + if (!q) + return NULL; + + T* p = reinterpret_cast(q + LZHAM_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + if (!LZHAM_IS_SCALAR_TYPE(T)) + { + helpers::construct_array(p, num); + } + return p; + } + + template + inline void lzham_delete(T* p) + { + if (p) + { + if (!LZHAM_IS_SCALAR_TYPE(T)) + { + helpers::destruct(p); + } + lzham_free(p); + } + } + + template + inline void lzham_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + const uint32 num_check = reinterpret_cast(p)[-2]; + LZHAM_ASSERT(num && (num == ~num_check)); + if (num == ~num_check) + { + if (!LZHAM_IS_SCALAR_TYPE(T)) + { + helpers::destruct_array(p, num); + } + + lzham_free(reinterpret_cast(p) - LZHAM_MIN_ALLOC_ALIGNMENT); + } + } + } + + void lzham_print_mem_stats(); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_null_threading.h b/r5dev/thirdparty/lzham/include/lzham_null_threading.h new file mode 100644 index 00000000..00fb0337 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_null_threading.h @@ -0,0 +1,97 @@ +// File: lzham_task_pool_null.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + class semaphore + { + LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + + public: + inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) + { + initialCount, maximumCount, pName; + } + 
+ inline ~semaphore() + { + } + + inline void release(long releaseCount = 1, long *pPreviousCount = NULL) + { + releaseCount, pPreviousCount; + } + + inline bool wait(uint32 milliseconds = UINT32_MAX) + { + milliseconds; + return true; + } + }; + + class task_pool + { + public: + inline task_pool() { } + inline task_pool(uint num_threads) { num_threads; } + inline ~task_pool() { } + + inline bool init(uint num_threads) { num_threads; return true; } + inline void deinit(); + + inline uint get_num_threads() const { return 0; } + inline uint get_num_outstanding_tasks() const { return 0; } + + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL) + { + pFunc(data, pData_ptr); + return true; + } + + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; + + // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! + inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL) + { + pObj->execute_task(data, pData_ptr); + return true; + } + + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL) + { + (pObject->*pObject_method)(data, pData_ptr); + return true; + } + + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL) + { + for (uint i = 0; i < num_tasks; i++) + { + (pObject->*pObject_method)(first_data + i, pData_ptr); + } + return true; + } + + void join() { } + }; + + inline void lzham_sleep(unsigned int milliseconds) + { + milliseconds; + } + + inline uint lzham_get_max_helper_threads() + { + return 0; + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_platform.h b/r5dev/thirdparty/lzham/include/lzham_platform.h new file mode 100644 index 00000000..0cc58beb --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_platform.h @@ -0,0 +1,284 @@ +// File: lzham_platform.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +bool lzham_is_debugger_present(void); +void lzham_debug_break(void); +void lzham_output_debug_string(const char* p); + +// actually in lzham_assert.cpp +void lzham_assert(const char* pExp, const char* pFile, unsigned line); +void lzham_fail(const char* pExp, const char* pFile, unsigned line); + +#ifdef WIN32 + #define LZHAM_BREAKPOINT DebuggerBreak(); + #define LZHAM_BUILTIN_EXPECT(c, v) c +#elif defined(__GNUC__) + #define LZHAM_BREAKPOINT asm("int $3"); + #define LZHAM_BUILTIN_EXPECT(c, v) __builtin_expect(c, v) +#else + #define LZHAM_BREAKPOINT + #define LZHAM_BUILTIN_EXPECT(c, v) c +#endif + +#if defined(__GNUC__) && LZHAM_PLATFORM_PC +extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) void lzham_yield_processor() +{ + __asm__ __volatile__("pause"); +} +#elif LZHAM_PLATFORM_X360 +#define lzham_yield_processor() \ + YieldProcessor(); \ + __asm { or r0, r0, r0 } \ + YieldProcessor(); \ + __asm { or r1, r1, r1 } \ + YieldProcessor(); \ + __asm { or r0, r0, r0 } \ + YieldProcessor(); \ + __asm { or r1, r1, r1 } \ + YieldProcessor(); \ + __asm { or r0, r0, r0 } \ + YieldProcessor(); \ + __asm { or r1, r1, r1 } \ + YieldProcessor(); \ + __asm { or r0, r0, r0 } \ + YieldProcessor(); \ + __asm { or r1, r1, r1 } +#else +LZHAM_FORCE_INLINE void lzham_yield_processor() +{ +#if LZHAM_USE_MSVC_INTRINSICS + #if 
LZHAM_PLATFORM_PC_X64 + _mm_pause(); + #else + YieldProcessor(); + #endif +#else + // No implementation +#endif +} +#endif + +#ifndef _MSC_VER + int sprintf_s(char *buffer, size_t sizeOfBuffer, const char *format, ...); + int vsprintf_s(char *buffer, size_t sizeOfBuffer, const char *format, va_list args); +#endif + +#if LZHAM_PLATFORM_X360 + #define LZHAM_MEMORY_EXPORT_BARRIER MemoryBarrier(); +#else + // Barriers shouldn't be necessary on x86/x64. + // TODO: Should use __sync_synchronize() on other platforms that support GCC. + #define LZHAM_MEMORY_EXPORT_BARRIER +#endif + +#if LZHAM_PLATFORM_X360 + #define LZHAM_MEMORY_IMPORT_BARRIER MemoryBarrier(); +#else + // Barriers shouldn't be necessary on x86/x64. + // TODO: Should use __sync_synchronize() on other platforms that support GCC. + #define LZHAM_MEMORY_IMPORT_BARRIER +#endif + +// Note: It's very important that LZHAM_READ_BIG_ENDIAN_UINT32() is fast on the target platform. +// This is used to read every DWORD from the input stream. + +#if LZHAM_USE_UNALIGNED_INT_LOADS + #if LZHAM_BIG_ENDIAN_CPU + #define LZHAM_READ_BIG_ENDIAN_UINT32(p) *reinterpret_cast(p) + #else + #if defined(LZHAM_USE_MSVC_INTRINSICS) + #define LZHAM_READ_BIG_ENDIAN_UINT32(p) _byteswap_ulong(*reinterpret_cast(p)) + #elif defined(__GNUC__) + #define LZHAM_READ_BIG_ENDIAN_UINT32(p) __builtin_bswap32(*reinterpret_cast(p)) + #else + #define LZHAM_READ_BIG_ENDIAN_UINT32(p) utils::swap32(*reinterpret_cast(p)) + #endif + #endif +#else + #define LZHAM_READ_BIG_ENDIAN_UINT32(p) ((reinterpret_cast(p)[0] << 24) | (reinterpret_cast(p)[1] << 16) | (reinterpret_cast(p)[2] << 8) | (reinterpret_cast(p)[3])) +#endif + +#if LZHAM_USE_WIN32_ATOMIC_FUNCTIONS + extern "C" __int64 _InterlockedCompareExchange64(__int64 volatile * Destination, __int64 Exchange, __int64 Comperand); + #if defined(_MSC_VER) + #pragma intrinsic(_InterlockedCompareExchange64) + #endif +#endif // LZHAM_USE_WIN32_ATOMIC_FUNCTIONS + +namespace lzham +{ +#if LZHAM_USE_WIN32_ATOMIC_FUNCTIONS + typedef LONG atomic32_t; + typedef LONGLONG atomic64_t; + + // Returns the original value. + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile *pDest, atomic32_t exchange, atomic32_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedCompareExchange(pDest, exchange, comparand); + } + + // Returns the original value. + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile *pDest, atomic64_t exchange, atomic64_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 7) == 0); + return _InterlockedCompareExchange64(pDest, exchange, comparand); + } + + // Returns the resulting incremented value. + inline atomic32_t atomic_increment32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedIncrement(pDest); + } + + // Returns the resulting decremented value. + inline atomic32_t atomic_decrement32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedDecrement(pDest); + } + + // Returns the original value. + inline atomic32_t atomic_exchange32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchange(pDest, val); + } + + // Returns the resulting value. + inline atomic32_t atomic_add32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchangeAdd(pDest, val) + val; + } + + // Returns the original value. 
+ inline atomic32_t atomic_exchange_add(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchangeAdd(pDest, val); + } +#elif LZHAM_USE_GCC_ATOMIC_BUILTINS + typedef long atomic32_t; + typedef long long atomic64_t; + + // Returns the original value. + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile *pDest, atomic32_t exchange, atomic32_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_val_compare_and_swap(pDest, comparand, exchange); + } + + // Returns the original value. + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile *pDest, atomic64_t exchange, atomic64_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 7) == 0); + return __sync_val_compare_and_swap(pDest, comparand, exchange); + } + + // Returns the resulting incremented value. + inline atomic32_t atomic_increment32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_add_and_fetch(pDest, 1); + } + + // Returns the resulting decremented value. + inline atomic32_t atomic_decrement32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_sub_and_fetch(pDest, 1); + } + + // Returns the original value. + inline atomic32_t atomic_exchange32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_lock_test_and_set(pDest, val); + } + + // Returns the resulting value. + inline atomic32_t atomic_add32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_add_and_fetch(pDest, val); + } + + // Returns the original value. + inline atomic32_t atomic_exchange_add(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_fetch_and_add(pDest, val); + } +#else + #define LZHAM_NO_ATOMICS 1 + + // Atomic ops not supported - but try to do something reasonable. Assumes no threading at all. 
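   // Editorial worked example (not part of this patch), valid for each of the three
   // backends in this header: with *pDest == 5,
   //   atomic_add32(pDest, 3)        returns 8 (the resulting value) and leaves *pDest == 8;
   //   atomic_exchange_add(pDest, 3) returns 5 (the original value)  and leaves *pDest == 8.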
+ typedef long atomic32_t; + typedef long long atomic64_t; + + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile *pDest, atomic32_t exchange, atomic32_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + if (cur == comparand) + *pDest = exchange; + return cur; + } + + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile *pDest, atomic64_t exchange, atomic64_t comparand) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 7) == 0); + atomic64_t cur = *pDest; + if (cur == comparand) + *pDest = exchange; + return cur; + } + + inline atomic32_t atomic_increment32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest += 1); + } + + inline atomic32_t atomic_decrement32(atomic32_t volatile *pDest) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest -= 1); + } + + inline atomic32_t atomic_exchange32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + *pDest = val; + return cur; + } + + inline atomic32_t atomic_add32(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest += val); + } + + inline atomic32_t atomic_exchange_add(atomic32_t volatile *pDest, atomic32_t val) + { + LZHAM_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + *pDest += val; + return cur; + } + +#endif + +#if LZHAM_BUFFERED_PRINTF + void lzham_buffered_printf(const char *format, ...); + void lzham_flush_buffered_printf(); +#else + inline void lzham_buffered_printf(const char *format, ...) { (void)format; } + inline void lzham_flush_buffered_printf() { } +#endif + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_polar_codes.h b/r5dev/thirdparty/lzham/include/lzham_polar_codes.h new file mode 100644 index 00000000..c478d9d6 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_polar_codes.h @@ -0,0 +1,14 @@ +// File: lzham_polar_codes.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + //const uint cPolarMaxSupportedSyms = 600; + const uint cPolarMaxSupportedSyms = 1024; + + uint get_generate_polar_codes_table_size(); + + bool generate_polar_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_prefix_coding.h b/r5dev/thirdparty/lzham/include/lzham_prefix_coding.h new file mode 100644 index 00000000..a22903d6 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_prefix_coding.h @@ -0,0 +1,144 @@ +// File: lzham_prefix_coding.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + namespace prefix_coding + { + const uint cMaxExpectedCodeSize = 16; + const uint cMaxSupportedSyms = 1024; + const uint cMaxTableBits = 11; + + bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size); + + bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes); + + class decoder_tables + { + public: + inline decoder_tables() : + m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + } + + inline decoder_tables(const decoder_tables& other) : + m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), 
m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + *this = other; + } + + inline decoder_tables& operator= (const decoder_tables& rhs) + { + assign(rhs); + return *this; + } + + inline bool assign(const decoder_tables& rhs) + { + if (this == &rhs) + return true; + + uint32* pCur_lookup = m_lookup; + uint16* pCur_sorted_symbol_order = m_sorted_symbol_order; + + memcpy(this, &rhs, sizeof(*this)); + + if ((pCur_lookup) && (pCur_sorted_symbol_order) && (rhs.m_cur_lookup_size == m_cur_lookup_size) && (rhs.m_cur_sorted_symbol_order_size == m_cur_sorted_symbol_order_size)) + { + m_lookup = pCur_lookup; + m_sorted_symbol_order = pCur_sorted_symbol_order; + + memcpy(m_lookup, rhs.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + memcpy(m_sorted_symbol_order, rhs.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + else + { + lzham_delete_array(pCur_lookup); + m_lookup = NULL; + + if (rhs.m_lookup) + { + m_lookup = lzham_new_array(m_cur_lookup_size); + if (!m_lookup) + return false; + memcpy(m_lookup, rhs.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + lzham_delete_array(pCur_sorted_symbol_order); + m_sorted_symbol_order = NULL; + + if (rhs.m_sorted_symbol_order) + { + m_sorted_symbol_order = lzham_new_array(m_cur_sorted_symbol_order_size); + if (!m_sorted_symbol_order) + return false; + memcpy(m_sorted_symbol_order, rhs.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + } + + return true; + } + + inline void clear() + { + if (m_lookup) + { + lzham_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + lzham_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() + { + if (m_lookup) + lzham_delete_array(m_lookup); + + if (m_sorted_symbol_order) + lzham_delete_array(m_sorted_symbol_order); + } + + // DO NOT use any complex classes here - it is bitwise copied. + + uint m_num_syms; + uint m_total_used_syms; + uint m_table_bits; + uint m_table_shift; + uint m_table_max_code; + uint m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint m_max_codes[cMaxExpectedCodeSize + 1]; + int m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint m_cur_lookup_size; + uint32* m_lookup; + + uint m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint get_unshifted_max_code(uint len) const + { + LZHAM_ASSERT( (len >= 1) && (len <= cMaxExpectedCodeSize) ); + uint k = m_max_codes[len - 1]; + if (!k) + return UINT_MAX; + return (k - 1) >> (16 - len); + } + }; + + bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits); + + } // namespace prefix_coding + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_pthreads_threading.h b/r5dev/thirdparty/lzham/include/lzham_pthreads_threading.h new file mode 100644 index 00000000..fe1de038 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_pthreads_threading.h @@ -0,0 +1,383 @@ +// File: lzham_task_pool_pthreads.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#if LZHAM_USE_PTHREADS_API + +#if LZHAM_NO_ATOMICS +#error No atomic operations defined in lzham_platform.h! 
+#endif + +#include +#include +#include + +namespace lzham +{ + class semaphore + { + LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + + public: + inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) + { + LZHAM_NOTE_UNUSED(maximumCount), LZHAM_NOTE_UNUSED(pName); + LZHAM_ASSERT(maximumCount >= initialCount); + if (sem_init(&m_sem, 0, initialCount)) + { + LZHAM_FAIL("semaphore: sem_init() failed"); + } + } + + inline ~semaphore() + { + sem_destroy(&m_sem); + } + + inline void release(long releaseCount = 1) + { + LZHAM_ASSERT(releaseCount >= 1); + + int status = 0; +#ifdef WIN32 + if (1 == releaseCount) + status = sem_post(&m_sem); + else + status = sem_post_multiple(&m_sem, releaseCount); +#else + while (releaseCount > 0) + { + status = sem_post(&m_sem); + if (status) + break; + releaseCount--; + } +#endif + + if (status) + { + LZHAM_FAIL("semaphore: sem_post() or sem_post_multiple() failed"); + } + } + + inline bool wait(uint32 milliseconds = UINT32_MAX) + { + int status; + if (milliseconds == UINT32_MAX) + { + status = sem_wait(&m_sem); + } + else + { + struct timespec interval; + interval.tv_sec = milliseconds / 1000; + interval.tv_nsec = (milliseconds % 1000) * 1000000L; + status = sem_timedwait(&m_sem, &interval); + } + + if (status) + { + if (errno != ETIMEDOUT) + { + LZHAM_FAIL("semaphore: sem_wait() or sem_timedwait() failed"); + } + return false; + } + + return true; + } + + private: + sem_t m_sem; + }; + + class spinlock + { + public: + inline spinlock() + { + if (pthread_spin_init(&m_spinlock, 0)) + { + LZHAM_FAIL("spinlock: pthread_spin_init() failed"); + } + } + + inline ~spinlock() + { + pthread_spin_destroy(&m_spinlock); + } + + inline void lock() + { + if (pthread_spin_lock(&m_spinlock)) + { + LZHAM_FAIL("spinlock: pthread_spin_lock() failed"); + } + } + + inline void unlock() + { + if (pthread_spin_unlock(&m_spinlock)) + { + LZHAM_FAIL("spinlock: pthread_spin_unlock() failed"); + } + } + + private: + pthread_spinlock_t m_spinlock; + }; + + template + class tsstack + { + public: + inline tsstack() : m_top(0) + { + } + + inline ~tsstack() + { + } + + inline void clear() + { + m_spinlock.lock(); + m_top = 0; + m_spinlock.unlock(); + } + + inline bool try_push(const T& obj) + { + bool result = false; + m_spinlock.lock(); + if (m_top < (int)cMaxSize) + { + m_stack[m_top++] = obj; + result = true; + } + m_spinlock.unlock(); + return result; + } + + inline bool pop(T& obj) + { + bool result = false; + m_spinlock.lock(); + if (m_top > 0) + { + obj = m_stack[--m_top]; + result = true; + } + m_spinlock.unlock(); + return result; + } + + private: + spinlock m_spinlock; + T m_stack[cMaxSize]; + int m_top; + }; + + class task_pool + { + public: + task_pool(); + task_pool(uint num_threads); + ~task_pool(); + + enum { cMaxThreads = LZHAM_MAX_HELPER_THREADS }; + bool init(uint num_threads); + void deinit(); + + inline uint get_num_threads() const { return m_num_threads; } + inline uint get_num_outstanding_tasks() const { return m_num_outstanding_tasks; } + + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); + + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; + + // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
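        // Editorial usage sketch (not part of this patch): if the caller would rather not
        // manage deletion manually, queue_object_task()/queue_multiple_object_tasks() wrap
        // the object and method in an object_task created with
        // cObjectTaskFlagDeleteAfterExecution, so it frees itself after running. The
        // 'worker' type below is hypothetical.
#if 0   // example only
        struct worker { void do_chunk(uint64 chunk, void* pCtx) { (void)chunk; (void)pCtx; } };

        lzham::task_pool pool;
        pool.init(4);                                            // four helper threads
        worker w;
        pool.queue_multiple_object_tasks(&w, &worker::do_chunk, 0, 16);
        pool.join();                                             // wait for all 16 tasks
#endif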
+ bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); + + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); + + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); + + void join(); + + private: + struct task + { + inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { } + + uint64 m_data; + void* m_pData_ptr; + + union + { + task_callback_func m_callback; + executable_task* m_pObj; + }; + + uint m_flags; + }; + + tsstack m_task_stack; + + uint m_num_threads; + pthread_t m_threads[cMaxThreads]; + + semaphore m_tasks_available; + + enum task_flags + { + cTaskFlagObject = 1 + }; + + volatile atomic32_t m_num_outstanding_tasks; + volatile atomic32_t m_exit_flag; + + void process_task(task& tsk); + + static void* thread_func(void *pContext); + }; + + enum object_task_flags + { + cObjectTaskFlagDefault = 0, + cObjectTaskFlagDeleteAfterExecution = 1 + }; + + template + class object_task : public task_pool::executable_task + { + public: + object_task(uint flags = cObjectTaskFlagDefault) : + m_pObject(NULL), + m_pMethod(NULL), + m_flags(flags) + { + } + + typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + + object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : + m_pObject(pObject), + m_pMethod(pMethod), + m_flags(flags) + { + LZHAM_ASSERT(pObject && pMethod); + } + + void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) + { + LZHAM_ASSERT(pObject && pMethod); + + m_pObject = pObject; + m_pMethod = pMethod; + m_flags = flags; + } + + T* get_object() const { return m_pObject; } + object_method_ptr get_method() const { return m_pMethod; } + + virtual void execute_task(uint64 data, void* pData_ptr) + { + (m_pObject->*m_pMethod)(data, pData_ptr); + + if (m_flags & cObjectTaskFlagDeleteAfterExecution) + lzham_delete(this); + } + + protected: + T* m_pObject; + + object_method_ptr m_pMethod; + + uint m_flags; + }; + + template + inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) + { + object_task *pTask = lzham_new< object_task >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!pTask) + return false; + return queue_task(pTask, data, pData_ptr); + } + + template + inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pObject); + LZHAM_ASSERT(num_tasks); + if (!num_tasks) + return true; + + bool status = true; + + uint i; + for (i = 0; i < num_tasks; i++) + { + task tsk; + + tsk.m_pObj = lzham_new< object_task >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!tsk.m_pObj) + { + status = false; + break; + } + + tsk.m_data = first_data + i; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + if (!m_task_stack.try_push(tsk)) + { + status = false; + break; + } + } + + if (i) + { + atomic_add32(&m_num_outstanding_tasks, i); + + m_tasks_available.release(i); + } + + return status; + } + + inline void lzham_sleep(unsigned int milliseconds) + { +#ifdef WIN32 + struct timespec interval; + interval.tv_sec = milliseconds / 1000; + interval.tv_nsec = (milliseconds % 1000) * 1000000L; + pthread_delay_np(&interval); +#else + while (milliseconds) + { + int msecs_to_sleep = LZHAM_MIN(milliseconds, 1000); + 
usleep(msecs_to_sleep * 1000); + milliseconds -= msecs_to_sleep; + } +#endif + } + + // TODO: Implement + uint lzham_get_max_helper_threads(); + +} // namespace lzham + +#endif // LZHAM_USE_PTHREADS_API diff --git a/r5dev/thirdparty/lzham/include/lzham_symbol_codec.h b/r5dev/thirdparty/lzham/include/lzham_symbol_codec.h new file mode 100644 index 00000000..824a4c74 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_symbol_codec.h @@ -0,0 +1,556 @@ +// File: lzham_symbol_codec.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "lzham_prefix_coding.h" + +namespace lzham +{ + class symbol_codec; + class adaptive_arith_data_model; + + const uint cSymbolCodecArithMinLen = 0x01000000U; + const uint cSymbolCodecArithMaxLen = 0xFFFFFFFFU; + + const uint cSymbolCodecArithProbBits = 11; + const uint cSymbolCodecArithProbScale = 1 << cSymbolCodecArithProbBits; + const uint cSymbolCodecArithProbHalfScale = 1 << (cSymbolCodecArithProbBits - 1); + const uint cSymbolCodecArithProbMoveBits = 5; + + typedef uint64 bit_cost_t; + const uint32 cBitCostScaleShift = 24; + const uint32 cBitCostScale = (1U << cBitCostScaleShift); + const bit_cost_t cBitCostMax = UINT64_MAX; + + inline bit_cost_t convert_to_scaled_bitcost(uint bits) { LZHAM_ASSERT(bits <= 255); uint32 scaled_bits = bits << cBitCostScaleShift; return static_cast(scaled_bits); } + + extern uint32 g_prob_cost[cSymbolCodecArithProbScale]; + + class raw_quasi_adaptive_huffman_data_model + { + public: + raw_quasi_adaptive_huffman_data_model(bool encoding = true, uint total_syms = 0, bool fast_encoding = false, bool use_polar_codes = false); + raw_quasi_adaptive_huffman_data_model(const raw_quasi_adaptive_huffman_data_model& other); + ~raw_quasi_adaptive_huffman_data_model(); + + bool assign(const raw_quasi_adaptive_huffman_data_model& rhs); + raw_quasi_adaptive_huffman_data_model& operator= (const raw_quasi_adaptive_huffman_data_model& rhs); + + void clear(); + + bool init(bool encoding, uint total_syms, bool fast_encoding, bool use_polar_codes, const uint16 *pInitial_sym_freq = NULL); + bool reset(); + + inline uint get_total_syms() const { return m_total_syms; } + + void rescale(); + void reset_update_rate(); + + bool update(uint sym); + + inline bit_cost_t get_cost(uint sym) const { return convert_to_scaled_bitcost(m_code_sizes[sym]); } + + public: + lzham::vector m_initial_sym_freq; + + lzham::vector m_sym_freq; + + lzham::vector m_codes; + lzham::vector m_code_sizes; + + prefix_coding::decoder_tables* m_pDecode_tables; + + uint m_total_syms; + + uint m_max_cycle; + uint m_update_cycle; + uint m_symbols_until_update; + + uint m_total_count; + + uint8 m_decoder_table_bits; + bool m_encoding; + bool m_fast_updating; + bool m_use_polar_codes; + + bool update(); + + friend class symbol_codec; + }; + + struct quasi_adaptive_huffman_data_model : public raw_quasi_adaptive_huffman_data_model + { +#if LZHAM_64BIT_POINTERS + // Ensures sizeof(quasi_adaptive_huffman_data_model) is 128 bytes on x64 (it's 64 on x86). 
+ char m_unused_alignment[128 - sizeof(raw_quasi_adaptive_huffman_data_model)]; +#endif + }; + + class adaptive_bit_model + { + public: + adaptive_bit_model(); + adaptive_bit_model(float prob0); + adaptive_bit_model(const adaptive_bit_model& other); + + inline adaptive_bit_model& operator= (const adaptive_bit_model& rhs) { m_bit_0_prob = rhs.m_bit_0_prob; return *this; } + + inline void clear() { m_bit_0_prob = 1U << (cSymbolCodecArithProbBits - 1); } + + void set_probability_0(float prob0); + + inline void update(uint bit) + { + if (!bit) + m_bit_0_prob += ((cSymbolCodecArithProbScale - m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + else + m_bit_0_prob -= (m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + LZHAM_ASSERT(m_bit_0_prob >= 1); + LZHAM_ASSERT(m_bit_0_prob < cSymbolCodecArithProbScale); + } + + inline bit_cost_t get_cost(uint bit) const { return g_prob_cost[bit ? (cSymbolCodecArithProbScale - m_bit_0_prob) : m_bit_0_prob]; } + + public: + uint16 m_bit_0_prob; + + friend class symbol_codec; + friend class adaptive_arith_data_model; + }; + + // This class is not actually used by LZHAM - it's only here for comparison/experimental purposes. + class adaptive_arith_data_model + { + public: + adaptive_arith_data_model(bool encoding = true, uint total_syms = 0); + adaptive_arith_data_model(const adaptive_arith_data_model& other); + ~adaptive_arith_data_model(); + + adaptive_arith_data_model& operator= (const adaptive_arith_data_model& rhs); + + void clear(); + + bool init(bool encoding, uint total_syms); + bool init(bool encoding, uint total_syms, bool fast_encoding, bool use_polar_codes = false) { LZHAM_NOTE_UNUSED(fast_encoding), LZHAM_NOTE_UNUSED(use_polar_codes); return init(encoding, total_syms); } + void reset(); + + void reset_update_rate(); + + bool update(uint sym); + + uint get_total_syms() const { return m_total_syms; } + bit_cost_t get_cost(uint sym) const; + + public: + uint m_total_syms; + typedef lzham::vector adaptive_bit_model_vector; + adaptive_bit_model_vector m_probs; + + friend class symbol_codec; + }; + +#if LZHAM_CPU_HAS_64BIT_REGISTERS + #define LZHAM_SYMBOL_CODEC_USE_64_BIT_BUFFER 1 +#else + #define LZHAM_SYMBOL_CODEC_USE_64_BIT_BUFFER 0 +#endif + + class symbol_codec + { + public: + symbol_codec(); + + void reset(); + + // clear() is like reset(), except it also frees all memory. 
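+      // (Illustrative encode-side sketch using the API declared just below; the symbol value
+      //  `sym` and the model parameters are assumptions made by the caller, not fixed by LZHAM:
+      //
+      //      lzham::quasi_adaptive_huffman_data_model model;
+      //      model.init(true, 256, false, false);          // encoding model, 256 symbols
+      //
+      //      lzham::symbol_codec codec;
+      //      codec.start_encoding(64 * 1024);              // hint: expected compressed size
+      //      codec.encode_bits(0x5A, 8);                   // raw, model-free bits
+      //      codec.encode(sym, model);                     // adaptive-Huffman coded symbol
+      //      codec.stop_encoding(false);                   // false = no arithmetic coding used
+      //      const lzham::vector<lzham::uint8>& buf = codec.get_encoding_buf();
+      //  )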
+ void clear(); + + // Encoding + bool start_encoding(uint expected_file_size); + bool encode_bits(uint bits, uint num_bits); + bool encode_arith_init(); + bool encode_align_to_byte(); + bool encode(uint sym, quasi_adaptive_huffman_data_model& model); + bool encode(uint bit, adaptive_bit_model& model, bool update_model = true); + bool encode(uint sym, adaptive_arith_data_model& model); + + inline uint encode_get_total_bits_written() const { return m_total_bits_written; } + + bool stop_encoding(bool support_arith); + + const lzham::vector& get_encoding_buf() const { return m_output_buf; } + lzham::vector& get_encoding_buf() { return m_output_buf; } + + // Decoding + + typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void *pPrivate_data, const uint8* &pBuf, size_t &buf_size, bool &eof_flag); + + bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = NULL, void *pPrivate_data = NULL); + + inline void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag) + { + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf_next; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + m_decode_buf_eof = eof_flag; + } + inline uint64 decode_get_bytes_consumed() const { return m_pDecode_buf_next - m_pDecode_buf; } + inline uint64 decode_get_bits_remaining() const { return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; } + + void start_arith_decoding(); + uint decode_bits(uint num_bits); + uint decode_peek_bits(uint num_bits); + void decode_remove_bits(uint num_bits); + void decode_align_to_byte(); + int decode_remove_byte_from_bit_buf(); + uint decode(quasi_adaptive_huffman_data_model& model); + uint decode(adaptive_bit_model& model, bool update_model = true); + uint decode(adaptive_arith_data_model& model); + uint64 stop_decoding(); + + uint get_total_model_updates() const { return m_total_model_updates; } + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + size_t m_decode_buf_size; + bool m_decode_buf_eof; + + need_bytes_func_ptr m_pDecode_need_bytes_func; + void* m_pDecode_private_data; + +#if LZHAM_SYMBOL_CODEC_USE_64_BIT_BUFFER + typedef uint64 bit_buf_t; + enum { cBitBufSize = 64 }; +#else + typedef uint32 bit_buf_t; + enum { cBitBufSize = 32 }; +#endif + + bit_buf_t m_bit_buf; + int m_bit_count; + + uint m_total_model_updates; + + lzham::vector m_output_buf; + lzham::vector m_arith_output_buf; + + struct output_symbol + { + uint m_bits; + + enum + { + cArithSym = -1, + cAlignToByteSym = -2, + cArithInit = -3 + }; + int16 m_num_bits; + + uint16 m_arith_prob0; + }; + lzham::vector m_output_syms; + + uint m_total_bits_written; + + uint m_arith_base; + uint m_arith_value; + uint m_arith_length; + uint m_arith_total_bits; + + quasi_adaptive_huffman_data_model* m_pSaved_huff_model; + void* m_pSaved_model; + uint m_saved_node_index; + + bool put_bits_init(uint expected_size); + bool record_put_bits(uint bits, uint num_bits); + + void arith_propagate_carry(); + bool arith_renorm_enc_interval(); + void arith_start_encoding(); + bool arith_stop_encoding(); + + bool put_bits(uint bits, uint num_bits); + bool put_bits_align_to_byte(); + bool flush_bits(); + bool assemble_output_buf(); + + uint get_bits(uint num_bits); + void remove_bits(uint num_bits); + + void decode_need_bytes(); + + enum + { + cNull, + cEncoding, + cDecoding + } m_mode; + }; + +// Optional macros for faster decompression. 
These macros implement the symbol_codec class's decode functionality. +// This is hard to debug (and just plain ugly), but using these macros eliminate function calls, and they place the most important +// member variables on the stack so they're hopefully put in registers (avoiding horrible load hit stores on some CPU's). +// The user must define the LZHAM_DECODE_NEEDS_BYTES macro, which is invoked when the decode buffer is exhausted. + +#define LZHAM_SYMBOL_CODEC_DECODE_DECLARE(codec) \ + uint arith_value = 0; \ + uint arith_length = 0; \ + symbol_codec::bit_buf_t bit_buf = 0; \ + int bit_count = 0; \ + const uint8* pDecode_buf_next = NULL; + +#define LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + arith_value = codec.m_arith_value; \ + arith_length = codec.m_arith_length; \ + bit_buf = codec.m_bit_buf; \ + bit_count = codec.m_bit_count; \ + pDecode_buf_next = codec.m_pDecode_buf_next; + +#define LZHAM_SYMBOL_CODEC_DECODE_END(codec) \ + codec.m_arith_value = arith_value; \ + codec.m_arith_length = arith_length; \ + codec.m_bit_buf = bit_buf; \ + codec.m_bit_count = bit_count; \ + codec.m_pDecode_buf_next = pDecode_buf_next; + +// The user must declare the LZHAM_DECODE_NEEDS_BYTES macro. + +#define LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) \ +{ \ + while (LZHAM_BUILTIN_EXPECT(bit_count < (int)(num_bits), 0)) \ + { \ + uint r; \ + if (LZHAM_BUILTIN_EXPECT(pDecode_buf_next == codec.m_pDecode_buf_end, 0)) \ + { \ + if (LZHAM_BUILTIN_EXPECT(!codec.m_decode_buf_eof, 1)) \ + { \ + LZHAM_SYMBOL_CODEC_DECODE_END(codec) \ + LZHAM_DECODE_NEEDS_BYTES \ + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + } \ + r = 0; \ + if (LZHAM_BUILTIN_EXPECT(pDecode_buf_next < codec.m_pDecode_buf_end, 1)) r = *pDecode_buf_next++; \ + } \ + else \ + r = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(r) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + result = (num_bits) ? 
static_cast(bit_buf >> (symbol_codec::cBitBufSize - (num_bits))) : 0; \ + bit_buf <<= (num_bits); \ + bit_count -= (num_bits); \ +} + +#define LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) \ +{ \ + adaptive_bit_model *pModel; \ + pModel = &model; \ + while (LZHAM_BUILTIN_EXPECT(arith_length < cSymbolCodecArithMinLen, 0)) \ + { \ + uint c; codec.m_pSaved_model = pModel; \ + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ + pModel = static_cast(codec.m_pSaved_model); \ + arith_value = (arith_value << 8) | c; \ + arith_length <<= 8; \ + } \ + uint x = pModel->m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ + result = (arith_value >= x); \ + if (!result) \ + { \ + pModel->m_bit_0_prob += ((cSymbolCodecArithProbScale - pModel->m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ + arith_length = x; \ + } \ + else \ + { \ + pModel->m_bit_0_prob -= (pModel->m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ + arith_value -= x; \ + arith_length -= x; \ + } \ +} + +#define LZHAM_SYMBOL_CODEC_DECODE_ADAPTIVE_ARITHMETIC(codec, result, model) \ +{ \ + adaptive_arith_data_model *pArith_data_model; \ + pArith_data_model = &model; \ + uint node_index; \ + node_index = 1; \ + do \ + { \ + while (LZHAM_BUILTIN_EXPECT(arith_length < cSymbolCodecArithMinLen, 0)) \ + { \ + uint c; codec.m_saved_node_index = node_index; codec.m_pSaved_model = pArith_data_model; \ + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ + node_index = codec.m_saved_node_index; pArith_data_model = static_cast(codec.m_pSaved_model); \ + arith_value = (arith_value << 8) | c; \ + arith_length <<= 8; \ + } \ + adaptive_bit_model *pBit_model; pBit_model = &pArith_data_model->m_probs[node_index]; \ + uint x = pBit_model->m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ + uint bit; bit = (arith_value >= x); \ + if (!bit) \ + { \ + pBit_model->m_bit_0_prob += ((cSymbolCodecArithProbScale - pBit_model->m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ + arith_length = x; \ + } \ + else \ + { \ + pBit_model->m_bit_0_prob -= (pBit_model->m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ + arith_value -= x; \ + arith_length -= x; \ + } \ + node_index = (node_index << 1) + bit; \ + } while (node_index < pArith_data_model->m_total_syms); \ + result = node_index - pArith_data_model->m_total_syms; \ +} + +#if LZHAM_SYMBOL_CODEC_USE_64_BIT_BUFFER +#define LZHAM_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ +{ \ + quasi_adaptive_huffman_data_model* pModel; const prefix_coding::decoder_tables* pTables; \ + pModel = &model; pTables = model.m_pDecode_tables; \ + if (LZHAM_BUILTIN_EXPECT(bit_count < 24, 0)) \ + { \ + uint c; \ + pDecode_buf_next += sizeof(uint32); \ + if (LZHAM_BUILTIN_EXPECT(pDecode_buf_next >= codec.m_pDecode_buf_end, 0)) \ + { \ + pDecode_buf_next -= sizeof(uint32); \ + while (bit_count < 24) \ + { \ + if (!codec.m_decode_buf_eof) \ + { \ + codec.m_pSaved_huff_model = pModel; \ + LZHAM_SYMBOL_CODEC_DECODE_END(codec) \ + LZHAM_DECODE_NEEDS_BYTES \ + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + pModel = codec.m_pSaved_huff_model; pTables = pModel->m_pDecode_tables; \ + } \ + c = 0; if (pDecode_buf_next < codec.m_pDecode_buf_end) c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + else \ + { \ + c = LZHAM_READ_BIG_ENDIAN_UINT32(pDecode_buf_next - sizeof(uint32)); \ + bit_count += 32; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + uint k = 
static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (LZHAM_BUILTIN_EXPECT(k <= pTables->m_table_max_code, 1)) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t & UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for ( ; ; ) \ + { \ + if (LZHAM_BUILTIN_EXPECT(k <= pTables->m_max_codes[len - 1], 0)) \ + break; \ + len++; \ + } \ + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (((uint)val_ptr >= pModel->m_total_syms)) val_ptr = 0; \ + result = pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = pModel->m_sym_freq[result]; \ + freq++; \ + pModel->m_sym_freq[result] = static_cast(freq); \ + LZHAM_ASSERT(freq <= UINT16_MAX); \ + if (LZHAM_BUILTIN_EXPECT(--pModel->m_symbols_until_update == 0, 0)) \ + { \ + pModel->update(); \ + } \ +} +#else +#define LZHAM_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ +{ \ + quasi_adaptive_huffman_data_model* pModel; const prefix_coding::decoder_tables* pTables; \ + pModel = &model; pTables = model.m_pDecode_tables; \ + while (LZHAM_BUILTIN_EXPECT(bit_count < (symbol_codec::cBitBufSize - 8), 1)) \ + { \ + uint c; \ + if (LZHAM_BUILTIN_EXPECT(pDecode_buf_next == codec.m_pDecode_buf_end, 0)) \ + { \ + if (LZHAM_BUILTIN_EXPECT(!codec.m_decode_buf_eof, 1)) \ + { \ + codec.m_pSaved_huff_model = pModel; \ + LZHAM_SYMBOL_CODEC_DECODE_END(codec) \ + LZHAM_DECODE_NEEDS_BYTES \ + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + pModel = codec.m_pSaved_huff_model; pTables = pModel->m_pDecode_tables; \ + } \ + c = 0; if (LZHAM_BUILTIN_EXPECT(pDecode_buf_next < codec.m_pDecode_buf_end, 1)) c = *pDecode_buf_next++; \ + } \ + else \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (LZHAM_BUILTIN_EXPECT(k <= pTables->m_table_max_code, 1)) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t & UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for ( ; ; ) \ + { \ + if (LZHAM_BUILTIN_EXPECT(k <= pTables->m_max_codes[len - 1], 0)) \ + break; \ + len++; \ + } \ + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (LZHAM_BUILTIN_EXPECT(((uint)val_ptr >= pModel->m_total_syms), 0)) val_ptr = 0; \ + result = pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = pModel->m_sym_freq[result]; \ + freq++; \ + pModel->m_sym_freq[result] = static_cast(freq); \ + LZHAM_ASSERT(freq <= UINT16_MAX); \ + if (LZHAM_BUILTIN_EXPECT(--pModel->m_symbols_until_update == 0, 0)) \ + { \ + pModel->update(); \ + } \ +} +#endif + +#define LZHAM_SYMBOL_CODEC_DECODE_ALIGN_TO_BYTE(codec) if (bit_count & 7) { int dummy_result; LZHAM_NOTE_UNUSED(dummy_result); LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, dummy_result, bit_count & 7); } + +#define LZHAM_SYMBOL_CODEC_DECODE_REMOVE_BYTE_FROM_BIT_BUF(codec, result) \ +{ \ + result = -1; \ + if (bit_count >= 8) \ + { \ + result = static_cast(bit_buf >> (symbol_codec::cBitBufSize - 8)); \ + bit_buf <<= 8; \ + bit_count -= 8; \ + } \ +} + +#define LZHAM_SYMBOL_CODEC_DECODE_ARITH_START(codec) \ +{ \ + 
for ( arith_value = 0, arith_length = 0; arith_length < 4; ++arith_length ) \ + { \ + uint val; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, val, 8); \ + arith_value = (arith_value << 8) | val; \ + } \ + arith_length = cSymbolCodecArithMaxLen; \ +} + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_threading.h b/r5dev/thirdparty/lzham/include/lzham_threading.h new file mode 100644 index 00000000..b8a1dbef --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_threading.h @@ -0,0 +1,12 @@ +// File: lzham_threading.h +// See Copyright Notice and license at the end of include/lzham.h + +#if LZHAM_USE_WIN32_API + #include "lzham_win32_threading.h" +#elif LZHAM_USE_PTHREADS_API + #include "lzham_pthreads_threading.h" +#else + #include "lzham_null_threading.h" +#endif + + diff --git a/r5dev/thirdparty/lzham/include/lzham_timer.h b/r5dev/thirdparty/lzham/include/lzham_timer.h new file mode 100644 index 00000000..a522430a --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_timer.h @@ -0,0 +1,99 @@ +// File: lzham_timer.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + typedef unsigned long long timer_ticks; + + class lzham_timer + { + public: + lzham_timer(); + lzham_timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } + timer_ticks get_elapsed_us() const; + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + static inline double get_secs() { return ticks_to_secs(get_ticks()); } + static inline double get_ms() { return ticks_to_ms(get_ticks()); } + + private: + static timer_ticks g_init_ticks; + static timer_ticks g_freq; + static double g_inv_freq; + + timer_ticks m_start_time; + timer_ticks m_stop_time; + + bool m_started : 1; + bool m_stopped : 1; + }; + + enum var_args_t { cVarArgs }; + +#if LZHAM_PERF_SECTIONS + class scoped_perf_section + { + public: + inline scoped_perf_section() : + m_start_ticks(lzham_timer::get_ticks()) + { + m_name[0] = '?'; + m_name[1] = '\0'; + } + + inline scoped_perf_section(const char *pName) : + m_start_ticks(lzham_timer::get_ticks()) + { + strcpy_s(m_name, pName); + + lzham_buffered_printf("Thread: 0x%08X, BEGIN Time: %3.3fms, Section: %s\n", GetCurrentThreadId(), lzham_timer::ticks_to_ms(m_start_ticks), m_name); + } + + inline scoped_perf_section(var_args_t, const char *pName, ...) 
: + m_start_ticks(lzham_timer::get_ticks()) + { + va_list args; + va_start(args, pName); + vsprintf_s(m_name, sizeof(m_name), pName, args); + va_end(args); + + lzham_buffered_printf("Thread: 0x%08X, BEGIN Time: %3.3fms, Section: %s\n", GetCurrentThreadId(), lzham_timer::ticks_to_ms(m_start_ticks), m_name); + } + + inline ~scoped_perf_section() + { + double end_ms = lzham_timer::get_ms(); + double start_ms = lzham_timer::ticks_to_ms(m_start_ticks); + + lzham_buffered_printf("Thread: 0x%08X, END Time: %3.3fms, Total: %3.3fms, Section: %s\n", GetCurrentThreadId(), end_ms, end_ms - start_ms, m_name); + } + + private: + char m_name[64]; + timer_ticks m_start_ticks; + }; +#else + class scoped_perf_section + { + public: + inline scoped_perf_section() { } + inline scoped_perf_section(const char *pName) { (void)pName; } + inline scoped_perf_section(var_args_t, const char *pName, ...) { (void)pName; } + }; +#endif // LZHAM_PERF_SECTIONS + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_traits.h b/r5dev/thirdparty/lzham/include/lzham_traits.h new file mode 100644 index 00000000..fbb68ce6 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_traits.h @@ -0,0 +1,137 @@ +// File: lzham_traits.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { helpers::construct(p); } + static inline void construct(T* p, const T& init) { helpers::construct(p, init); } + static inline void construct_array(T* p, uint n) { helpers::construct_array(p, n); } + static inline void destruct(T* p) { helpers::destruct(p); } + static inline void destruct_array(T* p, uint n) { helpers::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, uint n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { LZHAM_NOTE_UNUSED(p); } + static inline void destruct_array(T** p, uint n) { LZHAM_NOTE_UNUSED(p); LZHAM_NOTE_UNUSED(n); } + }; + +#define LZHAM_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { LZHAM_NOTE_UNUSED(p); } \ + static inline void destruct_array(X* p, uint n) { LZHAM_NOTE_UNUSED(p); LZHAM_NOTE_UNUSED(n); } }; + + LZHAM_DEFINE_BUILT_IN_TYPE(bool) + LZHAM_DEFINE_BUILT_IN_TYPE(char) + LZHAM_DEFINE_BUILT_IN_TYPE(unsigned char) + LZHAM_DEFINE_BUILT_IN_TYPE(short) + LZHAM_DEFINE_BUILT_IN_TYPE(unsigned short) + LZHAM_DEFINE_BUILT_IN_TYPE(int) + LZHAM_DEFINE_BUILT_IN_TYPE(unsigned int) + LZHAM_DEFINE_BUILT_IN_TYPE(long) + LZHAM_DEFINE_BUILT_IN_TYPE(unsigned long) + LZHAM_DEFINE_BUILT_IN_TYPE(float) + LZHAM_DEFINE_BUILT_IN_TYPE(double) + LZHAM_DEFINE_BUILT_IN_TYPE(long double) + #if defined(WIN32) + LZHAM_DEFINE_BUILT_IN_TYPE(__int64) + LZHAM_DEFINE_BUILT_IN_TYPE(unsigned __int64) + #endif + +#undef LZHAM_DEFINE_BUILT_IN_TYPE + +// See: http://erdani.org/publications/cuj-2004-06.pdf + + template + struct bitwise_movable { enum { cFlag = false }; }; + +// Defines type Q as bitwise movable. 
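+   // (Note: lzham::vector consults this trait when it grows - for types marked bitwise movable
+   //  it passes a NULL object_mover to elemental_vector::increase_capacity(), so elements may be
+   //  relocated as raw bytes rather than move-constructed one at a time; see lzham_vector.h.)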
+#define LZHAM_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + template + struct bitwise_copyable { enum { cFlag = false }; }; + + // Defines type Q as bitwise copyable. +#define LZHAM_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; + +#define LZHAM_IS_POD(T) __is_pod(T) + +#define LZHAM_IS_SCALAR_TYPE(T) (scalar_type::cFlag) + +#define LZHAM_IS_BITWISE_COPYABLE(T) ((scalar_type::cFlag) || (bitwise_copyable::cFlag) || LZHAM_IS_POD(T)) + +#define LZHAM_IS_BITWISE_MOVABLE(T) (LZHAM_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) + +#define LZHAM_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) + + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis + + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + + LZHAM_DEFINE_BITWISE_COPYABLE(empty_type); + LZHAM_DEFINE_BITWISE_MOVABLE(empty_type); + + namespace helpers + { + template + inline void construct_array(T* p, uint n) + { + if (LZHAM_IS_SCALAR_TYPE(T)) + { + memset(p, 0, sizeof(T) * n); + } + else + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T; + } + } + + template + inline void destruct_array(T* p, uint n) + { + if ( LZHAM_HAS_DESTRUCTOR(T) ) + { + T* q = p + n; + for ( ; p != q; ++p) + p->~T(); + } + } + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_types.h b/r5dev/thirdparty/lzham/include/lzham_types.h new file mode 100644 index 00000000..a0227e8a --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_types.h @@ -0,0 +1,74 @@ +// File: types.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned char uint8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint; + typedef signed int int32; + + #ifdef __GNUC__ + typedef unsigned long long uint64; + typedef long long int64; + #else + typedef unsigned __int64 uint64; + typedef signed __int64 int64; + #endif + + const uint8 UINT8_MIN = 0; + //const uint8 UINT8_MAX = 0xFFU; + const uint16 UINT16_MIN = 0; + //const uint16 UINT16_MAX = 0xFFFFU; + const uint32 UINT32_MIN = 0; + //const uint32 UINT32_MAX = 0xFFFFFFFFU; + const uint64 UINT64_MIN = 0; + //const uint64 UINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + + //const int8 INT8_MIN = -128; + //const int8 INT8_MAX = 127; + //const int16 INT16_MIN = -32768; + //const int16 INT16_MAX = 32767; + //const int32 INT32_MIN = (-2147483647 - 1); + //const int32 INT32_MAX = 2147483647; + //const int64 INT64_MIN = (int64)0x8000000000000000ULL; //(-9223372036854775807i64 - 1); + //const int64 INT64_MAX = (int64)0x7FFFFFFFFFFFFFFFULL; //9223372036854775807i64; + +#if LZHAM_64BIT_POINTERS + typedef uint64 uint_ptr; + typedef uint64 uint32_ptr; + typedef int64 signed_size_t; + typedef uint64 ptr_bits_t; + const ptr_bits_t PTR_BITS_XOR = 0xDB0DD4415C87DCF7ULL; +#else + typedef unsigned int uint_ptr; + typedef unsigned int uint32_ptr; + typedef signed int signed_size_t; + typedef uint32 ptr_bits_t; + const ptr_bits_t PTR_BITS_XOR = 
0x5C87DCF7UL; +#endif + + enum + { + cInvalidIndex = -1 + }; + + const uint cIntBits = sizeof(uint) * CHAR_BIT; + + template struct int_traits { enum { cMin = INT_MIN, cMax = INT_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = INT8_MIN, cMax = INT8_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = INT16_MIN, cMax = INT16_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = INT32_MIN, cMax = INT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = 0, cMax = UINT_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = UINT8_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = UINT16_MAX, cSigned = false }; }; + + struct empty_type { }; + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/include/lzham_utils.h b/r5dev/thirdparty/lzham/include/lzham_utils.h new file mode 100644 index 00000000..0e8f5e8b --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_utils.h @@ -0,0 +1,58 @@ +// File: lzham_utils.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#define LZHAM_GET_ALIGNMENT(v) ((!sizeof(v)) ? 1 : (__alignof(v) ? __alignof(v) : sizeof(uint32))) + +#define LZHAM_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define LZHAM_MAX(a, b) (((a) < (b)) ? (b) : (a)) + +template T decay_array_to_subtype(T (&a)[N]); +#define LZHAM_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) + +namespace lzham +{ + namespace utils + { + template inline void swap(T& l, T& r) + { + T temp(l); + l = r; + r = temp; + } + + template inline void zero_object(T& obj) + { + memset(&obj, 0, sizeof(obj)); + } + + static inline uint32 swap32(uint32 x) { return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); } + + inline uint count_leading_zeros16(uint v) + { + LZHAM_ASSERT(v < 0x10000); + + uint temp; + uint n = 16; + + temp = v >> 8; + if (temp) { n -= 8; v = temp; } + + temp = v >> 4; + if (temp) { n -= 4; v = temp; } + + temp = v >> 2; + if (temp) { n -= 2; v = temp; } + + temp = v >> 1; + if (temp) { n -= 1; v = temp; } + + if (v & 1) n--; + + return n; + } + + } // namespace utils + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/include/lzham_vector.h b/r5dev/thirdparty/lzham/include/lzham_vector.h new file mode 100644 index 00000000..90f3236d --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_vector.h @@ -0,0 +1,588 @@ +// File: lzham_vector.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +namespace lzham +{ + struct elemental_vector + { + void* m_p; + uint m_size; + uint m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint num); + + bool increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pRelocate, bool nofail); + }; + + template + class vector : public helpers::rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + m_size(0), + m_capacity(0) + { + } + + inline vector(uint n, const T& init) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + helpers::construct_array(m_p, n, init); + m_size = n; + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = 
other.m_size; + + if (LZHAM_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + } + + inline explicit vector(uint size) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + try_resize(size); + } + + inline ~vector() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + lzham_free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + try_resize(0); + else + { + clear(); + if (!increase_capacity(other.m_size, false)) + { + LZHAM_FAIL("lzham::vector operator=: Out of memory!"); + return *this; + } + } + + if (LZHAM_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + inline const T* begin() const { return m_p; } + T* begin() { return m_p; } + + inline const T* end() const { return m_p + m_size; } + T* end() { return m_p + m_size; } + + inline bool empty() const { return !m_size; } + inline uint size() const { return m_size; } + inline uint size_in_bytes() const { return m_size * sizeof(T); } + inline uint capacity() const { return m_capacity; } + + // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. + inline const T& operator[] (uint i) const { LZHAM_ASSERT(i < m_size); return m_p[i]; } + inline T& operator[] (uint i) { LZHAM_ASSERT(i < m_size); return m_p[i]; } + + // at() always includes range checking, even in final builds, unlike operator []. + // The first element is returned if the index is out of range. + inline const T& at(uint i) const { LZHAM_ASSERT(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + inline T& at(uint i) { LZHAM_ASSERT(i < m_size); return (i >= m_size) ? 
m_p[0] : m_p[i]; } + + inline const T& front() const { LZHAM_ASSERT(m_size); return m_p[0]; } + inline T& front() { LZHAM_ASSERT(m_size); return m_p[0]; } + + inline const T& back() const { LZHAM_ASSERT(m_size); return m_p[m_size - 1]; } + inline T& back() { LZHAM_ASSERT(m_size); return m_p[m_size - 1]; } + + inline const T* get_ptr() const { return m_p; } + inline T* get_ptr() { return m_p; } + + inline void clear() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + lzham_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + lzham_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline bool try_reserve(uint new_capacity) + { + return increase_capacity(new_capacity, true, true); + } + + inline bool try_resize(uint new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + inline bool try_resize_no_construct(uint new_size, bool grow_hint = false) + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + m_size = new_size; + + return true; + } + + inline T* try_enlarge(uint i) + { + uint cur_size = m_size; + if (!try_resize(cur_size + i, true)) + return NULL; + return get_ptr() + cur_size; + } + + inline bool try_push_back(const T& obj) + { + LZHAM_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void pop_back() + { + LZHAM_ASSERT(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline bool insert(uint index, const T* p, uint n) + { + LZHAM_ASSERT(index <= m_size); + if (!n) + return true; + + const uint orig_size = m_size; + if (!try_resize(m_size + n, true)) + return false; + + const uint num_to_move = orig_size - index; + if (num_to_move) + { + if (LZHAM_IS_BITWISE_COPYABLE(T)) + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (uint i = 0; i < num_to_move; i++) + { + LZHAM_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + } + } + + T* pDst = m_p + index; + + if (LZHAM_IS_BITWISE_COPYABLE(T)) + memcpy(pDst, p, sizeof(T) * n); + else + { + for (uint i = 0; i < n; i++) + { + LZHAM_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + + return true; + } + + // push_front() isn't going to be very fast - it's only here for usability. 
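+      // (Note: the implementation below simply forwards to insert(0, &obj, 1), which shifts every
+      //  existing element one slot to the right - O(size()) per call, unlike try_push_back().)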
+ inline bool try_push_front(const T& obj) + { + return insert(0, &obj, 1); + } + + bool append(const vector& other) + { + if (other.m_size) + return insert(m_size, &other[0], other.m_size); + return true; + } + + bool append(const T* p, uint n) + { + if (n) + return insert(m_size, p, n); + return true; + } + + inline void erase(uint start, uint n) + { + LZHAM_ASSERT((start + n) <= m_size); + if ((start + n) > m_size) + return; + + if (!n) + return; + + const uint num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (LZHAM_IS_BITWISE_COPYABLE(T)) + memmove(pDst, pSrc, num_to_move * sizeof(T)); + else + { + T* pDst_end = pDst + num_to_move; + + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + } + + inline void erase(uint index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + LZHAM_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(static_cast(p - m_p)); + } + + void erase_unordered(uint index) + { + LZHAM_ASSERT(index < m_size); + + if ((index + 1) < m_size) + (*this)[index] = back(); + + pop_back(); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint min_size = math::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + utils::swap(m_capacity, other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + uint j = m_size >> 1; + for (uint i = 0; i < j; i++) + utils::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline int find(const T& key) const + { + const T* p = m_p; + const T* p_end = m_p + m_size; + + uint index = 0; + + while (p != p_end) + { + if (key == *p) + return index; + + p++; + index++; + } + + return cInvalidIndex; + } + + inline int find_sorted(const T& key) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for ( ; ; ) + { + LZHAM_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + LZHAM_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + template + inline int find_sorted(const T& key, Q less_than) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. 
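+            // (Each step halves the remaining interval m and moves the probe index i by
+            //  +/-((m + 1) >> 1), with the sign chosen branchlessly from the preceding comparison
+            //  via the `(h ^ cmp) - cmp` expression; the body is written out twice per loop
+            //  iteration to match the "unrolled twice" note above.)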
+ int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for ( ; ; ) + { + LZHAM_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + LZHAM_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + inline uint count_occurences(const T& key) const + { + uint c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + memset(m_p, *reinterpret_cast(&o), m_size); + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + private: + T* m_p; + uint m_size; + uint m_capacity; + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + new (static_cast(pDst)) T(*pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } + + inline bool increase_capacity(uint min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (LZHAM_IS_BITWISE_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); + } + }; + + template struct bitwise_movable< vector > { enum { cFlag = true }; }; + + extern void vector_test(); + + template + inline void swap(vector& a, vector& b) + { + a.swap(b); + } + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/include/lzham_win32_threading.h b/r5dev/thirdparty/lzham/include/lzham_win32_threading.h new file mode 100644 index 00000000..64125ac1 --- /dev/null +++ b/r5dev/thirdparty/lzham/include/lzham_win32_threading.h @@ -0,0 +1,368 @@ +// File: lzham_task_pool_win32.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once + +#if LZHAM_USE_WIN32_API + +#if LZHAM_NO_ATOMICS +#error No atomic operations defined in lzham_platform.h! 
+#endif + +namespace lzham +{ + class semaphore + { + LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + + public: + semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) + { + m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName); + if (NULL == m_handle) + { + LZHAM_FAIL("semaphore: CreateSemaphore() failed"); + } + } + + ~semaphore() + { + if (m_handle) + { + CloseHandle(m_handle); + m_handle = NULL; + } + } + + inline HANDLE get_handle(void) const { return m_handle; } + + void release(long releaseCount = 1) + { + if (0 == ReleaseSemaphore(m_handle, releaseCount, NULL)) + { + LZHAM_FAIL("semaphore: ReleaseSemaphore() failed"); + } + } + + bool wait(uint32 milliseconds = UINT32_MAX) + { + LZHAM_ASSUME(INFINITE == UINT32_MAX); + + DWORD result = WaitForSingleObject(m_handle, milliseconds); + + if (WAIT_FAILED == result) + { + LZHAM_FAIL("semaphore: WaitForSingleObject() failed"); + } + + return WAIT_OBJECT_0 == result; + } + + private: + HANDLE m_handle; + }; + + template + class tsstack + { + public: + inline tsstack(bool use_freelist = true) : + m_use_freelist(use_freelist) + { + LZHAM_VERIFY(((ptr_bits_t)this & (LZHAM_GET_ALIGNMENT(tsstack) - 1)) == 0); + InitializeSListHead(&m_stack_head); + InitializeSListHead(&m_freelist_head); + } + + inline ~tsstack() + { + clear(); + } + + inline void clear() + { + for ( ; ; ) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); + if (!pNode) + break; + + LZHAM_MEMORY_IMPORT_BARRIER + + helpers::destruct(&pNode->m_obj); + + lzham_free(pNode); + } + + flush_freelist(); + } + + inline void flush_freelist() + { + if (!m_use_freelist) + return; + + for ( ; ; ) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_freelist_head); + if (!pNode) + break; + + LZHAM_MEMORY_IMPORT_BARRIER + + lzham_free(pNode); + } + } + + inline bool try_push(const T& obj) + { + node* pNode = alloc_node(); + if (!pNode) + return false; + + helpers::construct(&pNode->m_obj, obj); + + LZHAM_MEMORY_EXPORT_BARRIER + + InterlockedPushEntrySList(&m_stack_head, &pNode->m_slist_entry); + + return true; + } + + inline bool pop(T& obj) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); + if (!pNode) + return false; + + LZHAM_MEMORY_IMPORT_BARRIER + + obj = pNode->m_obj; + + helpers::destruct(&pNode->m_obj); + + free_node(pNode); + + return true; + } + + private: + SLIST_HEADER m_stack_head; + SLIST_HEADER m_freelist_head; + + struct node + { + SLIST_ENTRY m_slist_entry; + T m_obj; + }; + + bool m_use_freelist; + + inline node* alloc_node() + { + node* pNode = m_use_freelist ? 
(node*)InterlockedPopEntrySList(&m_freelist_head) : NULL; + + if (!pNode) + pNode = (node*)lzham_malloc(sizeof(node)); + + return pNode; + } + + inline void free_node(node* pNode) + { + if (m_use_freelist) + InterlockedPushEntrySList(&m_freelist_head, &pNode->m_slist_entry); + else + lzham_free(pNode); + } + }; + + class task_pool + { + public: + task_pool(); + task_pool(uint num_threads); + ~task_pool(); + + enum { cMaxThreads = LZHAM_MAX_HELPER_THREADS }; + bool init(uint num_threads); + void deinit(); + + inline uint get_num_threads() const { return m_num_threads; } + inline uint get_num_outstanding_tasks() const { return m_num_outstanding_tasks; } + + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); + + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; + + // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! + bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); + + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); + + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); + + void join(); + + private: + struct task + { + //inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { } + + uint64 m_data; + void* m_pData_ptr; + + union + { + task_callback_func m_callback; + executable_task* m_pObj; + }; + + uint m_flags; + }; + + tsstack m_task_stack; + + uint m_num_threads; + HANDLE m_threads[cMaxThreads]; + + semaphore m_tasks_available; + + enum task_flags + { + cTaskFlagObject = 1 + }; + + volatile atomic32_t m_num_outstanding_tasks; + volatile atomic32_t m_exit_flag; + + void process_task(task& tsk); + + static unsigned __stdcall thread_func(void* pContext); + }; + + enum object_task_flags + { + cObjectTaskFlagDefault = 0, + cObjectTaskFlagDeleteAfterExecution = 1 + }; + + template + class object_task : public task_pool::executable_task + { + public: + object_task(uint flags = cObjectTaskFlagDefault) : + m_pObject(NULL), + m_pMethod(NULL), + m_flags(flags) + { + } + + typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + + object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : + m_pObject(pObject), + m_pMethod(pMethod), + m_flags(flags) + { + LZHAM_ASSERT(pObject && pMethod); + } + + void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) + { + LZHAM_ASSERT(pObject && pMethod); + + m_pObject = pObject; + m_pMethod = pMethod; + m_flags = flags; + } + + T* get_object() const { return m_pObject; } + object_method_ptr get_method() const { return m_pMethod; } + + virtual void execute_task(uint64 data, void* pData_ptr) + { + (m_pObject->*m_pMethod)(data, pData_ptr); + + if (m_flags & cObjectTaskFlagDeleteAfterExecution) + lzham_delete(this); + } + + protected: + T* m_pObject; + + object_method_ptr m_pMethod; + + uint m_flags; + }; + + template + inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) + { + object_task *pTask = lzham_new< object_task >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!pTask) + return false; + return queue_task(pTask, data, pData_ptr); + } + + template + inline bool 
task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pObject); + LZHAM_ASSERT(num_tasks); + if (!num_tasks) + return true; + + bool status = true; + + uint i; + for (i = 0; i < num_tasks; i++) + { + task tsk; + + tsk.m_pObj = lzham_new< object_task >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!tsk.m_pObj) + { + status = false; + break; + } + + tsk.m_data = first_data + i; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + if (!m_task_stack.try_push(tsk)) + { + status = false; + break; + } + } + + if (i) + { + atomic_add32(&m_num_outstanding_tasks, i); + + m_tasks_available.release(i); + } + + return status; + } + + inline void lzham_sleep(unsigned int milliseconds) + { + Sleep(milliseconds); + } + + uint lzham_get_max_helper_threads(); + +} // namespace lzham + +#endif // LZHAM_USE_WIN32_API diff --git a/r5dev/thirdparty/lzham/libs/lzhamcomp_x64.lib b/r5dev/thirdparty/lzham/libs/lzhamcomp_x64.lib deleted file mode 100644 index 7f5d6e43..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamcomp_x64.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/libs/lzhamcomp_x64D.lib b/r5dev/thirdparty/lzham/libs/lzhamcomp_x64D.lib deleted file mode 100644 index f593696f..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamcomp_x64D.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64.lib b/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64.lib deleted file mode 100644 index 5772956d..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64D.lib b/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64D.lib deleted file mode 100644 index a6665324..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamdecomp_x64D.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/libs/lzhamlib_x64.lib b/r5dev/thirdparty/lzham/libs/lzhamlib_x64.lib deleted file mode 100644 index 18ac9a96..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamlib_x64.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/libs/lzhamlib_x64D.lib b/r5dev/thirdparty/lzham/libs/lzhamlib_x64D.lib deleted file mode 100644 index 121dc906..00000000 Binary files a/r5dev/thirdparty/lzham/libs/lzhamlib_x64D.lib and /dev/null differ diff --git a/r5dev/thirdparty/lzham/lzham_api.cpp b/r5dev/thirdparty/lzham/lzham_api.cpp new file mode 100644 index 00000000..c622b6c5 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_api.cpp @@ -0,0 +1,179 @@ +// File: lzham_api.cpp - Dynamic DLL entrypoints. 
+// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "lzhamdecomp/lzham_decomp.h" +#include "lzhamcomp/lzham_comp.h" + +extern "C" LZHAM_DLL_EXPORT lzham_uint32 lzham_get_version(void) +{ + return LZHAM_DLL_VERSION; +} + +extern "C" LZHAM_DLL_EXPORT void lzham_set_memory_callbacks(lzham_realloc_func pRealloc, lzham_msize_func pMSize, void* pUser_data) +{ + lzham::lzham_lib_set_memory_callbacks(pRealloc, pMSize, pUser_data); +} + +extern "C" LZHAM_DLL_EXPORT lzham_decompress_state_ptr lzham_decompress_init(const lzham_decompress_params *pParams) +{ + return lzham::lzham_lib_decompress_init(pParams); +} + +extern "C" LZHAM_DLL_EXPORT lzham_decompress_state_ptr lzham_decompress_reinit(lzham_decompress_state_ptr p, const lzham_decompress_params *pParams) +{ + return lzham::lzham_lib_decompress_reinit(p, pParams); +} + +extern "C" LZHAM_DLL_EXPORT lzham_decompress_checksums* lzham_decompress_deinit(lzham_decompress_state_ptr p) +{ + return lzham::lzham_lib_decompress_deinit(p); +} + +extern "C" LZHAM_DLL_EXPORT lzham_decompress_status_t lzham_decompress( + lzham_decompress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_bool no_more_input_bytes_flag) +{ + return lzham::lzham_lib_decompress(p, pIn_buf, pIn_buf_size, pOut_buf, pOut_buf_size, no_more_input_bytes_flag); +} + +extern "C" LZHAM_DLL_EXPORT lzham_decompress_status_t lzham_decompress_memory(const lzham_decompress_params *pParams, lzham_uint8* pDst_buf, size_t *pDst_len, const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32 *pCrc32) +{ + return lzham::lzham_lib_decompress_memory(pParams, pDst_buf, pDst_len, pSrc_buf, src_len, pAdler32, pCrc32); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_state_ptr lzham_compress_init(const lzham_compress_params *pParams) +{ + return lzham::lzham_lib_compress_init(pParams); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_state_ptr lzham_compress_reinit(lzham_compress_state_ptr p) +{ + return lzham::lzham_lib_compress_reinit(p); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_checksums* lzham_compress_deinit(lzham_compress_state_ptr p) +{ + return lzham::lzham_lib_compress_deinit(p); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_status_t lzham_compress( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_bool no_more_input_bytes_flag) +{ + return lzham::lzham_lib_compress(p, pIn_buf, pIn_buf_size, pOut_buf, pOut_buf_size, no_more_input_bytes_flag); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_status_t lzham_compress2( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_flush_t flush_type) +{ + return lzham::lzham_lib_compress2(p, pIn_buf, pIn_buf_size, pOut_buf, pOut_buf_size, flush_type); +} + +extern "C" LZHAM_DLL_EXPORT lzham_compress_status_t lzham_compress_memory(const lzham_compress_params *pParams, lzham_uint8* pDst_buf, size_t *pDst_len, const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32 * pCrc32) +{ + return lzham::lzham_lib_compress_memory(pParams, pDst_buf, pDst_len, pSrc_buf, src_len, pAdler32, pCrc32); +} + +// ----------------- zlib-style API's + +extern "C" LZHAM_DLL_EXPORT const char *lzham_z_version(void) +{ + return LZHAM_Z_VERSION; +} + +extern "C" lzham_z_ulong LZHAM_DLL_EXPORT lzham_z_adler32(lzham_z_ulong adler, 
const unsigned char *ptr, size_t buf_len) +{ + return lzham::lzham_lib_z_adler32(adler, ptr, buf_len); +} + +extern "C" lzham_z_ulong LZHAM_DLL_EXPORT lzham_z_crc32(lzham_z_ulong crc, const lzham_uint8 *ptr, size_t buf_len) +{ + return lzham::lzham_lib_z_crc32(crc, ptr, buf_len); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_deflateInit(lzham_z_streamp pStream, int level) +{ + return lzham::lzham_lib_z_deflateInit(pStream, level); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_deflateInit2(lzham_z_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + return lzham::lzham_lib_z_deflateInit2(pStream, level, method, window_bits, mem_level, strategy); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_deflateReset(lzham_z_streamp pStream) +{ + return lzham::lzham_lib_z_deflateReset(pStream); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_deflate(lzham_z_streamp pStream, int flush) +{ + return lzham::lzham_lib_z_deflate(pStream, flush); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_deflateEnd(lzham_z_streamp pStream) +{ + return lzham::lzham_lib_z_deflateEnd(pStream); +} + +extern "C" LZHAM_DLL_EXPORT lzham_z_ulong lzham_z_deflateBound(lzham_z_streamp pStream, lzham_z_ulong source_len) +{ + return lzham::lzham_lib_z_deflateBound(pStream, source_len); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_compress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len) +{ + return lzham::lzham_lib_z_compress(pDest, pDest_len, pSource, source_len); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_compress2(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len, int level) +{ + return lzham::lzham_lib_z_compress2(pDest, pDest_len, pSource, source_len, level); +} + +extern "C" LZHAM_DLL_EXPORT lzham_z_ulong lzham_z_compressBound(lzham_z_ulong source_len) +{ + return lzham::lzham_lib_z_compressBound(source_len); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_inflateInit(lzham_z_streamp pStream) +{ + return lzham::lzham_lib_z_inflateInit(pStream); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_inflateInit2(lzham_z_streamp pStream, int window_bits) +{ + return lzham::lzham_lib_z_inflateInit2(pStream, window_bits); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_inflateReset(lzham_z_streamp pStream) +{ + return lzham::lzham_lib_z_inflateReset(pStream); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_inflate(lzham_z_streamp pStream, int flush) +{ + return lzham::lzham_lib_z_inflate(pStream, flush); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_inflateEnd(lzham_z_streamp pStream) +{ + return lzham::lzham_lib_z_inflateEnd(pStream); +} + +extern "C" LZHAM_DLL_EXPORT int lzham_z_uncompress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len) +{ + return lzham::lzham_lib_z_uncompress(pDest, pDest_len, pSource, source_len); +} + +extern "C" LZHAM_DLL_EXPORT const char *lzham_z_error(int err) +{ + return lzham::lzham_lib_z_error(err); +} diff --git a/r5dev/thirdparty/lzham/lzham_assert.cpp b/r5dev/thirdparty/lzham/lzham_assert.cpp new file mode 100644 index 00000000..efaf0a79 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_assert.cpp @@ -0,0 +1,66 @@ +// File: lzham_assert.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" + +static bool g_fail_exceptions; +static bool g_exit_on_failure = true; + +void lzham_enable_fail_exceptions(bool enabled) +{ + g_fail_exceptions = enabled; +} + +void 
lzham_assert(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; + + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failed: \"%s\"\n", pFile, line, pExp); + + lzham_output_debug_string(buf); + + printf("%s", buf); + + if (lzham_is_debugger_present()) + lzham_debug_break(); +} + +void lzham_fail(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; + + sprintf_s(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); + + lzham_output_debug_string(buf); + + printf("%s", buf); + + if (lzham_is_debugger_present()) + lzham_debug_break(); + +#if LZHAM_USE_WIN32_API + if (g_fail_exceptions) + RaiseException(LZHAM_FAIL_EXCEPTION_CODE, 0, 0, NULL); + else +#endif + if (g_exit_on_failure) + exit(EXIT_FAILURE); +} + +void lzham_trace(const char* pFmt, va_list args) +{ + if (lzham_is_debugger_present()) + { + char buf[512]; + vsprintf_s(buf, sizeof(buf), pFmt, args); + + lzham_output_debug_string(buf); + } +}; + +void lzham_trace(const char* pFmt, ...) +{ + va_list args; + va_start(args, pFmt); + lzham_trace(pFmt, args); + va_end(args); +}; diff --git a/r5dev/thirdparty/lzham/lzham_checksum.cpp b/r5dev/thirdparty/lzham/lzham_checksum.cpp new file mode 100644 index 00000000..3dc616d4 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_checksum.cpp @@ -0,0 +1,73 @@ +// File: lzham_checksum.cpp +#include "include/lzham_core.h" +#include "include/lzham_checksum.h" + +namespace lzham +{ + // Originally from the public domain stb.h header. + uint adler32(const void* pBuf, size_t buflen, uint adler32) + { + if (!pBuf) + return cInitAdler32; + + const uint8* buffer = static_cast(pBuf); + + const unsigned long ADLER_MOD = 65521; + unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; + size_t blocklen; + unsigned long i; + + blocklen = buflen % 5552; + while (buflen) + { + for (i=0; i + 7 < blocklen; i += 8) + { + s1 += buffer[0], s2 += s1; + s1 += buffer[1], s2 += s1; + s1 += buffer[2], s2 += s1; + s1 += buffer[3], s2 += s1; + s1 += buffer[4], s2 += s1; + s1 += buffer[5], s2 += s1; + s1 += buffer[6], s2 += s1; + s1 += buffer[7], s2 += s1; + + buffer += 8; + } + + for (; i < blocklen; ++i) + s1 += *buffer++, s2 += s1; + + s1 %= ADLER_MOD, s2 %= ADLER_MOD; + buflen -= blocklen; + blocklen = 5552; + } + return (s2 << 16) + s1; + } + + // Karl Malbrain's compact CRC-32, with pre and post conditioning. 
+ // See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": + // http://www.geocities.com/malbrain/ + static const lzham_uint32 s_crc32[16] = + { + 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c + }; + + uint crc32(uint crc, const lzham_uint8 *ptr, size_t buf_len) + { + if (!ptr) + return cInitCRC32; + + crc = ~crc; + while (buf_len--) + { + lzham_uint8 b = *ptr++; + crc = (crc >> 4) ^ s_crc32[(crc & 0xF) ^ (b & 0xF)]; + crc = (crc >> 4) ^ s_crc32[(crc & 0xF) ^ (b >> 4)]; + } + return ~crc; + } + + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/lzham_huffman_codes.cpp b/r5dev/thirdparty/lzham/lzham_huffman_codes.cpp new file mode 100644 index 00000000..2ca7e392 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_huffman_codes.cpp @@ -0,0 +1,390 @@ +// File: huffman_codes.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_huffman_codes.h" + +namespace lzham +{ + struct sym_freq + { + uint m_freq; + uint16 m_left; + uint16 m_right; + + inline bool operator< (const sym_freq& other) const + { + return m_freq > other.m_freq; + } + }; + + static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) + { + const uint cMaxPasses = 2; + uint hist[256 * cMaxPasses]; + + memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); + + { + sym_freq* p = syms0; + sym_freq* q = syms0 + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint freq0 = p[0].m_freq; + const uint freq1 = p[1].m_freq; + + hist[ freq0 & 0xFF]++; + hist[256 + ((freq0 >> 8) & 0xFF)]++; + + hist[ freq1 & 0xFF]++; + hist[256 + ((freq1 >> 8) & 0xFF)]++; + } + + if (num_syms & 1) + { + const uint freq = p->m_freq; + + hist[ freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + } + + sym_freq* pCur_syms = syms0; + sym_freq* pNew_syms = syms1; + + const uint total_passes = (hist[256] == num_syms) ? 
1 : cMaxPasses; + + for (uint pass = 0; pass < total_passes; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + sym_freq* p = pCur_syms; + sym_freq* q = pCur_syms + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint c0 = p[0].m_freq; + uint c1 = p[1].m_freq; + + if (pass) + { + c0 >>= 8; + c1 >>= 8; + } + + c0 &= 0xFF; + c1 &= 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset1] = p[1]; + } + } + + if (num_syms & 1) + { + uint c = ((p->m_freq) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew_syms[dst_offset] = *p; + } + + sym_freq* t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + +#if LZHAM_ASSERTS_ENABLED + uint prev_freq = 0; + for (uint i = 0; i < num_syms; i++) + { + LZHAM_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); + prev_freq = pCur_syms[i].m_freq; + } +#endif + + return pCur_syms; + } + + struct huffman_work_tables + { + enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; + + sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + +#if !USE_CALCULATE_MINIMUM_REDUNDANCY + uint16 queue[cMaxInternalNodes]; +#endif + }; + + uint get_generate_huffman_codes_table_size() + { + return sizeof(huffman_work_tables); + } + +#define USE_CALCULATE_MINIMUM_REDUNDANCY 1 +#if USE_CALCULATE_MINIMUM_REDUNDANCY + /* calculate_minimum_redundancy() written by + Alistair Moffat, alistair@cs.mu.oz.au, + Jyrki Katajainen, jyrki@diku.dk + November 1996. 
+   */
+   static void calculate_minimum_redundancy(int A[], int n) {
+      int root;                      /* next root node to be used */
+      int leaf;                      /* next leaf to be used */
+      int next;                      /* next value to be assigned */
+      int avbl;                      /* number of available nodes */
+      int used;                      /* number of internal nodes */
+      int dpth;                      /* current depth of leaves */
+
+      /* check for pathological cases */
+      if (n==0) { return; }
+      if (n==1) { A[0] = 0; return; }
+
+      /* first pass, left to right, setting parent pointers */
+      A[0] += A[1]; root = 0; leaf = 2;
+      for (next=1; next < n-1; next++) {
+         /* select first item for a pairing */
+         if (leaf>=n || A[root]<A[leaf]) {
+            A[next] = A[root]; A[root++] = next;
+         } else
+            A[next] = A[leaf++];
+
+         /* add on the second item */
+         if (leaf>=n || (root<next && A[root]<A[leaf])) {
+            A[next] += A[root]; A[root++] = next;
+         } else
+            A[next] += A[leaf++];
+      }
+
+      /* second pass, right to left, setting internal depths */
+      A[n-2] = 0;
+      for (next=n-3; next>=0; next--)
+         A[next] = A[A[next]]+1;
+
+      /* third pass, right to left, setting leaf depths */
+      avbl = 1; used = dpth = 0; root = n-2; next = n-1;
+      while (avbl>0) {
+         while (root>=0 && A[root]==dpth) {
+            used++; root--;
+         }
+         while (avbl>used) {
+            A[next--] = dpth; avbl--;
+         }
+         avbl = 2*used; dpth++; used = 0;
+      }
+   }
+#endif
+
+   bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret)
+   {
+      if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms))
+         return false;
+
+      huffman_work_tables& state = *static_cast<huffman_work_tables*>(pContext);
+
+      uint max_freq = 0;
+      uint total_freq = 0;
+
+      uint num_used_syms = 0;
+      for (uint i = 0; i < num_syms; i++)
+      {
+         uint freq = pFreq[i];
+
+         if (!freq)
+            pCodesizes[i] = 0;
+         else
+         {
+            total_freq += freq;
+            max_freq = math::maximum(max_freq, freq);
+
+            sym_freq& sf = state.syms0[num_used_syms];
+            sf.m_left = (uint16)i;
+            sf.m_right = UINT16_MAX;
+            sf.m_freq = freq;
+            num_used_syms++;
+         }
+      }
+
+      total_freq_ret = total_freq;
+
+      if (num_used_syms == 1)
+      {
+         pCodesizes[state.syms0[0].m_left] = 1;
+         return true;
+      }
+
+      sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1);
+
+#if USE_CALCULATE_MINIMUM_REDUNDANCY
+      int x[cHuffmanMaxSupportedSyms];
+      for (uint i = 0; i < num_used_syms; i++)
+         x[i] = syms[i].m_freq;
+
+      calculate_minimum_redundancy(x, num_used_syms);
+
+      uint max_len = 0;
+      for (uint i = 0; i < num_used_syms; i++)
+      {
+         uint len = x[i];
+         max_len = math::maximum(len, max_len);
+         pCodesizes[syms[i].m_left] = static_cast<uint8>(len);
+      }
+      max_code_size = max_len;
+#else
+      // Computes Huffman codelengths in linear time. More readable than calculate_minimum_redundancy(), and approximately the same speed, but not in-place.
+ + // Dummy node + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_left = UINT16_MAX; + sf.m_right = UINT16_MAX; + sf.m_freq = UINT_MAX; + + uint next_internal_node = num_used_syms + 1; + + uint queue_front = 0; + uint queue_end = 0; + + uint next_lowest_sym = 0; + + uint num_nodes_remaining = num_used_syms; + do + { + uint left_freq = syms[next_lowest_sym].m_freq; + uint left_child = next_lowest_sym; + + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq)) + { + left_child = state.queue[queue_front]; + left_freq = syms[left_child].m_freq; + + queue_front++; + } + else + next_lowest_sym++; + + uint right_freq = syms[next_lowest_sym].m_freq; + uint right_child = next_lowest_sym; + + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq)) + { + right_child = state.queue[queue_front]; + right_freq = syms[right_child].m_freq; + + queue_front++; + } + else + next_lowest_sym++; + + LZHAM_ASSERT(next_internal_node < huffman_work_tables::cMaxInternalNodes); + + const uint internal_node_index = next_internal_node; + next_internal_node++; + + syms[internal_node_index].m_freq = left_freq + right_freq; + syms[internal_node_index].m_left = static_cast(left_child); + syms[internal_node_index].m_right = static_cast(right_child); + + LZHAM_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes); + state.queue[queue_end] = static_cast(internal_node_index); + queue_end++; + + num_nodes_remaining--; + + } while (num_nodes_remaining > 1); + + LZHAM_ASSERT(next_lowest_sym == num_used_syms); + LZHAM_ASSERT((queue_end - queue_front) == 1); + + uint cur_node_index = state.queue[queue_front]; + + uint32* pStack = (syms == state.syms0) ? (uint32*)state.syms1 : (uint32*)state.syms0; + uint32* pStack_top = pStack; + + uint max_level = 0; + + for ( ; ; ) + { + uint level = cur_node_index >> 16; + uint node_index = cur_node_index & 0xFFFF; + + uint left_child = syms[node_index].m_left; + uint right_child = syms[node_index].m_right; + + uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000; + + if (left_child < num_used_syms) + { + max_level = math::maximum(max_level, level); + + pCodesizes[syms[left_child].m_left] = static_cast(level + 1); + + if (right_child < num_used_syms) + { + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + if (pStack == pStack_top) break; + cur_node_index = *--pStack; + } + else + { + cur_node_index = next_level | right_child; + } + } + else + { + if (right_child < num_used_syms) + { + max_level = math::maximum(max_level, level); + + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + cur_node_index = next_level | left_child; + } + else + { + *pStack++ = next_level | left_child; + + cur_node_index = next_level | right_child; + } + } + } + + max_code_size = max_level + 1; +#endif + + return true; + } + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/lzham_lzbase.cpp b/r5dev/thirdparty/lzham/lzham_lzbase.cpp new file mode 100644 index 00000000..e5cf7c06 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_lzbase.cpp @@ -0,0 +1,71 @@ +// File: lzham_lzbase.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_lzbase.h" + +namespace lzham +{ + void CLZBase::init_slot_tabs() + { + for (uint i = 0; i < m_num_lzx_slots; i++) + { + //printf("%u: 0x%08X - 0x%08X, %u\n", i, m_lzx_position_base[i], m_lzx_position_base[i] + (1 << m_lzx_position_extra_bits[i]) - 1, m_lzx_position_extra_bits[i]); + + uint lo = 
m_lzx_position_base[i]; + uint hi = lo + m_lzx_position_extra_mask[i]; + + uint8* pTab; + uint shift; + uint n; LZHAM_NOTE_UNUSED(n); + + if (hi < 0x1000) + { + pTab = m_slot_tab0; + shift = 0; + n = sizeof(m_slot_tab0); + } + else if (hi < 0x100000) + { + pTab = m_slot_tab1; + shift = 11; + n = sizeof(m_slot_tab1); + } + else if (hi < 0x1000000) + { + pTab = m_slot_tab2; + shift = 16; + n = sizeof(m_slot_tab2); + } + else + break; + + lo >>= shift; + hi >>= shift; + + LZHAM_ASSERT(hi < n); + memset(pTab + lo, (uint8)i, hi - lo + 1); + } + +#ifdef LZHAM_BUILD_DEBUG + uint slot, ofs; + for (uint i = 1; i < m_num_lzx_slots; i++) + { + compute_lzx_position_slot(m_lzx_position_base[i], slot, ofs); + LZHAM_ASSERT(slot == i); + + compute_lzx_position_slot(m_lzx_position_base[i] + m_lzx_position_extra_mask[i], slot, ofs); + LZHAM_ASSERT(slot == i); + } + + for (uint i = 1; i <= (m_dict_size-1); i += 512U*1024U) + { + compute_lzx_position_slot(i, slot, ofs); + LZHAM_ASSERT(i == m_lzx_position_base[slot] + ofs); + } + + compute_lzx_position_slot(m_dict_size - 1, slot, ofs); + LZHAM_ASSERT((m_dict_size - 1) == m_lzx_position_base[slot] + ofs); +#endif + } +} //namespace lzham + diff --git a/r5dev/thirdparty/lzham/lzham_match_accel.cpp b/r5dev/thirdparty/lzham/lzham_match_accel.cpp new file mode 100644 index 00000000..629e950f --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_match_accel.cpp @@ -0,0 +1,562 @@ +// File: lzham_match_accel.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_match_accel.h" +#include "include/lzham_timer.h" + +namespace lzham +{ + static inline uint32 hash2_to_12(uint c0, uint c1) + { + return c0 ^ (c1 << 4); + } + + static inline uint32 hash3_to_16(uint c0, uint c1, uint c2) + { + return (c0 | (c1 << 8)) ^ (c2 << 4); + } + + search_accelerator::search_accelerator() : + m_pLZBase(NULL), + m_pTask_pool(NULL), + m_max_helper_threads(0), + m_max_dict_size(0), + m_max_dict_size_mask(0), + m_lookahead_pos(0), + m_lookahead_size(0), + m_cur_dict_size(0), + m_fill_lookahead_pos(0), + m_fill_lookahead_size(0), + m_fill_dict_size(0), + m_max_probes(0), + m_max_matches(0), + m_all_matches(false), + m_next_match_ref(0), + m_num_completed_helper_threads(0) + { + } + + bool search_accelerator::init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes) + { + LZHAM_ASSERT(pLZBase); + LZHAM_ASSERT(max_dict_size && math::is_power_of_2(max_dict_size)); + LZHAM_ASSERT(max_probes); + + m_max_probes = LZHAM_MIN(cMatchAccelMaxSupportedProbes, max_probes); + + m_pLZBase = pLZBase; + m_pTask_pool = max_helper_threads ? pPool : NULL; + m_max_helper_threads = m_pTask_pool ? 
max_helper_threads : 0; + m_max_matches = LZHAM_MIN(m_max_probes, max_matches); + m_all_matches = all_matches; + + m_max_dict_size = max_dict_size; + m_max_dict_size_mask = m_max_dict_size - 1; + m_cur_dict_size = 0; + m_lookahead_size = 0; + m_lookahead_pos = 0; + m_fill_lookahead_pos = 0; + m_fill_lookahead_size = 0; + m_fill_dict_size = 0; + m_num_completed_helper_threads = 0; + + if (!m_dict.try_resize_no_construct(max_dict_size + LZHAM_MIN(m_max_dict_size, static_cast(CLZBase::cMaxHugeMatchLen)))) + return false; + + if (!m_hash.try_resize_no_construct(cHashSize)) + return false; + + if (!m_nodes.try_resize_no_construct(max_dict_size)) + return false; + + memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes()); + + return true; + } + + void search_accelerator::reset() + { + m_cur_dict_size = 0; + m_lookahead_size = 0; + m_lookahead_pos = 0; + m_fill_lookahead_pos = 0; + m_fill_lookahead_size = 0; + m_fill_dict_size = 0; + m_num_completed_helper_threads = 0; + + // Clearing the hash tables is only necessary for determinism (otherwise, it's possible the matches returned after a reset will depend on the data processes before the reset). + if (m_hash.size()) + memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes()); + if (m_digram_hash.size()) + memset(m_digram_hash.get_ptr(), 0, m_digram_hash.size_in_bytes()); + } + + void search_accelerator::flush() + { + m_cur_dict_size = 0; + } + + uint search_accelerator::get_max_add_bytes() const + { + uint add_pos = static_cast(m_lookahead_pos & (m_max_dict_size - 1)); + return m_max_dict_size - add_pos; + } + + static uint8 g_hamming_dist[256] = + { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; + + void search_accelerator::find_all_matches_callback(uint64 data, void* pData_ptr) + { + scoped_perf_section find_all_matches_timer("find_all_matches_callback"); + + LZHAM_NOTE_UNUSED(pData_ptr); + const uint thread_index = (uint)data; + + dict_match temp_matches[cMatchAccelMaxSupportedProbes * 2]; + + uint fill_lookahead_pos = m_fill_lookahead_pos; + uint fill_dict_size = m_fill_dict_size; + uint fill_lookahead_size = m_fill_lookahead_size; + + uint c0 = 0, c1 = 0; + if (fill_lookahead_size >= 2) + { + c0 = m_dict[fill_lookahead_pos & m_max_dict_size_mask]; + c1 = m_dict[(fill_lookahead_pos & m_max_dict_size_mask) + 1]; + } + + const uint8* pDict = m_dict.get_ptr(); + + while (fill_lookahead_size >= 3) + { + uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask; + + uint c2 = pDict[insert_pos + 2]; + uint h = hash3_to_16(c0, c1, c2); + c0 = c1; + c1 = c2; + + LZHAM_ASSERT(!m_hash_thread_index.size() || (m_hash_thread_index[h] != UINT8_MAX)); + + // Only process those strings that this worker thread was assigned to - this allows us to manipulate multiple trees in parallel with no worries about synchronization. 
+ if (m_hash_thread_index.size() && (m_hash_thread_index[h] != thread_index)) + { + fill_lookahead_pos++; + fill_lookahead_size--; + fill_dict_size++; + continue; + } + + dict_match* pDstMatch = temp_matches; + + uint cur_pos = m_hash[h]; + m_hash[h] = static_cast(fill_lookahead_pos); + + uint *pLeft = &m_nodes[insert_pos].m_left; + uint *pRight = &m_nodes[insert_pos].m_right; + + const uint max_match_len = LZHAM_MIN(static_cast(CLZBase::cMaxMatchLen), fill_lookahead_size); + uint best_match_len = 2; + + const uint8* pIns = &pDict[insert_pos]; + + uint n = m_max_probes; + for ( ; ; ) + { + uint delta_pos = fill_lookahead_pos - cur_pos; + if ((n-- == 0) || (!delta_pos) || (delta_pos >= fill_dict_size)) + { + *pLeft = 0; + *pRight = 0; + break; + } + + uint pos = cur_pos & m_max_dict_size_mask; + node *pNode = &m_nodes[pos]; + + // Unfortunately, the initial compare match_len must be 0 because of the way we hash and truncate matches at the end of each block. + uint match_len = 0; + const uint8* pComp = &pDict[pos]; + +#if LZHAM_PLATFORM_X360 + for ( ; match_len < max_match_len; match_len++) + if (pComp[match_len] != pIns[match_len]) + break; +#else + // Compare a qword at a time for a bit more efficiency. + const uint64* pComp_end = reinterpret_cast(pComp + max_match_len - 7); + const uint64* pComp_cur = reinterpret_cast(pComp); + const uint64* pIns_cur = reinterpret_cast(pIns); + while (pComp_cur < pComp_end) + { + if (*pComp_cur != *pIns_cur) + break; + pComp_cur++; + pIns_cur++; + } + uint alt_match_len = static_cast(reinterpret_cast(pComp_cur) - reinterpret_cast(pComp)); + for ( ; alt_match_len < max_match_len; alt_match_len++) + if (pComp[alt_match_len] != pIns[alt_match_len]) + break; +#ifdef LZVERIFY + for ( ; match_len < max_match_len; match_len++) + if (pComp[match_len] != pIns[match_len]) + break; + LZHAM_VERIFY(alt_match_len == match_len); +#endif + match_len = alt_match_len; +#endif + + if (match_len > best_match_len) + { + pDstMatch->m_len = static_cast(match_len - CLZBase::cMinMatchLen); + pDstMatch->m_dist = delta_pos; + pDstMatch++; + + best_match_len = match_len; + + if (match_len == max_match_len) + { + *pLeft = pNode->m_left; + *pRight = pNode->m_right; + break; + } + } + else if (m_all_matches) + { + pDstMatch->m_len = static_cast(match_len - CLZBase::cMinMatchLen); + pDstMatch->m_dist = delta_pos; + pDstMatch++; + } + else if ((best_match_len > 2) && (best_match_len == match_len)) + { + uint bestMatchDist = pDstMatch[-1].m_dist; + uint compMatchDist = delta_pos; + + uint bestMatchSlot, bestMatchSlotOfs; + m_pLZBase->compute_lzx_position_slot(bestMatchDist, bestMatchSlot, bestMatchSlotOfs); + + uint compMatchSlot, compMatchOfs; + m_pLZBase->compute_lzx_position_slot(compMatchDist, compMatchSlot, compMatchOfs); + + // If both matches uses the same match slot, choose the one with the offset containing the lowest nibble as these bits separately entropy coded. + // This could choose a match which is further away in the absolute sense, but closer in a coding sense. + if ( (compMatchSlot < bestMatchSlot) || + ((compMatchSlot >= 8) && (compMatchSlot == bestMatchSlot) && ((compMatchOfs & 15) < (bestMatchSlotOfs & 15))) ) + { + LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len); + pDstMatch[-1].m_dist = delta_pos; + } + else if ((match_len < max_match_len) && (compMatchSlot <= bestMatchSlot)) + { + // Choose the match which has lowest hamming distance in the mismatch byte for a tiny win on binary files. + // TODO: This competes against the prev. 
optimization. + uint desired_mismatch_byte = pIns[match_len]; + + uint cur_mismatch_byte = pDict[(insert_pos - bestMatchDist + match_len) & m_max_dict_size_mask]; + uint cur_mismatch_dist = g_hamming_dist[cur_mismatch_byte ^ desired_mismatch_byte]; + + uint new_mismatch_byte = pComp[match_len]; + uint new_mismatch_dist = g_hamming_dist[new_mismatch_byte ^ desired_mismatch_byte]; + if (new_mismatch_dist < cur_mismatch_dist) + { + LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len); + pDstMatch[-1].m_dist = delta_pos; + } + } + } + + uint new_pos; + if (pComp[match_len] < pIns[match_len]) + { + *pLeft = cur_pos; + pLeft = &pNode->m_right; + new_pos = pNode->m_right; + } + else + { + *pRight = cur_pos; + pRight = &pNode->m_left; + new_pos = pNode->m_left; + } + if (new_pos == cur_pos) + break; + cur_pos = new_pos; + } + + const uint num_matches = (uint)(pDstMatch - temp_matches); + + if (num_matches) + { + pDstMatch[-1].m_dist |= 0x80000000; + + const uint num_matches_to_write = LZHAM_MIN(num_matches, m_max_matches); + + const uint match_ref_ofs = atomic_exchange_add(&m_next_match_ref, num_matches_to_write); + + memcpy(&m_matches[match_ref_ofs], + temp_matches + (num_matches - num_matches_to_write), + sizeof(temp_matches[0]) * num_matches_to_write); + + // FIXME: This is going to really hurt on platforms requiring export barriers. + LZHAM_MEMORY_EXPORT_BARRIER + + atomic_exchange32((atomic32_t*)&m_match_refs[static_cast(fill_lookahead_pos - m_fill_lookahead_pos)], match_ref_ofs); + } + else + { + atomic_exchange32((atomic32_t*)&m_match_refs[static_cast(fill_lookahead_pos - m_fill_lookahead_pos)], -2); + } + + fill_lookahead_pos++; + fill_lookahead_size--; + fill_dict_size++; + } + + while (fill_lookahead_size) + { + uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask; + m_nodes[insert_pos].m_left = 0; + m_nodes[insert_pos].m_right = 0; + + atomic_exchange32((atomic32_t*)&m_match_refs[static_cast(fill_lookahead_pos - m_fill_lookahead_pos)], -2); + + fill_lookahead_pos++; + fill_lookahead_size--; + fill_dict_size++; + } + + atomic_increment32(&m_num_completed_helper_threads); + } + + bool search_accelerator::find_len2_matches() + { + if (!m_digram_hash.size()) + { + if (!m_digram_hash.try_resize(cDigramHashSize)) + return false; + } + + if (m_digram_next.size() < m_lookahead_size) + { + if (!m_digram_next.try_resize(m_lookahead_size)) + return false; + } + + uint lookahead_dict_pos = m_lookahead_pos & m_max_dict_size_mask; + + for (int lookahead_ofs = 0; lookahead_ofs < ((int)m_lookahead_size - 1); ++lookahead_ofs, ++lookahead_dict_pos) + { + uint c0 = m_dict[lookahead_dict_pos]; + uint c1 = m_dict[lookahead_dict_pos + 1]; + + uint h = hash2_to_12(c0, c1) & (cDigramHashSize - 1); + + m_digram_next[lookahead_ofs] = m_digram_hash[h]; + m_digram_hash[h] = m_lookahead_pos + lookahead_ofs; + } + + m_digram_next[m_lookahead_size - 1] = 0; + + return true; + } + + uint search_accelerator::get_len2_match(uint lookahead_ofs) + { + if ((m_fill_lookahead_size - lookahead_ofs) < 2) + return 0; + + uint cur_pos = m_lookahead_pos + lookahead_ofs; + + uint next_match_pos = m_digram_next[cur_pos - m_fill_lookahead_pos]; + + uint match_dist = cur_pos - next_match_pos; + + if ((!match_dist) || (match_dist > CLZBase::cMaxLen2MatchDist) || (match_dist > (m_cur_dict_size + lookahead_ofs))) + return 0; + + const uint8* pCur = &m_dict[cur_pos & m_max_dict_size_mask]; + const uint8* pMatch = &m_dict[next_match_pos & m_max_dict_size_mask]; + + if ((pCur[0] == pMatch[0]) && 
(pCur[1] == pMatch[1])) + return match_dist; + + return 0; + } + + bool search_accelerator::find_all_matches(uint num_bytes) + { + if (!m_matches.try_resize_no_construct(m_max_probes * num_bytes)) + return false; + + if (!m_match_refs.try_resize_no_construct(num_bytes)) + return false; + + memset(m_match_refs.get_ptr(), 0xFF, m_match_refs.size_in_bytes()); + + m_fill_lookahead_pos = m_lookahead_pos; + m_fill_lookahead_size = num_bytes; + m_fill_dict_size = m_cur_dict_size; + + m_next_match_ref = 0; + + if (!m_pTask_pool) + { + find_all_matches_callback(0, NULL); + + m_num_completed_helper_threads = 0; + } + else + { + if (!m_hash_thread_index.try_resize_no_construct(0x10000)) + return false; + + memset(m_hash_thread_index.get_ptr(), 0xFF, m_hash_thread_index.size_in_bytes()); + + uint next_thread_index = 0; + const uint8* pDict = &m_dict[m_lookahead_pos & m_max_dict_size_mask]; + uint num_unique_trigrams = 0; + + if (num_bytes >= 3) + { + uint c0 = pDict[0]; + uint c1 = pDict[1]; + + const int limit = ((int)num_bytes - 2); + for (int i = 0; i < limit; i++) + { + uint c2 = pDict[2]; + uint t = hash3_to_16(c0, c1, c2); + c0 = c1; + c1 = c2; + + pDict++; + + if (m_hash_thread_index[t] == UINT8_MAX) + { + num_unique_trigrams++; + + m_hash_thread_index[t] = static_cast(next_thread_index); + if (++next_thread_index == m_max_helper_threads) + next_thread_index = 0; + } + } + } + + m_num_completed_helper_threads = 0; + + if (!m_pTask_pool->queue_multiple_object_tasks(this, &search_accelerator::find_all_matches_callback, 0, m_max_helper_threads)) + return false; + } + + return find_len2_matches(); + } + + bool search_accelerator::add_bytes_begin(uint num_bytes, const uint8* pBytes) + { + LZHAM_ASSERT(num_bytes <= m_max_dict_size); + LZHAM_ASSERT(!m_lookahead_size); + + uint add_pos = m_lookahead_pos & m_max_dict_size_mask; + LZHAM_ASSERT((add_pos + num_bytes) <= m_max_dict_size); + + memcpy(&m_dict[add_pos], pBytes, num_bytes); + + uint dict_bytes_to_mirror = LZHAM_MIN(static_cast(CLZBase::cMaxHugeMatchLen), m_max_dict_size); + if (add_pos < dict_bytes_to_mirror) + memcpy(&m_dict[m_max_dict_size], &m_dict[0], dict_bytes_to_mirror); + + m_lookahead_size = num_bytes; + + uint max_possible_dict_size = m_max_dict_size - num_bytes; + m_cur_dict_size = LZHAM_MIN(m_cur_dict_size, max_possible_dict_size); + + m_next_match_ref = 0; + + return find_all_matches(num_bytes); + } + + void search_accelerator::add_bytes_end() + { + if (m_pTask_pool) + { + m_pTask_pool->join(); + } + + LZHAM_ASSERT((uint)m_next_match_ref <= m_matches.size()); + } + + dict_match* search_accelerator::find_matches(uint lookahead_ofs, bool spin) + { + LZHAM_ASSERT(lookahead_ofs < m_lookahead_size); + + const uint match_ref_ofs = static_cast(m_lookahead_pos - m_fill_lookahead_pos + lookahead_ofs); + + int match_ref; + uint spin_count = 0; + + // This may spin until the match finder job(s) catch up to the caller's lookahead position. 
+ for ( ; ; ) + { + match_ref = m_match_refs[match_ref_ofs]; + if (match_ref == -2) + return NULL; + else if (match_ref != -1) + break; + + spin_count++; + const uint cMaxSpinCount = 1000; + if ((spin) && (spin_count < cMaxSpinCount)) + { + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + + LZHAM_MEMORY_IMPORT_BARRIER + } + else + { + spin_count = cMaxSpinCount; + + lzham_sleep(1); + } + } + + LZHAM_MEMORY_IMPORT_BARRIER + + return &m_matches[match_ref]; + } + + void search_accelerator::advance_bytes(uint num_bytes) + { + LZHAM_ASSERT(num_bytes <= m_lookahead_size); + + m_lookahead_pos += num_bytes; + m_lookahead_size -= num_bytes; + + m_cur_dict_size += num_bytes; + LZHAM_ASSERT(m_cur_dict_size <= m_max_dict_size); + } +} diff --git a/r5dev/thirdparty/lzham/lzham_mem.cpp b/r5dev/thirdparty/lzham/lzham_mem.cpp new file mode 100644 index 00000000..0c961ed6 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_mem.cpp @@ -0,0 +1,272 @@ +// File: lzham_mem.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include +#include +#include + +using namespace lzham; + +#define LZHAM_MEM_STATS 0 + +#ifndef LZHAM_USE_WIN32_API + #define _msize malloc_usable_size +#endif + +namespace lzham +{ + #if LZHAM_64BIT_POINTERS + const uint64 MAX_POSSIBLE_BLOCK_SIZE = 0x400000000ULL; + #else + const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + #endif + + #if LZHAM_MEM_STATS + #if LZHAM_64BIT_POINTERS + typedef atomic64_t mem_stat_t; + #define LZHAM_MEM_COMPARE_EXCHANGE atomic_compare_exchange64 + #else + typedef atomic32_t mem_stat_t; + #define LZHAM_MEM_COMPARE_EXCHANGE atomic_compare_exchange32 + #endif + + static volatile atomic32_t g_total_blocks; + static volatile mem_stat_t g_total_allocated; + static volatile mem_stat_t g_max_allocated; + + static mem_stat_t update_total_allocated(int block_delta, mem_stat_t byte_delta) + { + atomic32_t cur_total_blocks; + for ( ; ; ) + { + cur_total_blocks = g_total_blocks; + atomic32_t new_total_blocks = static_cast(cur_total_blocks + block_delta); + LZHAM_ASSERT(new_total_blocks >= 0); + if (atomic_compare_exchange32(&g_total_blocks, new_total_blocks, cur_total_blocks) == cur_total_blocks) + break; + } + + mem_stat_t cur_total_allocated, new_total_allocated; + for ( ; ; ) + { + cur_total_allocated = g_total_allocated; + new_total_allocated = static_cast(cur_total_allocated + byte_delta); + LZHAM_ASSERT(new_total_allocated >= 0); + if (LZHAM_MEM_COMPARE_EXCHANGE(&g_total_allocated, new_total_allocated, cur_total_allocated) == cur_total_allocated) + break; + } + for ( ; ; ) + { + mem_stat_t cur_max_allocated = g_max_allocated; + mem_stat_t new_max_allocated = LZHAM_MAX(new_total_allocated, cur_max_allocated); + if (LZHAM_MEM_COMPARE_EXCHANGE(&g_max_allocated, new_max_allocated, cur_max_allocated) == cur_max_allocated) + break; + } + return new_total_allocated; + } + #endif // LZHAM_MEM_STATS + + static void* lzham_default_realloc(void* p, size_t size, size_t* pActual_size, lzham_bool movable, void* pUser_data) + { + LZHAM_NOTE_UNUSED(pUser_data); + + void* p_new; + + if (!p) + { + p_new = malloc(size); + LZHAM_ASSERT( (reinterpret_cast(p_new) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + + if (pActual_size) + *pActual_size = p_new ? 
_msize(p_new) : 0; + } + else if (!size) + { + free(p); + p_new = NULL; + + if (pActual_size) + *pActual_size = 0; + } + else + { + void* p_final_block = p; +#ifdef WIN32 + p_new = _expand(p, size); +#else + + p_new = NULL; +#endif + + if (p_new) + { + LZHAM_ASSERT( (reinterpret_cast(p_new) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + p_final_block = p_new; + } + else if (movable) + { + p_new = realloc(p, size); + + if (p_new) + { + LZHAM_ASSERT( (reinterpret_cast(p_new) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + p_final_block = p_new; + } + } + + if (pActual_size) + *pActual_size = _msize(p_final_block); + } + + return p_new; + } + + static size_t lzham_default_msize(void* p, void* pUser_data) + { + LZHAM_NOTE_UNUSED(pUser_data); + return p ? _msize(p) : 0; + } + + static lzham_realloc_func g_pRealloc = lzham_default_realloc; + static lzham_msize_func g_pMSize = lzham_default_msize; + static void* g_pUser_data; + + static inline void lzham_mem_error(const char* p_msg) + { + lzham_assert(p_msg, __FILE__, __LINE__); + } + + void* lzham_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + lzham_mem_error("lzham_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + *pActual_size = actual_size; + + if ((!p_new) || (actual_size < size)) + { + lzham_mem_error("lzham_malloc: out of memory"); + return NULL; + } + + LZHAM_ASSERT((reinterpret_cast(p_new) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) == 0); + +#if LZHAM_MEM_STATS + update_total_allocated(1, static_cast(actual_size)); +#endif + + return p_new; + } + + void* lzham_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((ptr_bits_t)p & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) + { + lzham_mem_error("lzham_realloc: bad ptr"); + return NULL; + } + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + lzham_mem_error("lzham_malloc: size too big"); + return NULL; + } + +#if LZHAM_MEM_STATS + size_t cur_size = p ? 
(*g_pMSize)(p, g_pUser_data) : 0; +#endif + + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + + if (pActual_size) + *pActual_size = actual_size; + + LZHAM_ASSERT((reinterpret_cast(p_new) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) == 0); + +#if LZHAM_MEM_STATS + int num_new_blocks = 0; + if (p) + { + if (!p_new) + num_new_blocks = -1; + } + else if (p_new) + { + num_new_blocks = 1; + } + update_total_allocated(num_new_blocks, static_cast(actual_size) - static_cast(cur_size)); +#endif + + return p_new; + } + + void lzham_free(void* p) + { + if (!p) + return; + + if (reinterpret_cast(p) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) + { + lzham_mem_error("lzham_free: bad ptr"); + return; + } + +#if LZHAM_MEM_STATS + size_t cur_size = (*g_pMSize)(p, g_pUser_data); + update_total_allocated(-1, -static_cast(cur_size)); +#endif + + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + } + + size_t lzham_msize(void* p) + { + if (!p) + return 0; + + if (reinterpret_cast(p) & (LZHAM_MIN_ALLOC_ALIGNMENT - 1)) + { + lzham_mem_error("lzham_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); + } + + void LZHAM_CDECL lzham_lib_set_memory_callbacks(lzham_realloc_func pRealloc, lzham_msize_func pMSize, void* pUser_data) + { + if ((!pRealloc) || (!pMSize)) + { + g_pRealloc = lzham_default_realloc; + g_pMSize = lzham_default_msize; + g_pUser_data = NULL; + } + else + { + g_pRealloc = pRealloc; + g_pMSize = pMSize; + g_pUser_data = pUser_data; + } + } + + void lzham_print_mem_stats() + { +#if LZHAM_MEM_STATS + printf("Current blocks: %u, allocated: %I64u, max ever allocated: %I64i\n", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); +#endif + } + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/lzham_platform.cpp b/r5dev/thirdparty/lzham/lzham_platform.cpp new file mode 100644 index 00000000..17a7d092 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_platform.cpp @@ -0,0 +1,146 @@ +// File: platform.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_timer.h" + +#if LZHAM_PLATFORM_X360 +#include +#endif + +#ifndef _MSC_VER +int sprintf_s(char *buffer, size_t sizeOfBuffer, const char *format, ...) +{ + if (!sizeOfBuffer) + return 0; + + va_list args; + va_start(args, format); + int c = vsnprintf(buffer, sizeOfBuffer, format, args); + va_end(args); + + buffer[sizeOfBuffer - 1] = '\0'; + + if (c < 0) + return sizeOfBuffer - 1; + + return LZHAM_MIN(c, (int)sizeOfBuffer - 1); +} +int vsprintf_s(char *buffer, size_t sizeOfBuffer, const char *format, va_list args) +{ + if (!sizeOfBuffer) + return 0; + + int c = vsnprintf(buffer, sizeOfBuffer, format, args); + + buffer[sizeOfBuffer - 1] = '\0'; + + if (c < 0) + return sizeOfBuffer - 1; + + return LZHAM_MIN(c, (int)sizeOfBuffer - 1); +} +#endif // __GNUC__ + +bool lzham_is_debugger_present(void) +{ +#if LZHAM_PLATFORM_X360 + return DmIsDebuggerPresent() != 0; +#elif LZHAM_USE_WIN32_API + return IsDebuggerPresent() != 0; +#else + return false; +#endif +} + +void lzham_debug_break(void) +{ +#if LZHAM_USE_WIN32_API + DebugBreak(); +#endif +} + +void lzham_output_debug_string(const char* p) +{ + LZHAM_NOTE_UNUSED(p); +#if LZHAM_USE_WIN32_API + OutputDebugStringA(p); +#endif +} + +#if LZHAM_BUFFERED_PRINTF +// This stuff was a quick hack only intended for debugging/development. 
+namespace lzham +{ + struct buffered_str + { + enum { cBufSize = 256 }; + char m_buf[cBufSize]; + }; + + static lzham::vector g_buffered_strings; + static volatile long g_buffered_string_locked; + + static void lock_buffered_strings() + { + while (atomic_exchange32(&g_buffered_string_locked, 1) == 1) + { + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + lzham_yield_processor(); + } + + LZHAM_MEMORY_IMPORT_BARRIER + } + + static void unlock_buffered_strings() + { + LZHAM_MEMORY_EXPORT_BARRIER + + atomic_exchange32(&g_buffered_string_locked, 0); + } + +} // namespace lzham + +void lzham_buffered_printf(const char *format, ...) +{ + format; + + char buf[lzham::buffered_str::cBufSize]; + + va_list args; + va_start(args, format); + vsnprintf_s(buf, sizeof(buf), sizeof(buf), format, args); + va_end(args); + + buf[sizeof(buf) - 1] = '\0'; + + lzham::lock_buffered_strings(); + + if (!lzham::g_buffered_strings.capacity()) + { + lzham::g_buffered_strings.try_reserve(2048); + } + + if (lzham::g_buffered_strings.try_resize(lzham::g_buffered_strings.size() + 1)) + { + memcpy(lzham::g_buffered_strings.back().m_buf, buf, sizeof(buf)); + } + + lzham::unlock_buffered_strings(); +} + +void lzham_flush_buffered_printf() +{ + lzham::lock_buffered_strings(); + + for (lzham::uint i = 0; i < lzham::g_buffered_strings.size(); i++) + { + printf("%s", lzham::g_buffered_strings[i].m_buf); + } + + lzham::g_buffered_strings.try_resize(0); + + lzham::unlock_buffered_strings(); +} +#endif diff --git a/r5dev/thirdparty/lzham/lzham_polar_codes.cpp b/r5dev/thirdparty/lzham/lzham_polar_codes.cpp new file mode 100644 index 00000000..55696fff --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_polar_codes.cpp @@ -0,0 +1,414 @@ +// File: polar_codes.cpp +// See Copyright Notice and license at the end of include/lzham.h +// +// Andrew Polar's prefix code algorithm: +// http://ezcodesample.com/prefixer/prefixer_article.html +// +// Also implements Fyffe's approximate codelength generation method, which is +// very similar but operates directly on codelengths vs. symbol frequencies: +// Fyffe Codes for Fast Codelength Approximation, Graham Fyffe, 1999 +// http://code.google.com/p/lzham/wiki/FyffeCodes +#include "include/lzham_core.h" +#include "include/lzham_polar_codes.h" + +#define LZHAM_USE_SHANNON_FANO_CODES 0 +#define LZHAM_USE_FYFFE_CODES 0 + +namespace lzham +{ + struct sym_freq + { + uint16 m_freq; + uint16 m_sym; + }; + + static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) + { + const uint cMaxPasses = 2; + uint hist[256 * cMaxPasses]; + + memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); + + { + sym_freq* p = syms0; + sym_freq* q = syms0 + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint freq0 = p[0].m_freq; + const uint freq1 = p[1].m_freq; + + hist[ freq0 & 0xFF]++; + hist[256 + ((freq0 >> 8) & 0xFF)]++; + + hist[ freq1 & 0xFF]++; + hist[256 + ((freq1 >> 8) & 0xFF)]++; + } + + if (num_syms & 1) + { + const uint freq = p->m_freq; + + hist[ freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + } + + sym_freq* pCur_syms = syms0; + sym_freq* pNew_syms = syms1; + + const uint total_passes = (hist[256] == num_syms) ? 
1 : cMaxPasses; + + for (uint pass = 0; pass < total_passes; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + sym_freq* p = pCur_syms; + sym_freq* q = pCur_syms + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint c0 = p[0].m_freq; + uint c1 = p[1].m_freq; + + if (pass) + { + c0 >>= 8; + c1 >>= 8; + } + + c0 &= 0xFF; + c1 &= 0xFF; + + // Cut down on LHS's on console platforms by processing two at a time. + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset1] = p[1]; + } + } + + if (num_syms & 1) + { + uint c = ((p->m_freq) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew_syms[dst_offset] = *p; + } + + sym_freq* t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + +#if LZHAM_ASSERTS_ENABLED + uint prev_freq = 0; + for (uint i = 0; i < num_syms; i++) + { + LZHAM_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); + prev_freq = pCur_syms[i].m_freq; + } +#endif + + return pCur_syms; + } + + struct polar_work_tables + { + sym_freq syms0[cPolarMaxSupportedSyms]; + sym_freq syms1[cPolarMaxSupportedSyms]; + }; + + uint get_generate_polar_codes_table_size() + { + return sizeof(polar_work_tables); + } + + void generate_polar_codes(uint num_syms, sym_freq* pSF, uint8* pCodesizes, uint& max_code_size_ret) + { + int tmp_freq[cPolarMaxSupportedSyms]; + + uint orig_total_freq = 0; + uint cur_total = 0; + for (uint i = 0; i < num_syms; i++) + { + uint sym_freq = pSF[num_syms - 1 - i].m_freq; + orig_total_freq += sym_freq; + + uint sym_len = math::total_bits(sym_freq); + uint adjusted_sym_freq = 1 << (sym_len - 1); + tmp_freq[i] = adjusted_sym_freq; + cur_total += adjusted_sym_freq; + } + + uint tree_total = 1 << (math::total_bits(orig_total_freq) - 1); + if (tree_total < orig_total_freq) + tree_total <<= 1; + + uint start_index = 0; + while ((cur_total < tree_total) && (start_index < num_syms)) + { + for (uint i = start_index; i < num_syms; i++) + { + uint freq = tmp_freq[i]; + if ((cur_total + freq) <= tree_total) + { + tmp_freq[i] += freq; + if ((cur_total += freq) == tree_total) + break; + } + else + { + start_index = i + 1; + } + } + } + + LZHAM_ASSERT(cur_total == tree_total); + + uint max_code_size = 0; + const uint tree_total_bits = math::total_bits(tree_total); + for (uint i = 0; i < num_syms; i++) + { + uint codesize = (tree_total_bits - math::total_bits(tmp_freq[i])); + max_code_size = LZHAM_MAX(codesize, max_code_size); + pCodesizes[pSF[num_syms-1-i].m_sym] = static_cast(codesize); + } + max_code_size_ret = max_code_size; + } + +#if LZHAM_USE_FYFFE_CODES + void generate_fyffe_codes(uint num_syms, sym_freq* pSF, uint8* pCodesizes, uint& max_code_size_ret) + { + int tmp_codesizes[cPolarMaxSupportedSyms]; + + uint cur_total = 0; + uint orig_total = 0; + for (uint i = 0; i < num_syms; i++) + { + uint sym_freq = pSF[i].m_freq; + orig_total += sym_freq; + + // Compute the nearest power of 2 lower or equal to the symbol's frequency. + // This is equivalent to codesize=ceil(-log2(sym_prob)). 
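      // (Editorial aside, not part of the upstream LZHAM sources: a worked instance of the
      // comment above, using an assumed symbol frequency and an assumed desired_total of 256.)
      // For sym_freq = 21: floor_sym_freq = 16 = 2^4, so total_bits(floor_sym_freq) = 5.
      // With desired_total = 256 (total_bits = 9), the preliminary codesize computed later is
      // 9 - 5 = 4 bits, which matches ceil(-log2(21/256)) = ceil(3.61) = 4.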
+ uint floor_sym_freq = sym_freq; + if (!math::is_power_of_2(floor_sym_freq)) + { + uint sym_freq_bits = math::total_bits(sym_freq); + floor_sym_freq = 1 << (sym_freq_bits - 1); + } + + // Compute preliminary codesizes. tmp_freq's will always be <= the input frequencies. + tmp_codesizes[i] = math::total_bits(floor_sym_freq); + cur_total += floor_sym_freq; + } + + // Desired_total is a power of 2, and will always be >= the adjusted frequency total. + uint desired_total = cur_total; + if (!math::is_power_of_2(desired_total)) + desired_total = math::next_pow2(desired_total); + + LZHAM_ASSERT(cur_total <= desired_total); + + // Compute residual and initial symbol codesizes. + uint desired_total_bits = math::total_bits(desired_total); + int r = desired_total; + for (uint i = 0; i < num_syms; i++) + { + uint codesize = desired_total_bits - tmp_codesizes[i]; + tmp_codesizes[i] = static_cast(codesize); + r -= (desired_total >> codesize); + } + + LZHAM_ASSERT(r >= 0); + + int sym_freq_scale = (desired_total << 7) / orig_total; + + // Promote codesizes from most probable to lowest, as needed. + bool force_unhappiness = false; + while (r > 0) + { + for (int i = num_syms - 1; i >= 0; i--) + { + uint codesize = tmp_codesizes[i]; + if (codesize == 1) + continue; + + int sym_freq = pSF[i].m_freq; + int f = desired_total >> codesize; + if (f > r) + continue; + + // A code is "unhappy" when it is assigned more bits than -log2(sym_prob). + // It's too expensive to compute -log2(sym_freq/total_freq), so instead this directly compares the symbol's original + // frequency vs. the effective/adjusted frequency. sym_freq >= f is an approximation. + //bool unhappy = force_unhappiness || (sym_freq >= f); + + // Compare the symbol's original probability vs. its effective probability at its current codelength. + //bool unhappy = force_unhappiness || ((sym_freq * ((float)desired_total / orig_total)) > f); + bool unhappy = force_unhappiness || ((sym_freq * sym_freq_scale) > (f << 7)); + + if (unhappy) + { + tmp_codesizes[i]--; + + r -= f; + if (r <= 0) + break; + } + } + // Occasionally, a second pass is required to reduce the residual to 0. + // Subsequent passes ignore unhappiness. This is not discussed in Fyffe's original article. + force_unhappiness = true; + } + + LZHAM_ASSERT(!r); + + uint max_code_size = 0; + + for (uint i = 0; i < num_syms; i++) + { + uint codesize = tmp_codesizes[i]; + max_code_size = LZHAM_MAX(codesize, max_code_size); + pCodesizes[pSF[i].m_sym] = static_cast(codesize); + } + max_code_size_ret = max_code_size; + } +#endif //LZHAM_USE_FYFFE_CODES + +#if LZHAM_USE_SHANNON_FANO_CODES + // Straightforward recursive Shannon-Fano implementation, for comparison purposes. 
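   // (Editorial aside, not part of the upstream LZHAM sources: a small worked example of the
   // split rule implemented below, with assumed frequencies.)
   // Given symbols sorted by frequency {5, 6, 7, 8} (total 26), the split that minimizes the
   // imbalance is {5, 6} = 11 vs. {7, 8} = 15. Every symbol in the range then gets one more
   // code bit and each half is recursed; here both halves split again, so all four symbols
   // end up with 2-bit codes.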
+ static void generate_shannon_fano_codes_internal(uint num_syms, sym_freq* pSF, uint8* pCodesizes, int l, int h, uint total_freq) + { + LZHAM_ASSERT((h - l) >= 2); + + uint left_total = total_freq; + uint right_total = 0; + int best_diff = INT_MAX; + int best_split_index = 0; + for (int i = h - 1; i > l; i--) + { + uint freq = pSF[i].m_freq; + uint next_left_total = left_total - freq; + uint next_right_total = right_total + freq; + LZHAM_ASSERT((next_left_total + next_right_total) == total_freq); + + int diff = labs(next_left_total - next_right_total); + if (diff >= best_diff) + break; + + left_total = next_left_total; + right_total = next_right_total; + best_split_index = i; + best_diff = diff; + if (!best_diff) + break; + } + + for (int i = l; i < h; i++) + pCodesizes[i]++; + + if ((best_split_index - l) > 1) generate_shannon_fano_codes_internal(num_syms, pSF, pCodesizes, l, best_split_index, left_total); + if ((h - best_split_index) > 1) generate_shannon_fano_codes_internal(num_syms, pSF, pCodesizes, best_split_index, h, right_total); + } + + void generate_shannon_fano_codes(uint num_syms, sym_freq* pSF, uint total_freq, uint8* pCodesizes, uint& max_code_size_ret) + { + LZHAM_ASSERT(num_syms >= 2); + uint8 tmp_codesizes[cPolarMaxSupportedSyms]; + memset(tmp_codesizes, 0, num_syms); + + generate_shannon_fano_codes_internal(num_syms, pSF, tmp_codesizes, 0, num_syms, total_freq); + + uint max_code_size = 0; + for (uint i = 0; i < num_syms; i++) + { + uint codesize = tmp_codesizes[i]; + max_code_size = LZHAM_MAX(codesize, max_code_size); + pCodesizes[pSF[i].m_sym] = static_cast(codesize); + } + max_code_size_ret = max_code_size; + } +#endif // LZHAM_USE_SHANNON_FANO_CODES + + bool generate_polar_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret) + { + if ((!num_syms) || (num_syms > cPolarMaxSupportedSyms)) + return false; + + polar_work_tables& state = *static_cast(pContext);; + + uint max_freq = 0; + uint total_freq = 0; + + uint num_used_syms = 0; + for (uint i = 0; i < num_syms; i++) + { + uint freq = pFreq[i]; + + if (!freq) + pCodesizes[i] = 0; + else + { + total_freq += freq; + max_freq = math::maximum(max_freq, freq); + + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_sym = static_cast(i); + sf.m_freq = static_cast(freq); + num_used_syms++; + } + } + + total_freq_ret = total_freq; + + if (num_used_syms == 1) + { + pCodesizes[state.syms0[0].m_sym] = 1; + } + else + { + sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1); + +#if LZHAM_USE_SHANNON_FANO_CODES + generate_shannon_fano_codes(num_syms, syms, total_freq, pCodesizes, max_code_size); +#elif LZHAM_USE_FYFFE_CODES + generate_fyffe_codes(num_syms, syms, pCodesizes, max_code_size); +#else + generate_polar_codes(num_syms, syms, pCodesizes, max_code_size); +#endif + } + + return true; + } + +} // namespace lzham + diff --git a/r5dev/thirdparty/lzham/lzham_prefix_coding.cpp b/r5dev/thirdparty/lzham/lzham_prefix_coding.cpp new file mode 100644 index 00000000..47ef9260 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_prefix_coding.cpp @@ -0,0 +1,350 @@ +// File: lzham_prefix_coding.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_prefix_coding.h" + +#ifdef LZHAM_BUILD_DEBUG + //#define TEST_DECODER_TABLES +#endif + +namespace lzham +{ + namespace prefix_coding + { + bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size) + { + const uint 
cMaxEverCodeSize = 34; + + if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize)) + return false; + + uint num_codes[cMaxEverCodeSize + 1]; + utils::zero_object(num_codes); + + bool should_limit = false; + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + + LZHAM_ASSERT(c <= cMaxEverCodeSize); + + num_codes[c]++; + if (c > max_code_size) + should_limit = true; + } + + if (!should_limit) + return true; + + uint ofs = 0; + uint next_sorted_ofs[cMaxEverCodeSize + 1]; + for (uint i = 1; i <= cMaxEverCodeSize; i++) + { + next_sorted_ofs[i] = ofs; + ofs += num_codes[i]; + } + + if ((ofs < 2) || (ofs > cMaxSupportedSyms)) + return true; + + if (ofs > (1U << max_code_size)) + return false; + + for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++) + num_codes[max_code_size] += num_codes[i]; + + // Technique of adjusting tree to enforce maximum code size from LHArc. + + uint total = 0; + for (uint i = max_code_size; i; --i) + total += (num_codes[i] << (max_code_size - i)); + + if (total == (1U << max_code_size)) + return true; + + do + { + num_codes[max_code_size]--; + + uint i; + for (i = max_code_size - 1; i; --i) + { + if (!num_codes[i]) + continue; + num_codes[i]--; + num_codes[i + 1] += 2; + break; + } + if (!i) + return false; + + total--; + } while (total != (1U << max_code_size)); + + uint8 new_codesizes[cMaxSupportedSyms]; + uint8* p = new_codesizes; + for (uint i = 1; i <= max_code_size; i++) + { + uint n = num_codes[i]; + if (n) + { + memset(p, i, n); + p += n; + } + } + + for (uint i = 0; i < num_syms; i++) + { + const uint c = pCodesizes[i]; + if (c) + { + uint next_ofs = next_sorted_ofs[c]; + next_sorted_ofs[c] = next_ofs + 1; + + pCodesizes[i] = static_cast(new_codesizes[next_ofs]); + } + } + + return true; + } + + bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes) + { + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + LZHAM_ASSERT(c <= cMaxExpectedCodeSize); + num_codes[c]++; + } + + uint code = 0; + + uint next_code[cMaxExpectedCodeSize + 1]; + next_code[0] = 0; + + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + next_code[i] = code; + + code = (code + num_codes[i]) << 1; + } + + if (code != (1 << (cMaxExpectedCodeSize + 1))) + { + uint t = 0; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + t += num_codes[i]; + if (t > 1) + return false; + } + } + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + + LZHAM_ASSERT(!c || (next_code[c] <= UINT16_MAX)); + + pCodes[i] = static_cast(next_code[c]++); + + LZHAM_ASSERT(!c || (math::total_bits(pCodes[i]) <= pCodesizes[i])); + } + + return true; + } + + bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits) + { + uint min_codes[cMaxExpectedCodeSize]; + + if ((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + pTables->m_num_syms = num_syms; + + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + num_codes[c]++; + } + + uint sorted_positions[cMaxExpectedCodeSize + 1]; + + uint next_code = 0; + + uint total_used_syms = 0; + uint max_code_size = 0; + uint min_code_size = UINT_MAX; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint n = num_codes[i]; + + if (!n) + pTables->m_max_codes[i - 1] = 0;//UINT_MAX; + else + { + min_code_size 
= math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = next_code; + + pTables->m_max_codes[i - 1] = next_code + n - 1; + pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + + pTables->m_val_ptrs[i - 1] = total_used_syms; + + sorted_positions[i] = total_used_syms; + + next_code += n; + total_used_syms += n; + } + + next_code <<= 1; + } + + pTables->m_total_used_syms = total_used_syms; + + if (total_used_syms > pTables->m_cur_sorted_symbol_order_size) + { + pTables->m_cur_sorted_symbol_order_size = total_used_syms; + + if (!math::is_power_of_2(total_used_syms)) + pTables->m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + + if (pTables->m_sorted_symbol_order) + { + lzham_delete_array(pTables->m_sorted_symbol_order); + pTables->m_sorted_symbol_order = NULL; + } + + pTables->m_sorted_symbol_order = lzham_new_array(pTables->m_cur_sorted_symbol_order_size); + if (!pTables->m_sorted_symbol_order) + return false; + } + + pTables->m_min_code_size = static_cast(min_code_size); + pTables->m_max_code_size = static_cast(max_code_size); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + LZHAM_ASSERT(num_codes[c]); + + uint sorted_pos = sorted_positions[c]++; + + LZHAM_ASSERT(sorted_pos < total_used_syms); + + pTables->m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } + + if (table_bits <= pTables->m_min_code_size) + table_bits = 0; + pTables->m_table_bits = table_bits; + + if (table_bits) + { + uint table_size = 1 << table_bits; + if (table_size > pTables->m_cur_lookup_size) + { + pTables->m_cur_lookup_size = table_size; + + if (pTables->m_lookup) + { + lzham_delete_array(pTables->m_lookup); + pTables->m_lookup = NULL; + } + + pTables->m_lookup = lzham_new_array(table_size); + if (!pTables->m_lookup) + return false; + } + + memset(pTables->m_lookup, 0xFF, static_cast(sizeof(pTables->m_lookup[0])) * (1UL << table_bits)); + + for (uint codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + continue; + + const uint fillsize = table_bits - codesize; + const uint fillnum = 1 << fillsize; + + const uint min_code = min_codes[codesize - 1]; + const uint max_code = pTables->get_unshifted_max_code(codesize); + const uint val_ptr = pTables->m_val_ptrs[codesize - 1]; + + for (uint code = min_code; code <= max_code; code++) + { + const uint sym_index = pTables->m_sorted_symbol_order[ val_ptr + code - min_code ]; + LZHAM_ASSERT( pCodesizes[sym_index] == codesize ); + + for (uint j = 0; j < fillnum; j++) + { + const uint t = j + (code << fillsize); + + LZHAM_ASSERT(t < (1U << table_bits)); + + LZHAM_ASSERT(pTables->m_lookup[t] == UINT32_MAX); + + pTables->m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint i = 0; i < cMaxExpectedCodeSize; i++) + pTables->m_val_ptrs[i] -= min_codes[i]; + + pTables->m_table_max_code = 0; + pTables->m_decode_start_code_size = pTables->m_min_code_size; + + if (table_bits) + { + uint i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + pTables->m_table_max_code = pTables->m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + pTables->m_decode_start_code_size = table_bits + 1; + for (i = table_bits + 1; i <= max_code_size; i++) + { + if (num_codes[i]) + { + pTables->m_decode_start_code_size = i; + break; + } + } + } + } + + // sentinels + pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX; + 
pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + pTables->m_table_shift = 32 - pTables->m_table_bits; + + return true; + } + + } // namespace prefix_codig + +} // namespace lzham + + diff --git a/r5dev/thirdparty/lzham/lzham_pthreads_threading.cpp b/r5dev/thirdparty/lzham/lzham_pthreads_threading.cpp new file mode 100644 index 00000000..511d740d --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_pthreads_threading.cpp @@ -0,0 +1,227 @@ +// File: lzham_task_pool_pthreads.cpp +// +// Copyright (c) 2009-2010 Richard Geldreich, Jr. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +#include "include/lzham_core.h" +#include "include/lzham_pthreads_threading.h" +#include "include/lzham_timer.h" + +#ifdef WIN32 +#include +#endif + +#if defined(__GNUC__) +#include +#endif + +#if LZHAM_USE_PTHREADS_API + +#ifdef WIN32 +#pragma comment(lib, "../ext/libpthread/lib/pthreadVC2.lib") +#endif + +namespace lzham +{ + task_pool::task_pool() : + m_num_threads(0), + m_tasks_available(0, 32767), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + } + + task_pool::task_pool(uint num_threads) : + m_num_threads(0), + m_tasks_available(0, 32767), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + + bool status = init(num_threads); + LZHAM_VERIFY(status); + } + + task_pool::~task_pool() + { + deinit(); + } + + bool task_pool::init(uint num_threads) + { + LZHAM_ASSERT(num_threads <= cMaxThreads); + num_threads = math::minimum(num_threads, cMaxThreads); + + deinit(); + + bool succeeded = true; + + m_num_threads = 0; + while (m_num_threads < num_threads) + { + int status = pthread_create(&m_threads[m_num_threads], NULL, thread_func, this); + if (status) + { + succeeded = false; + break; + } + + m_num_threads++; + } + + if (!succeeded) + { + deinit(); + return false; + } + + return true; + } + + void task_pool::deinit() + { + if (m_num_threads) + { + join(); + + atomic_exchange32(&m_exit_flag, true); + + m_tasks_available.release(m_num_threads); + + for (uint i = 0; i < m_num_threads; i++) + pthread_join(m_threads[i], NULL); + + m_num_threads = 0; + + atomic_exchange32(&m_exit_flag, false); + } + + m_task_stack.clear(); + m_num_outstanding_tasks = 0; + } + + bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pFunc); + + task tsk; + tsk.m_callback = pFunc; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = 0; + + if 
(!m_task_stack.try_push(tsk)) + return false; + + atomic_increment32(&m_num_outstanding_tasks); + + m_tasks_available.release(1); + + return true; + } + + // It's the object's responsibility to delete pObj within the execute_task() method, if needed! + bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pObj); + + task tsk; + tsk.m_pObj = pObj; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + if (!m_task_stack.try_push(tsk)) + return false; + + atomic_increment32(&m_num_outstanding_tasks); + + m_tasks_available.release(1); + + return true; + } + + void task_pool::process_task(task& tsk) + { + if (tsk.m_flags & cTaskFlagObject) + tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); + else + tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); + + atomic_decrement32(&m_num_outstanding_tasks); + } + + void task_pool::join() + { + task tsk; + while (atomic_add32(&m_num_outstanding_tasks, 0) > 0) + { + if (m_task_stack.pop(tsk)) + { + process_task(tsk); + } + else + { + lzham_sleep(1); + } + } + } + + void * task_pool::thread_func(void *pContext) + { + task_pool* pPool = static_cast(pContext); + task tsk; + + for ( ; ; ) + { + if (!pPool->m_tasks_available.wait()) + break; + + if (pPool->m_exit_flag) + break; + + if (pPool->m_task_stack.pop(tsk)) + { + pPool->process_task(tsk); + } + } + + return NULL; + } + + uint lzham_get_max_helper_threads() + { +#if defined(__GNUC__) + uint num_procs = get_nprocs(); + return num_procs ? (num_procs - 1) : 0; +#else + printf("TODO: lzham_get_max_helper_threads(): Implement system specific func to determine the max # of helper threads\n"); + + // Just assume a dual-core machine. + return 1; +#endif + } + +} // namespace lzham + +#endif // LZHAM_USE_PTHREADS_API diff --git a/r5dev/thirdparty/lzham/lzham_symbol_codec.cpp b/r5dev/thirdparty/lzham/lzham_symbol_codec.cpp new file mode 100644 index 00000000..9eeb6c62 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_symbol_codec.cpp @@ -0,0 +1,1453 @@ +// File: lzham_symbol_codec.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_symbol_codec.h" +#include "include/lzham_huffman_codes.h" +#include "include/lzham_polar_codes.h" + +// Set to 1 to enable ~2x more frequent Huffman table updating (at slower decompression). +#define LZHAM_MORE_FREQUENT_TABLE_UPDATING 1 + +namespace lzham +{ + // Using a fixed table to convert from scaled probability to scaled bits for determinism across compilers/run-time libs/platforms. 
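+   // For a nonzero index i, the entry is floor(.5 + (-log2(i / cSymbolCodecArithProbScale)) * cBitCostScale),
+   // i.e. the scaled number of bits needed to code a symbol with that probability; the (normally disabled)
+   // arith_prob_cost_initializer further below regenerates the table with this formula.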
+ uint32 g_prob_cost[cSymbolCodecArithProbScale] = + { + 0x0,0xB000000,0xA000000,0x96A3FE6,0x9000000,0x8AD961F,0x86A3FE6,0x8315130,0x8000000,0x7D47FCC,0x7AD961F, + 0x78A62B0,0x76A3FE6,0x74CAFFC,0x7315130,0x717D605,0x7000000,0x6E99C09,0x6D47FCC,0x6C087D3,0x6AD961F,0x69B9116, + 0x68A62B0,0x679F7D8,0x66A3FE6,0x65B2C3E,0x64CAFFC,0x63EBFB1,0x6315130,0x6245B5C,0x617D605,0x60BB9CA,0x6000000, + 0x5F4A296,0x5E99C09,0x5DEE74F,0x5D47FCC,0x5CA6144,0x5C087D3,0x5B6EFE1,0x5AD961F,0x5A47779,0x59B9116,0x592E050, + 0x58A62B0,0x58215EA,0x579F7D8,0x5720677,0x56A3FE6,0x562A260,0x55B2C3E,0x553DBEF,0x54CAFFC,0x545A701,0x53EBFB1, + 0x537F8CF,0x5315130,0x52AC7B8,0x5245B5C,0x51E0B1B,0x517D605,0x511BB33,0x50BB9CA,0x505D0FC,0x5000000,0x4FA461A, + 0x4F4A296,0x4EF14C7,0x4E99C09,0x4E437BE,0x4DEE74F,0x4D9AA2C,0x4D47FCC,0x4CF67A8,0x4CA6144,0x4C56C23,0x4C087D3, + 0x4BBB3E1,0x4B6EFE1,0x4B23B6D,0x4AD961F,0x4A8FF97,0x4A47779,0x49FFD6A,0x49B9116,0x4973228,0x492E050,0x48E9B41, + 0x48A62B0,0x4863655,0x48215EA,0x47E012C,0x479F7D8,0x475F9B0,0x4720677,0x46E1DF1,0x46A3FE6,0x4666C1D,0x462A260, + 0x45EE27C,0x45B2C3E,0x4577F74,0x453DBEF,0x4504180,0x44CAFFC,0x4492735,0x445A701,0x4422F38,0x43EBFB1,0x43B5846, + 0x437F8CF,0x434A129,0x4315130,0x42E08C0,0x42AC7B8,0x4278DF7,0x4245B5C,0x4212FC7,0x41E0B1B,0x41AED39,0x417D605, + 0x414C561,0x411BB33,0x40EB75F,0x40BB9CA,0x408C25C,0x405D0FC,0x402E58F,0x4000000,0x3FD2036,0x3FA461A,0x3F77197, + 0x3F4A296,0x3F1D903,0x3EF14C7,0x3EC55D0,0x3E99C09,0x3E6E75F,0x3E437BE,0x3E18D14,0x3DEE74F,0x3DC465D,0x3D9AA2C, + 0x3D712AC,0x3D47FCC,0x3D1F17A,0x3CF67A8,0x3CCE246,0x3CA6144,0x3C7E492,0x3C56C23,0x3C2F7E8,0x3C087D3,0x3BE1BD5, + 0x3BBB3E1,0x3B94FE9,0x3B6EFE1,0x3B493BC,0x3B23B6D,0x3AFE6E7,0x3AD961F,0x3AB4908,0x3A8FF97,0x3A6B9C0,0x3A47779, + 0x3A238B5,0x39FFD6A,0x39DC58E,0x39B9116,0x3995FF7,0x3973228,0x395079E,0x392E050,0x390BC34,0x38E9B41,0x38C7D6E, + 0x38A62B0,0x3884B01,0x3863655,0x38424A6,0x38215EA,0x3800A1A,0x37E012C,0x37BFB18,0x379F7D8,0x377F762,0x375F9B0, + 0x373FEBA,0x3720677,0x37010E1,0x36E1DF1,0x36C2DA0,0x36A3FE6,0x36854BC,0x3666C1D,0x3648600,0x362A260,0x360C136, + 0x35EE27C,0x35D062B,0x35B2C3E,0x35954AD,0x3577F74,0x355AC8C,0x353DBEF,0x3520D98,0x3504180,0x34E77A4,0x34CAFFC, + 0x34AEA83,0x3492735,0x347660B,0x345A701,0x343EA12,0x3422F38,0x340766F,0x33EBFB1,0x33D0AFA,0x33B5846,0x339A78E, + 0x337F8CF,0x3364C05,0x334A129,0x332F839,0x3315130,0x32FAC09,0x32E08C0,0x32C6751,0x32AC7B8,0x32929F1,0x3278DF7, + 0x325F3C6,0x3245B5C,0x322C4B2,0x3212FC7,0x31F9C96,0x31E0B1B,0x31C7B53,0x31AED39,0x31960CB,0x317D605,0x3164CE2, + 0x314C561,0x3133F7D,0x311BB33,0x310387F,0x30EB75F,0x30D37CE,0x30BB9CA,0x30A3D50,0x308C25C,0x30748EC,0x305D0FC, + 0x3045A88,0x302E58F,0x301720E,0x3000000,0x2FE8F64,0x2FD2036,0x2FBB274,0x2FA461A,0x2F8DB27,0x2F77197,0x2F60968, + 0x2F4A296,0x2F33D20,0x2F1D903,0x2F0763B,0x2EF14C7,0x2EDB4A5,0x2EC55D0,0x2EAF848,0x2E99C09,0x2E84111,0x2E6E75F, + 0x2E58EEE,0x2E437BE,0x2E2E1CB,0x2E18D14,0x2E03996,0x2DEE74F,0x2DD963D,0x2DC465D,0x2DAF7AD,0x2D9AA2C,0x2D85DD7, + 0x2D712AC,0x2D5C8A9,0x2D47FCC,0x2D33812,0x2D1F17A,0x2D0AC02,0x2CF67A8,0x2CE246A,0x2CCE246,0x2CBA13A,0x2CA6144, + 0x2C92262,0x2C7E492,0x2C6A7D4,0x2C56C23,0x2C43180,0x2C2F7E8,0x2C1BF5A,0x2C087D3,0x2BF5151,0x2BE1BD5,0x2BCE75A, + 0x2BBB3E1,0x2BA8166,0x2B94FE9,0x2B81F68,0x2B6EFE1,0x2B5C153,0x2B493BC,0x2B3671A,0x2B23B6D,0x2B110B1,0x2AFE6E7, + 0x2AEBE0C,0x2AD961F,0x2AC6F1E,0x2AB4908,0x2AA23DC,0x2A8FF97,0x2A7DC39,0x2A6B9C0,0x2A5982B,0x2A47779,0x2A357A7, + 0x2A238B5,0x2A11AA1,0x29FFD6A,0x29EE10F,0x29DC58E,0x29CAAE6,0x29B9116,0x29A781C,0x2995FF7,0x29848A6,0x2973228, + 
0x2961C7B,0x295079E,0x293F390,0x292E050,0x291CDDD,0x290BC34,0x28FAB56,0x28E9B41,0x28D8BF4,0x28C7D6E,0x28B6FAD, + 0x28A62B0,0x2895677,0x2884B01,0x287404B,0x2863655,0x2852D1F,0x28424A6,0x2831CEA,0x28215EA,0x2810FA5,0x2800A1A, + 0x27F0547,0x27E012C,0x27CFDC7,0x27BFB18,0x27AF91E,0x279F7D8,0x278F744,0x277F762,0x276F831,0x275F9B0,0x274FBDE, + 0x273FEBA,0x2730242,0x2720677,0x2710B57,0x27010E1,0x26F1715,0x26E1DF1,0x26D2575,0x26C2DA0,0x26B3670,0x26A3FE6, + 0x26949FF,0x26854BC,0x267601C,0x2666C1D,0x26578BE,0x2648600,0x26393E1,0x262A260,0x261B17D,0x260C136,0x25FD18C, + 0x25EE27C,0x25DF407,0x25D062B,0x25C18E8,0x25B2C3E,0x25A402A,0x25954AD,0x25869C6,0x2577F74,0x25695B6,0x255AC8C, + 0x254C3F4,0x253DBEF,0x252F47B,0x2520D98,0x2512744,0x2504180,0x24F5C4B,0x24E77A4,0x24D9389,0x24CAFFC,0x24BCCFA, + 0x24AEA83,0x24A0897,0x2492735,0x248465C,0x247660B,0x2468643,0x245A701,0x244C847,0x243EA12,0x2430C63,0x2422F38, + 0x2415292,0x240766F,0x23F9ACF,0x23EBFB1,0x23DE515,0x23D0AFA,0x23C3160,0x23B5846,0x23A7FAB,0x239A78E,0x238CFF0, + 0x237F8CF,0x237222C,0x2364C05,0x2357659,0x234A129,0x233CC74,0x232F839,0x2322478,0x2315130,0x2307E61,0x22FAC09, + 0x22EDA29,0x22E08C0,0x22D37CE,0x22C6751,0x22B974A,0x22AC7B8,0x229F89B,0x22929F1,0x2285BBA,0x2278DF7,0x226C0A6, + 0x225F3C6,0x2252758,0x2245B5C,0x2238FCF,0x222C4B2,0x221FA05,0x2212FC7,0x22065F7,0x21F9C96,0x21ED3A2,0x21E0B1B, + 0x21D4301,0x21C7B53,0x21BB410,0x21AED39,0x21A26CD,0x21960CB,0x2189B33,0x217D605,0x217113F,0x2164CE2,0x21588EE, + 0x214C561,0x214023B,0x2133F7D,0x2127D25,0x211BB33,0x210F9A6,0x210387F,0x20F77BD,0x20EB75F,0x20DF765,0x20D37CE, + 0x20C789B,0x20BB9CA,0x20AFB5C,0x20A3D50,0x2097FA6,0x208C25C,0x2080574,0x20748EC,0x2068CC4,0x205D0FC,0x2051593, + 0x2045A88,0x2039FDD,0x202E58F,0x2022BA0,0x201720E,0x200B8D8,0x2000000,0x1FF4784,0x1FE8F64,0x1FDD79F,0x1FD2036, + 0x1FC6928,0x1FBB274,0x1FAFC1A,0x1FA461A,0x1F99074,0x1F8DB27,0x1F82633,0x1F77197,0x1F6BD53,0x1F60968,0x1F555D3, + 0x1F4A296,0x1F3EFB0,0x1F33D20,0x1F28AE6,0x1F1D903,0x1F12774,0x1F0763B,0x1EFC557,0x1EF14C7,0x1EE648C,0x1EDB4A5, + 0x1ED0511,0x1EC55D0,0x1EBA6E3,0x1EAF848,0x1EA49FF,0x1E99C09,0x1E8EE64,0x1E84111,0x1E79410,0x1E6E75F,0x1E63AFE, + 0x1E58EEE,0x1E4E32E,0x1E437BE,0x1E38C9D,0x1E2E1CB,0x1E23748,0x1E18D14,0x1E0E32E,0x1E03996,0x1DF904C,0x1DEE74F, + 0x1DE3E9F,0x1DD963D,0x1DCEE27,0x1DC465D,0x1DB9EDF,0x1DAF7AD,0x1DA50C7,0x1D9AA2C,0x1D903DC,0x1D85DD7,0x1D7B81C, + 0x1D712AC,0x1D66D86,0x1D5C8A9,0x1D52416,0x1D47FCC,0x1D3DBCA,0x1D33812,0x1D294A2,0x1D1F17A,0x1D14E9B,0x1D0AC02, + 0x1D009B2,0x1CF67A8,0x1CEC5E6,0x1CE246A,0x1CD8335,0x1CCE246,0x1CC419D,0x1CBA13A,0x1CB011C,0x1CA6144,0x1C9C1B0, + 0x1C92262,0x1C88358,0x1C7E492,0x1C74611,0x1C6A7D4,0x1C609DA,0x1C56C23,0x1C4CEB0,0x1C43180,0x1C39493,0x1C2F7E8, + 0x1C25B80,0x1C1BF5A,0x1C12375,0x1C087D3,0x1BFEC71,0x1BF5151,0x1BEB673,0x1BE1BD5,0x1BD8177,0x1BCE75A,0x1BC4D7D, + 0x1BBB3E1,0x1BB1A84,0x1BA8166,0x1B9E888,0x1B94FE9,0x1B8B789,0x1B81F68,0x1B78786,0x1B6EFE1,0x1B6587B,0x1B5C153, + 0x1B52A69,0x1B493BC,0x1B3FD4D,0x1B3671A,0x1B2D125,0x1B23B6D,0x1B1A5F1,0x1B110B1,0x1B07BAE,0x1AFE6E7,0x1AF525C, + 0x1AEBE0C,0x1AE29F8,0x1AD961F,0x1AD0281,0x1AC6F1E,0x1ABDBF6,0x1AB4908,0x1AAB655,0x1AA23DC,0x1A9919C,0x1A8FF97, + 0x1A86DCB,0x1A7DC39,0x1A74AE0,0x1A6B9C0,0x1A628DA,0x1A5982B,0x1A507B6,0x1A47779,0x1A3E774,0x1A357A7,0x1A2C812, + 0x1A238B5,0x1A1A98F,0x1A11AA1,0x1A08BEA,0x19FFD6A,0x19F6F21,0x19EE10F,0x19E5333,0x19DC58E,0x19D381F,0x19CAAE6, + 0x19C1DE3,0x19B9116,0x19B047E,0x19A781C,0x199EBEF,0x1995FF7,0x198D434,0x19848A6,0x197BD4D,0x1973228,0x196A737, + 
0x1961C7B,0x19591F3,0x195079E,0x1947D7D,0x193F390,0x19369D7,0x192E050,0x19256FD,0x191CDDD,0x19144EF,0x190BC34, + 0x19033AC,0x18FAB56,0x18F2333,0x18E9B41,0x18E1382,0x18D8BF4,0x18D0498,0x18C7D6E,0x18BF675,0x18B6FAD,0x18AE916, + 0x18A62B0,0x189DC7C,0x1895677,0x188D0A4,0x1884B01,0x187C58E,0x187404B,0x186BB38,0x1863655,0x185B1A2,0x1852D1F, + 0x184A8CB,0x18424A6,0x183A0B1,0x1831CEA,0x1829953,0x18215EA,0x18192B0,0x1810FA5,0x1808CC8,0x1800A1A,0x17F8799, + 0x17F0547,0x17E8322,0x17E012C,0x17D7F63,0x17CFDC7,0x17C7C59,0x17BFB18,0x17B7A05,0x17AF91E,0x17A7865,0x179F7D8, + 0x1797778,0x178F744,0x178773D,0x177F762,0x17777B4,0x176F831,0x17678DB,0x175F9B0,0x1757AB1,0x174FBDE,0x1747D36, + 0x173FEBA,0x1738068,0x1730242,0x1728447,0x1720677,0x17188D2,0x1710B57,0x1708E07,0x17010E1,0x16F93E6,0x16F1715, + 0x16E9A6E,0x16E1DF1,0x16DA19E,0x16D2575,0x16CA976,0x16C2DA0,0x16BB1F3,0x16B3670,0x16ABB16,0x16A3FE6,0x169C4DE, + 0x16949FF,0x168CF49,0x16854BC,0x167DA58,0x167601C,0x166E608,0x1666C1D,0x165F25A,0x16578BE,0x164FF4B,0x1648600, + 0x1640CDD,0x16393E1,0x1631B0D,0x162A260,0x16229DB,0x161B17D,0x1613946,0x160C136,0x160494D,0x15FD18C,0x15F59F0, + 0x15EE27C,0x15E6B2E,0x15DF407,0x15D7D06,0x15D062B,0x15C8F77,0x15C18E8,0x15BA280,0x15B2C3E,0x15AB621,0x15A402A, + 0x159CA59,0x15954AD,0x158DF27,0x15869C6,0x157F48A,0x1577F74,0x1570A82,0x15695B6,0x156210E,0x155AC8C,0x155382E, + 0x154C3F4,0x1544FDF,0x153DBEF,0x1536823,0x152F47B,0x15280F7,0x1520D98,0x1519A5C,0x1512744,0x150B450,0x1504180, + 0x14FCED4,0x14F5C4B,0x14EE9E6,0x14E77A4,0x14E0585,0x14D9389,0x14D21B1,0x14CAFFC,0x14C3E69,0x14BCCFA,0x14B5BAD, + 0x14AEA83,0x14A797C,0x14A0897,0x14997D5,0x1492735,0x148B6B7,0x148465C,0x147D622,0x147660B,0x146F616,0x1468643, + 0x1461691,0x145A701,0x1453793,0x144C847,0x144591C,0x143EA12,0x1437B2A,0x1430C63,0x1429DBD,0x1422F38,0x141C0D5, + 0x1415292,0x140E470,0x140766F,0x140088F,0x13F9ACF,0x13F2D30,0x13EBFB1,0x13E5253,0x13DE515,0x13D77F8,0x13D0AFA, + 0x13C9E1D,0x13C3160,0x13BC4C3,0x13B5846,0x13AEBE8,0x13A7FAB,0x13A138D,0x139A78E,0x1393BAF,0x138CFF0,0x1386450, + 0x137F8CF,0x1378D6E,0x137222C,0x136B709,0x1364C05,0x135E11F,0x1357659,0x1350BB2,0x134A129,0x13436C0,0x133CC74, + 0x1336248,0x132F839,0x1328E4A,0x1322478,0x131BAC5,0x1315130,0x130E7B9,0x1307E61,0x1301526,0x12FAC09,0x12F430A, + 0x12EDA29,0x12E7166,0x12E08C0,0x12DA038,0x12D37CE,0x12CCF81,0x12C6751,0x12BFF3F,0x12B974A,0x12B2F73,0x12AC7B8, + 0x12A601B,0x129F89B,0x1299137,0x12929F1,0x128C2C7,0x1285BBA,0x127F4CA,0x1278DF7,0x1272740,0x126C0A6,0x1265A28, + 0x125F3C6,0x1258D81,0x1252758,0x124C14C,0x1245B5C,0x123F587,0x1238FCF,0x1232A33,0x122C4B2,0x1225F4E,0x121FA05, + 0x12194D8,0x1212FC7,0x120CAD1,0x12065F7,0x1200139,0x11F9C96,0x11F380E,0x11ED3A2,0x11E6F51,0x11E0B1B,0x11DA700, + 0x11D4301,0x11CDF1C,0x11C7B53,0x11C17A4,0x11BB410,0x11B5097,0x11AED39,0x11A89F6,0x11A26CD,0x119C3BF,0x11960CB, + 0x118FDF2,0x1189B33,0x118388F,0x117D605,0x1177395,0x117113F,0x116AF04,0x1164CE2,0x115EADB,0x11588EE,0x115271A, + 0x114C561,0x11463C1,0x114023B,0x113A0CF,0x1133F7D,0x112DE44,0x1127D25,0x1121C1F,0x111BB33,0x1115A60,0x110F9A6, + 0x1109906,0x110387F,0x10FD811,0x10F77BD,0x10F1781,0x10EB75F,0x10E5755,0x10DF765,0x10D978D,0x10D37CE,0x10CD828, + 0x10C789B,0x10C1926,0x10BB9CA,0x10B5A87,0x10AFB5C,0x10A9C4A,0x10A3D50,0x109DE6F,0x1097FA6,0x10920F5,0x108C25C, + 0x10863DC,0x1080574,0x107A724,0x10748EC,0x106EACC,0x1068CC4,0x1062ED4,0x105D0FC,0x105733B,0x1051593,0x104B802, + 0x1045A88,0x103FD27,0x1039FDD,0x10342AA,0x102E58F,0x102888C,0x1022BA0,0x101CECB,0x101720E,0x1011567,0x100B8D8, + 
0x1005C61,0x1000000,0xFFA3B6,0xFF4784,0xFEEB68,0xFE8F64,0xFE3376,0xFDD79F,0xFD7BDF,0xFD2036,0xFCC4A3, + 0xFC6928,0xFC0DC2,0xFBB274,0xFB573C,0xFAFC1A,0xFAA10F,0xFA461A,0xF9EB3C,0xF99074,0xF935C2,0xF8DB27, + 0xF880A2,0xF82633,0xF7CBDA,0xF77197,0xF7176A,0xF6BD53,0xF66353,0xF60968,0xF5AF93,0xF555D3,0xF4FC2A, + 0xF4A296,0xF44918,0xF3EFB0,0xF3965D,0xF33D20,0xF2E3F9,0xF28AE6,0xF231EA,0xF1D903,0xF18031,0xF12774, + 0xF0CECD,0xF0763B,0xF01DBF,0xEFC557,0xEF6D05,0xEF14C7,0xEEBC9F,0xEE648C,0xEE0C8E,0xEDB4A5,0xED5CD0, + 0xED0511,0xECAD66,0xEC55D0,0xEBFE4F,0xEBA6E3,0xEB4F8B,0xEAF848,0xEAA119,0xEA49FF,0xE9F2FA,0xE99C09, + 0xE9452D,0xE8EE64,0xE897B1,0xE84111,0xE7EA86,0xE79410,0xE73DAD,0xE6E75F,0xE69124,0xE63AFE,0xE5E4EC, + 0xE58EEE,0xE53904,0xE4E32E,0xE48D6C,0xE437BE,0xE3E223,0xE38C9D,0xE3372A,0xE2E1CB,0xE28C80,0xE23748, + 0xE1E224,0xE18D14,0xE13817,0xE0E32E,0xE08E58,0xE03996,0xDFE4E7,0xDF904C,0xDF3BC4,0xDEE74F,0xDE92ED, + 0xDE3E9F,0xDDEA64,0xDD963D,0xDD4228,0xDCEE27,0xDC9A38,0xDC465D,0xDBF295,0xDB9EDF,0xDB4B3D,0xDAF7AD, + 0xDAA431,0xDA50C7,0xD9FD70,0xD9AA2C,0xD956FB,0xD903DC,0xD8B0D0,0xD85DD7,0xD80AF1,0xD7B81C,0xD7655B, + 0xD712AC,0xD6C010,0xD66D86,0xD61B0E,0xD5C8A9,0xD57656,0xD52416,0xD4D1E7,0xD47FCC,0xD42DC2,0xD3DBCA, + 0xD389E5,0xD33812,0xD2E651,0xD294A2,0xD24305,0xD1F17A,0xD1A001,0xD14E9B,0xD0FD46,0xD0AC02,0xD05AD1, + 0xD009B2,0xCFB8A4,0xCF67A8,0xCF16BE,0xCEC5E6,0xCE751F,0xCE246A,0xCDD3C7,0xCD8335,0xCD32B5,0xCCE246, + 0xCC91E9,0xCC419D,0xCBF163,0xCBA13A,0xCB5122,0xCB011C,0xCAB127,0xCA6144,0xCA1171,0xC9C1B0,0xC97200, + 0xC92262,0xC8D2D4,0xC88358,0xC833ED,0xC7E492,0xC79549,0xC74611,0xC6F6EA,0xC6A7D4,0xC658CE,0xC609DA, + 0xC5BAF6,0xC56C23,0xC51D61,0xC4CEB0,0xC48010,0xC43180,0xC3E301,0xC39493,0xC34635,0xC2F7E8,0xC2A9AC, + 0xC25B80,0xC20D64,0xC1BF5A,0xC1715F,0xC12375,0xC0D59C,0xC087D3,0xC03A1A,0xBFEC71,0xBF9ED9,0xBF5151, + 0xBF03DA,0xBEB673,0xBE691B,0xBE1BD5,0xBDCE9E,0xBD8177,0xBD3461,0xBCE75A,0xBC9A64,0xBC4D7D,0xBC00A7, + 0xBBB3E1,0xBB672A,0xBB1A84,0xBACDED,0xBA8166,0xBA34EF,0xB9E888,0xB99C31,0xB94FE9,0xB903B1,0xB8B789, + 0xB86B71,0xB81F68,0xB7D36F,0xB78786,0xB73BAC,0xB6EFE1,0xB6A427,0xB6587B,0xB60CDF,0xB5C153,0xB575D6, + 0xB52A69,0xB4DF0B,0xB493BC,0xB4487D,0xB3FD4D,0xB3B22C,0xB3671A,0xB31C18,0xB2D125,0xB28641,0xB23B6D, + 0xB1F0A7,0xB1A5F1,0xB15B4A,0xB110B1,0xB0C628,0xB07BAE,0xB03143,0xAFE6E7,0xAF9C9A,0xAF525C,0xAF082C, + 0xAEBE0C,0xAE73FA,0xAE29F8,0xADE004,0xAD961F,0xAD4C49,0xAD0281,0xACB8C8,0xAC6F1E,0xAC2583,0xABDBF6, + 0xAB9278,0xAB4908,0xAAFFA7,0xAAB655,0xAA6D11,0xAA23DC,0xA9DAB5,0xA9919C,0xA94893,0xA8FF97,0xA8B6AA, + 0xA86DCB,0xA824FB,0xA7DC39,0xA79386,0xA74AE0,0xA70249,0xA6B9C0,0xA67146,0xA628DA,0xA5E07B,0xA5982B, + 0xA54FEA,0xA507B6,0xA4BF90,0xA47779,0xA42F6F,0xA3E774,0xA39F87,0xA357A7,0xA30FD6,0xA2C812,0xA2805D, + 0xA238B5,0xA1F11B,0xA1A98F,0xA16211,0xA11AA1,0xA0D33F,0xA08BEA,0xA044A4,0x9FFD6A,0x9FB63F,0x9F6F21, + 0x9F2811,0x9EE10F,0x9E9A1B,0x9E5333,0x9E0C5A,0x9DC58E,0x9D7ED0,0x9D381F,0x9CF17C,0x9CAAE6,0x9C645E, + 0x9C1DE3,0x9BD776,0x9B9116,0x9B4AC3,0x9B047E,0x9ABE46,0x9A781C,0x9A31FF,0x99EBEF,0x99A5EC,0x995FF7, + 0x991A0F,0x98D434,0x988E67,0x9848A6,0x9802F3,0x97BD4D,0x9777B4,0x973228,0x96ECA9,0x96A737,0x9661D3, + 0x961C7B,0x95D730,0x9591F3,0x954CC2,0x95079E,0x94C287,0x947D7D,0x943880,0x93F390,0x93AEAD,0x9369D7, + 0x93250D,0x92E050,0x929BA0,0x9256FD,0x921266,0x91CDDD,0x91895F,0x9144EF,0x91008B,0x90BC34,0x9077EA, + 0x9033AC,0x8FEF7B,0x8FAB56,0x8F673E,0x8F2333,0x8EDF34,0x8E9B41,0x8E575B,0x8E1382,0x8DCFB5,0x8D8BF4, + 
0x8D4840,0x8D0498,0x8CC0FD,0x8C7D6E,0x8C39EB,0x8BF675,0x8BB30B,0x8B6FAD,0x8B2C5B,0x8AE916,0x8AA5DD, + 0x8A62B0,0x8A1F90,0x89DC7C,0x899973,0x895677,0x891388,0x88D0A4,0x888DCC,0x884B01,0x880841,0x87C58E, + 0x8782E6,0x87404B,0x86FDBC,0x86BB38,0x8678C1,0x863655,0x85F3F6,0x85B1A2,0x856F5B,0x852D1F,0x84EAEF, + 0x84A8CB,0x8466B3,0x8424A6,0x83E2A6,0x83A0B1,0x835EC8,0x831CEA,0x82DB19,0x829953,0x825799,0x8215EA, + 0x81D448,0x8192B0,0x815125,0x810FA5,0x80CE31,0x808CC8,0x804B6B,0x800A1A,0x7FC8D4,0x7F8799,0x7F466A, + 0x7F0547,0x7EC42F,0x7E8322,0x7E4221,0x7E012C,0x7DC041,0x7D7F63,0x7D3E8F,0x7CFDC7,0x7CBD0B,0x7C7C59, + 0x7C3BB3,0x7BFB18,0x7BBA89,0x7B7A05,0x7B398C,0x7AF91E,0x7AB8BC,0x7A7865,0x7A3819,0x79F7D8,0x79B7A2, + 0x797778,0x793759,0x78F744,0x78B73B,0x78773D,0x78374A,0x77F762,0x77B786,0x7777B4,0x7737ED,0x76F831, + 0x76B881,0x7678DB,0x763940,0x75F9B0,0x75BA2B,0x757AB1,0x753B42,0x74FBDE,0x74BC84,0x747D36,0x743DF2, + 0x73FEBA,0x73BF8C,0x738068,0x734150,0x730242,0x72C33F,0x728447,0x72455A,0x720677,0x71C79F,0x7188D2, + 0x714A0F,0x710B57,0x70CCAA,0x708E07,0x704F6F,0x7010E1,0x6FD25E,0x6F93E6,0x6F5578,0x6F1715,0x6ED8BC, + 0x6E9A6E,0x6E5C2B,0x6E1DF1,0x6DDFC3,0x6DA19E,0x6D6385,0x6D2575,0x6CE770,0x6CA976,0x6C6B86,0x6C2DA0, + 0x6BEFC4,0x6BB1F3,0x6B742D,0x6B3670,0x6AF8BE,0x6ABB16,0x6A7D79,0x6A3FE6,0x6A025D,0x69C4DE,0x69876A, + 0x6949FF,0x690C9F,0x68CF49,0x6891FE,0x6854BC,0x681785,0x67DA58,0x679D35,0x67601C,0x67230D,0x66E608, + 0x66A90D,0x666C1D,0x662F36,0x65F25A,0x65B587,0x6578BE,0x653C00,0x64FF4B,0x64C2A1,0x648600,0x644969, + 0x640CDD,0x63D05A,0x6393E1,0x635772,0x631B0D,0x62DEB2,0x62A260,0x626619,0x6229DB,0x61EDA7,0x61B17D, + 0x61755D,0x613946,0x60FD39,0x60C136,0x60853D,0x60494D,0x600D68,0x5FD18C,0x5F95B9,0x5F59F0,0x5F1E31, + 0x5EE27C,0x5EA6D0,0x5E6B2E,0x5E2F96,0x5DF407,0x5DB882,0x5D7D06,0x5D4194,0x5D062B,0x5CCACC,0x5C8F77, + 0x5C542B,0x5C18E8,0x5BDDAF,0x5BA280,0x5B675A,0x5B2C3E,0x5AF12B,0x5AB621,0x5A7B21,0x5A402A,0x5A053D, + 0x59CA59,0x598F7E,0x5954AD,0x5919E5,0x58DF27,0x58A472,0x5869C6,0x582F23,0x57F48A,0x57B9FA,0x577F74, + 0x5744F6,0x570A82,0x56D018,0x5695B6,0x565B5E,0x56210E,0x55E6C8,0x55AC8C,0x557258,0x55382E,0x54FE0C, + 0x54C3F4,0x5489E5,0x544FDF,0x5415E2,0x53DBEF,0x53A204,0x536823,0x532E4A,0x52F47B,0x52BAB5,0x5280F7, + 0x524743,0x520D98,0x51D3F5,0x519A5C,0x5160CC,0x512744,0x50EDC6,0x50B450,0x507AE4,0x504180,0x500826, + 0x4FCED4,0x4F958B,0x4F5C4B,0x4F2314,0x4EE9E6,0x4EB0C0,0x4E77A4,0x4E3E90,0x4E0585,0x4DCC83,0x4D9389, + 0x4D5A99,0x4D21B1,0x4CE8D2,0x4CAFFC,0x4C772E,0x4C3E69,0x4C05AD,0x4BCCFA,0x4B944F,0x4B5BAD,0x4B2314, + 0x4AEA83,0x4AB1FB,0x4A797C,0x4A4105,0x4A0897,0x49D031,0x4997D5,0x495F80,0x492735,0x48EEF2,0x48B6B7, + 0x487E85,0x48465C,0x480E3B,0x47D622,0x479E13,0x47660B,0x472E0C,0x46F616,0x46BE28,0x468643,0x464E66, + 0x461691,0x45DEC5,0x45A701,0x456F46,0x453793,0x44FFE9,0x44C847,0x4490AD,0x44591C,0x442193,0x43EA12, + 0x43B29A,0x437B2A,0x4343C2,0x430C63,0x42D50C,0x429DBD,0x426676,0x422F38,0x41F802,0x41C0D5,0x4189AF, + 0x415292,0x411B7D,0x40E470,0x40AD6B,0x40766F,0x403F7B,0x40088F,0x3FD1AB,0x3F9ACF,0x3F63FB,0x3F2D30, + 0x3EF66D,0x3EBFB1,0x3E88FE,0x3E5253,0x3E1BB0,0x3DE515,0x3DAE83,0x3D77F8,0x3D4175,0x3D0AFA,0x3CD488, + 0x3C9E1D,0x3C67BB,0x3C3160,0x3BFB0E,0x3BC4C3,0x3B8E80,0x3B5846,0x3B2213,0x3AEBE8,0x3AB5C5,0x3A7FAB, + 0x3A4998,0x3A138D,0x39DD89,0x39A78E,0x39719B,0x393BAF,0x3905CC,0x38CFF0,0x389A1C,0x386450,0x382E8C, + 0x37F8CF,0x37C31B,0x378D6E,0x3757C9,0x37222C,0x36EC96,0x36B709,0x368183,0x364C05,0x36168E,0x35E11F, + 
0x35ABB9,0x357659,0x354102,0x350BB2,0x34D66A,0x34A129,0x346BF1,0x3436C0,0x340196,0x33CC74,0x33975A, + 0x336248,0x332D3D,0x32F839,0x32C33E,0x328E4A,0x32595D,0x322478,0x31EF9B,0x31BAC5,0x3185F7,0x315130, + 0x311C71,0x30E7B9,0x30B309,0x307E61,0x3049C0,0x301526,0x2FE094,0x2FAC09,0x2F7786,0x2F430A,0x2F0E96, + 0x2EDA29,0x2EA5C4,0x2E7166,0x2E3D0F,0x2E08C0,0x2DD479,0x2DA038,0x2D6BFF,0x2D37CE,0x2D03A4,0x2CCF81, + 0x2C9B66,0x2C6751,0x2C3345,0x2BFF3F,0x2BCB41,0x2B974A,0x2B635B,0x2B2F73,0x2AFB92,0x2AC7B8,0x2A93E6, + 0x2A601B,0x2A2C57,0x29F89B,0x29C4E5,0x299137,0x295D90,0x2929F1,0x28F658,0x28C2C7,0x288F3D,0x285BBA, + 0x28283F,0x27F4CA,0x27C15D,0x278DF7,0x275A98,0x272740,0x26F3EF,0x26C0A6,0x268D63,0x265A28,0x2626F4, + 0x25F3C6,0x25C0A0,0x258D81,0x255A69,0x252758,0x24F44F,0x24C14C,0x248E50,0x245B5C,0x24286E,0x23F587, + 0x23C2A8,0x238FCF,0x235CFD,0x232A33,0x22F76F,0x22C4B2,0x2291FD,0x225F4E,0x222CA6,0x21FA05,0x21C76B, + 0x2194D8,0x21624C,0x212FC7,0x20FD49,0x20CAD1,0x209861,0x2065F7,0x203395,0x200139,0x1FCEE4,0x1F9C96, + 0x1F6A4F,0x1F380E,0x1F05D5,0x1ED3A2,0x1EA176,0x1E6F51,0x1E3D32,0x1E0B1B,0x1DD90A,0x1DA700,0x1D74FD, + 0x1D4301,0x1D110B,0x1CDF1C,0x1CAD34,0x1C7B53,0x1C4978,0x1C17A4,0x1BE5D7,0x1BB410,0x1B8250,0x1B5097, + 0x1B1EE5,0x1AED39,0x1ABB94,0x1A89F6,0x1A585E,0x1A26CD,0x19F542,0x19C3BF,0x199241,0x1960CB,0x192F5B, + 0x18FDF2,0x18CC8F,0x189B33,0x1869DE,0x18388F,0x180746,0x17D605,0x17A4C9,0x177395,0x174267,0x17113F, + 0x16E01E,0x16AF04,0x167DF0,0x164CE2,0x161BDC,0x15EADB,0x15B9E1,0x1588EE,0x155801,0x15271A,0x14F63A, + 0x14C561,0x14948E,0x1463C1,0x1432FB,0x14023B,0x13D182,0x13A0CF,0x137023,0x133F7D,0x130EDD,0x12DE44, + 0x12ADB1,0x127D25,0x124C9F,0x121C1F,0x11EBA6,0x11BB33,0x118AC6,0x115A60,0x112A00,0x10F9A6,0x10C953, + 0x109906,0x1068BF,0x10387F,0x100845,0xFD811,0xFA7E4,0xF77BD,0xF479C,0xF1781,0xEE76D,0xEB75F, + 0xE8757,0xE5755,0xE275A,0xDF765,0xDC776,0xD978D,0xD67AA,0xD37CE,0xD07F8,0xCD828,0xCA85E, + 0xC789B,0xC48DD,0xC1926,0xBE975,0xBB9CA,0xB8A26,0xB5A87,0xB2AEF,0xAFB5C,0xACBD0,0xA9C4A, + 0xA6CCA,0xA3D50,0xA0DDC,0x9DE6F,0x9AF07,0x97FA6,0x9504A,0x920F5,0x8F1A6,0x8C25C,0x89319, + 0x863DC,0x834A5,0x80574,0x7D649,0x7A724,0x77805,0x748EC,0x719D9,0x6EACC,0x6BBC5,0x68CC4, + 0x65DC9,0x62ED4,0x5FFE5,0x5D0FC,0x5A218,0x5733B,0x54464,0x51593,0x4E6C7,0x4B802,0x48942, + 0x45A88,0x42BD5,0x3FD27,0x3CE7F,0x39FDD,0x37141,0x342AA,0x3141A,0x2E58F,0x2B70B,0x2888C, + 0x25A13,0x22BA0,0x1FD33,0x1CECB,0x1A069,0x1720E,0x143B8,0x11567,0xE71D,0xB8D8,0x8A9A, + 0x5C61,0x2E2D + }; + +#define LZHAM_CREATE_PROB_COST_TABLE 0 + +#if LZHAM_CREATE_PROB_COST_TABLE + class arith_prob_cost_initializer + { + public: + arith_prob_cost_initializer() + { + const double cInvLn2 = 1.4426950408889634073599246810019; // 1.0/ln(2) + + for (uint i = 0; i < cSymbolCodecArithProbScale; i++) + { + double flBits = i ? 
(-log(i * (1.0 / cSymbolCodecArithProbScale)) * cInvLn2) : 0; + g_prob_cost[i] = static_cast(floor(.5f + flBits * cBitCostScale)); + printf("0x%X,", g_prob_cost[i]); + if ((i % 11) == 10) printf("\n"); + } + printf("\n"); + } + }; + static arith_prob_cost_initializer g_prob_cost_initializer; +#endif + + raw_quasi_adaptive_huffman_data_model::raw_quasi_adaptive_huffman_data_model(bool encoding, uint total_syms, bool fast_updating, bool use_polar_codes) : + m_pDecode_tables(NULL), + m_total_syms(0), + m_max_cycle(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_decoder_table_bits(0), + m_encoding(encoding), + m_fast_updating(false), + m_use_polar_codes(false) + { + if (total_syms) + { + init(encoding, total_syms, fast_updating, use_polar_codes); + } + } + + raw_quasi_adaptive_huffman_data_model::raw_quasi_adaptive_huffman_data_model(const raw_quasi_adaptive_huffman_data_model& other) : + m_pDecode_tables(NULL), + m_total_syms(0), + m_max_cycle(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_decoder_table_bits(0), + m_encoding(false), + m_fast_updating(false), + m_use_polar_codes(false) + { + *this = other; + } + + raw_quasi_adaptive_huffman_data_model::~raw_quasi_adaptive_huffman_data_model() + { + if (m_pDecode_tables) + lzham_delete(m_pDecode_tables); + } + + bool raw_quasi_adaptive_huffman_data_model::assign(const raw_quasi_adaptive_huffman_data_model& rhs) + { + if (this == &rhs) + return true; + + m_total_syms = rhs.m_total_syms; + + m_max_cycle = rhs.m_max_cycle; + m_update_cycle = rhs.m_update_cycle; + m_symbols_until_update = rhs.m_symbols_until_update; + + m_total_count = rhs.m_total_count; + + m_sym_freq = rhs.m_sym_freq; + m_initial_sym_freq = rhs.m_initial_sym_freq; + + m_codes = rhs.m_codes; + m_code_sizes = rhs.m_code_sizes; + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + { + if (!m_pDecode_tables->assign(*rhs.m_pDecode_tables)) + { + clear(); + return false; + } + } + else + { + m_pDecode_tables = lzham_new(*rhs.m_pDecode_tables); + if (!m_pDecode_tables) + { + clear(); + return false; + } + } + } + else if (m_pDecode_tables) + { + lzham_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + m_decoder_table_bits = rhs.m_decoder_table_bits; + m_encoding = rhs.m_encoding; + m_fast_updating = rhs.m_fast_updating; + m_use_polar_codes = rhs.m_use_polar_codes; + + return true; + } + + raw_quasi_adaptive_huffman_data_model& raw_quasi_adaptive_huffman_data_model::operator= (const raw_quasi_adaptive_huffman_data_model& rhs) + { + assign(rhs); + return *this; + } + + void raw_quasi_adaptive_huffman_data_model::clear() + { + m_sym_freq.clear(); + m_initial_sym_freq.clear(); + m_codes.clear(); + m_code_sizes.clear(); + + m_max_cycle = 0; + m_total_syms = 0; + m_update_cycle = 0; + m_symbols_until_update = 0; + m_decoder_table_bits = 0; + m_total_count = 0; + + if (m_pDecode_tables) + { + lzham_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + m_fast_updating = false; + m_use_polar_codes = false; + } + + bool raw_quasi_adaptive_huffman_data_model::init(bool encoding, uint total_syms, bool fast_updating, bool use_polar_codes, const uint16 *pInitial_sym_freq) + { + m_encoding = encoding; + m_fast_updating = fast_updating; + m_use_polar_codes = use_polar_codes; + m_symbols_until_update = 0; + + if (!m_sym_freq.try_resize(total_syms)) + { + clear(); + return false; + } + + if (pInitial_sym_freq) + { + if (!m_initial_sym_freq.try_resize(total_syms)) + { + clear(); + return false; + } + 
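+         // Seed the model with the caller-supplied per-symbol starting frequencies.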
memcpy(m_initial_sym_freq.begin(), pInitial_sym_freq, total_syms * m_initial_sym_freq.size_in_bytes()); + } + + if (!m_code_sizes.try_resize(total_syms)) + { + clear(); + return false; + } + + m_total_syms = total_syms; + + if (m_total_syms <= 16) + m_decoder_table_bits = 0; + else + m_decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + + if (m_encoding) + { + lzham_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + + if (!m_codes.try_resize(total_syms)) + { + clear(); + return false; + } + } + else if (!m_pDecode_tables) + { + m_pDecode_tables = lzham_new(); + if (!m_pDecode_tables) + { + clear(); + return false; + } + } + + // TODO: Make this setting a user controllable parameter? + if (m_fast_updating) + m_max_cycle = (LZHAM_MAX(64, m_total_syms) + 6) << 5; + else + { +#if LZHAM_MORE_FREQUENT_TABLE_UPDATING + m_max_cycle = (LZHAM_MAX(24, m_total_syms) + 6) * 12; +#else + m_max_cycle = (LZHAM_MAX(32, m_total_syms) + 6) * 16; +#endif + } + + m_max_cycle = LZHAM_MIN(m_max_cycle, 32767); + + reset(); + + return true; + } + + bool raw_quasi_adaptive_huffman_data_model::reset() + { + if (!m_total_syms) + return true; + + if (m_initial_sym_freq.size()) + { + m_update_cycle = 0; + for (uint i = 0; i < m_total_syms; i++) + { + uint sym_freq = m_initial_sym_freq[i]; + m_sym_freq[i] = static_cast(sym_freq); + m_update_cycle += sym_freq; + } + } + else + { + for (uint i = 0; i < m_total_syms; i++) + m_sym_freq[i] = 1; + m_update_cycle = m_total_syms; + } + + m_total_count = 0; + m_symbols_until_update = 0; + + if (!update()) + return false; + + m_symbols_until_update = m_update_cycle = 8; + return true; + } + + void raw_quasi_adaptive_huffman_data_model::rescale() + { + uint total_freq = 0; + + for (uint i = 0; i < m_total_syms; i++) + { + uint freq = (m_sym_freq[i] + 1) >> 1; + total_freq += freq; + m_sym_freq[i] = static_cast(freq); + } + + m_total_count = total_freq; + } + + void raw_quasi_adaptive_huffman_data_model::reset_update_rate() + { + m_total_count += (m_update_cycle - m_symbols_until_update); + +#ifdef _DEBUG + uint actual_total = 0; + for (uint i = 0; i < m_sym_freq.size(); i++) + actual_total += m_sym_freq[i]; + LZHAM_ASSERT(actual_total == m_total_count); +#endif + + if (m_total_count > m_total_syms) + rescale(); + + m_symbols_until_update = m_update_cycle = LZHAM_MIN(8, m_update_cycle); + } + + bool raw_quasi_adaptive_huffman_data_model::update() + { + LZHAM_ASSERT(!m_symbols_until_update); + m_total_count += m_update_cycle; + LZHAM_ASSERT(m_total_count <= 65535); + + while (m_total_count >= 32768) + rescale(); + + uint table_size = m_use_polar_codes ? 
get_generate_polar_codes_table_size() : get_generate_huffman_codes_table_size(); + void *pTables = alloca(table_size); + + uint max_code_size, total_freq; + bool status; + if (m_use_polar_codes) + status = generate_polar_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); + else + status = generate_huffman_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); + LZHAM_ASSERT(status); + LZHAM_ASSERT(total_freq == m_total_count); + if ((!status) || (total_freq != m_total_count)) + return false; + + if (max_code_size > prefix_coding::cMaxExpectedCodeSize) + { + status = prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], prefix_coding::cMaxExpectedCodeSize); + LZHAM_ASSERT(status); + if (!status) + return false; + } + + if (m_encoding) + status = prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]); + else + status = prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, m_decoder_table_bits); + + LZHAM_ASSERT(status); + if (!status) + return false; + + if (m_fast_updating) + m_update_cycle = 2 * m_update_cycle; + else + m_update_cycle = (5 * m_update_cycle) >> 2; + + if (m_update_cycle > m_max_cycle) + m_update_cycle = m_max_cycle; + + m_symbols_until_update = m_update_cycle; + + return true; + } + + bool raw_quasi_adaptive_huffman_data_model::update(uint sym) + { + uint freq = m_sym_freq[sym]; + freq++; + m_sym_freq[sym] = static_cast(freq); + + LZHAM_ASSERT(freq <= UINT16_MAX); + + if (--m_symbols_until_update == 0) + { + if (!update()) + return false; + } + + return true; + } + + adaptive_bit_model::adaptive_bit_model() + { + clear(); + } + + adaptive_bit_model::adaptive_bit_model(float prob0) + { + set_probability_0(prob0); + } + + adaptive_bit_model::adaptive_bit_model(const adaptive_bit_model& other) : + m_bit_0_prob(other.m_bit_0_prob) + { + } + + void adaptive_bit_model::set_probability_0(float prob0) + { + m_bit_0_prob = static_cast(math::clamp((uint)(prob0 * cSymbolCodecArithProbScale), 1, cSymbolCodecArithProbScale - 1)); + } + + adaptive_arith_data_model::adaptive_arith_data_model(bool encoding, uint total_syms) + { + init(encoding, total_syms); + } + + adaptive_arith_data_model::adaptive_arith_data_model(const adaptive_arith_data_model& other) + { + m_total_syms = other.m_total_syms; + m_probs = other.m_probs; + } + + adaptive_arith_data_model::~adaptive_arith_data_model() + { + } + + adaptive_arith_data_model& adaptive_arith_data_model::operator= (const adaptive_arith_data_model& rhs) + { + m_total_syms = rhs.m_total_syms; + m_probs = rhs.m_probs; + return *this; + } + + void adaptive_arith_data_model::clear() + { + m_total_syms = 0; + m_probs.clear(); + } + + bool adaptive_arith_data_model::init(bool encoding, uint total_syms) + { + LZHAM_NOTE_UNUSED(encoding); + if (!total_syms) + { + clear(); + return true; + } + + if ((total_syms < 2) || (!math::is_power_of_2(total_syms))) + total_syms = math::next_pow2(total_syms); + + m_total_syms = total_syms; + + if (!m_probs.try_resize(m_total_syms)) + return false; + + return true; + } + + void adaptive_arith_data_model::reset() + { + for (uint i = 0; i < m_probs.size(); i++) + m_probs[i].clear(); + } + + void adaptive_arith_data_model::reset_update_rate() + { + } + + bool adaptive_arith_data_model::update(uint sym) + { + uint node = 1; + + uint bitmask = m_total_syms; + + do + { + bitmask >>= 1; + + uint bit = (sym & bitmask) ? 
1 : 0; + m_probs[node].update(bit); + node = (node << 1) + bit; + + } while (bitmask > 1); + + return true; + } + + bit_cost_t adaptive_arith_data_model::get_cost(uint sym) const + { + uint node = 1; + + uint bitmask = m_total_syms; + + bit_cost_t cost = 0; + do + { + bitmask >>= 1; + + uint bit = (sym & bitmask) ? 1 : 0; + cost += m_probs[node].get_cost(bit); + node = (node << 1) + bit; + + } while (bitmask > 1); + + return cost; + } + + symbol_codec::symbol_codec() + { + clear(); + } + + void symbol_codec::reset() + { + m_pDecode_buf = NULL; + m_pDecode_buf_next = NULL; + m_pDecode_buf_end = NULL; + m_decode_buf_size = 0; + + m_bit_buf = 0; + m_bit_count = 0; + m_total_model_updates = 0; + m_mode = cNull; + m_total_bits_written = 0; + + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = 0; + m_arith_total_bits = 0; + + m_output_buf.try_resize(0); + m_arith_output_buf.try_resize(0); + m_output_syms.try_resize(0); + + m_pDecode_need_bytes_func = NULL; + m_pDecode_private_data = NULL; + m_pSaved_huff_model = NULL; + m_pSaved_model = NULL; + m_saved_node_index = 0; + } + + void symbol_codec::clear() + { + reset(); + + m_output_buf.clear(); + m_arith_output_buf.clear(); + m_output_syms.clear(); + } + + bool symbol_codec::start_encoding(uint expected_file_size) + { + m_mode = cEncoding; + + m_total_model_updates = 0; + m_total_bits_written = 0; + + if (!put_bits_init(expected_file_size)) + return false; + + m_output_syms.try_resize(0); + + arith_start_encoding(); + + return true; + } + + bool symbol_codec::encode_bits(uint bits, uint num_bits) + { + LZHAM_ASSERT(m_mode == cEncoding); + + if (!num_bits) + return true; + + LZHAM_ASSERT((num_bits == 32) || (bits <= ((1U << num_bits) - 1))); + + if (num_bits > 16) + { + if (!record_put_bits(bits >> 16, num_bits - 16)) + return false; + if (!record_put_bits(bits & 0xFFFF, 16)) + return false; + } + else + { + if (!record_put_bits(bits, num_bits)) + return false; + } + return true; + } + + bool symbol_codec::encode_arith_init() + { + LZHAM_ASSERT(m_mode == cEncoding); + + output_symbol sym; + sym.m_bits = 0; + sym.m_num_bits = output_symbol::cArithInit; + sym.m_arith_prob0 = 0; + if (!m_output_syms.try_push_back(sym)) + return false; + + return true; + } + + bool symbol_codec::encode_align_to_byte() + { + LZHAM_ASSERT(m_mode == cEncoding); + + output_symbol sym; + sym.m_bits = 0; + sym.m_num_bits = output_symbol::cAlignToByteSym; + sym.m_arith_prob0 = 0; + if (!m_output_syms.try_push_back(sym)) + return false; + + return true; + } + + bool symbol_codec::encode(uint sym, quasi_adaptive_huffman_data_model& model) + { + LZHAM_ASSERT(m_mode == cEncoding); + LZHAM_ASSERT(model.m_encoding); + + if (!record_put_bits(model.m_codes[sym], model.m_code_sizes[sym])) + return false; + + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); + + LZHAM_ASSERT(freq <= UINT16_MAX); + + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + if (!model.update()) + return false; + } + return true; + } + + void symbol_codec::arith_propagate_carry() + { + int index = m_arith_output_buf.size() - 1; + while (index >= 0) + { + uint c = m_arith_output_buf[index]; + + if (c == 0xFF) + m_arith_output_buf[index] = 0; + else + { + m_arith_output_buf[index]++; + break; + } + + index--; + } + } + + bool symbol_codec::arith_renorm_enc_interval() + { + do + { + if (!m_arith_output_buf.try_push_back((m_arith_base >> 24) & 0xFF)) + return false; + m_total_bits_written += 8; + + m_arith_base <<= 8; + } while ((m_arith_length 
<<= 8) < cSymbolCodecArithMinLen); + return true; + } + + void symbol_codec::arith_start_encoding() + { + m_arith_output_buf.try_resize(0); + + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_total_bits = 0; + } + + bool symbol_codec::encode(uint bit, adaptive_bit_model& model, bool update_model) + { + LZHAM_ASSERT(m_mode == cEncoding); + + m_arith_total_bits++; + + output_symbol sym; + sym.m_bits = bit; + sym.m_num_bits = -1; + sym.m_arith_prob0 = model.m_bit_0_prob; + if (!m_output_syms.try_push_back(sym)) + return false; + + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + + if (!bit) + { + if (update_model) + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + + m_arith_length = x; + } + else + { + if (update_model) + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + + uint orig_base = m_arith_base; + m_arith_base += x; + m_arith_length -= x; + if (orig_base > m_arith_base) + arith_propagate_carry(); + } + + if (m_arith_length < cSymbolCodecArithMinLen) + { + if (!arith_renorm_enc_interval()) + return false; + } + + return true; + } + + bool symbol_codec::encode(uint sym, adaptive_arith_data_model& model) + { + uint node = 1; + + uint bitmask = model.m_total_syms; + + do + { + bitmask >>= 1; + + uint bit = (sym & bitmask) ? 1 : 0; + if (!encode(bit, model.m_probs[node])) + return false; + node = (node << 1) + bit; + + } while (bitmask > 1); + return true; + } + + bool symbol_codec::arith_stop_encoding() + { + uint orig_base = m_arith_base; + + if (m_arith_length > 2 * cSymbolCodecArithMinLen) + { + m_arith_base += cSymbolCodecArithMinLen; + m_arith_length = (cSymbolCodecArithMinLen >> 1); + } + else + { + m_arith_base += (cSymbolCodecArithMinLen >> 1); + m_arith_length = (cSymbolCodecArithMinLen >> 9); + } + + if (orig_base > m_arith_base) + arith_propagate_carry(); + + if (!arith_renorm_enc_interval()) + return false; + + while (m_arith_output_buf.size() < 4) + { + if (!m_arith_output_buf.try_push_back(0)) + return false; + m_total_bits_written += 8; + } + return true; + } + + bool symbol_codec::stop_encoding(bool support_arith) + { + LZHAM_ASSERT(m_mode == cEncoding); + + if (support_arith) + { + if (!arith_stop_encoding()) + return false; + } + + if (!assemble_output_buf()) + return false; + + m_mode = cNull; + return true; + } + + bool symbol_codec::record_put_bits(uint bits, uint num_bits) + { + LZHAM_ASSERT(m_mode == cEncoding); + + LZHAM_ASSERT(num_bits <= 25); + LZHAM_ASSERT(m_bit_count >= 25); + + if (!num_bits) + return true; + + m_total_bits_written += num_bits; + + output_symbol sym; + sym.m_bits = bits; + sym.m_num_bits = (uint16)num_bits; + sym.m_arith_prob0 = 0; + if (!m_output_syms.try_push_back(sym)) + return false; + + return true; + } + + bool symbol_codec::put_bits_init(uint expected_size) + { + m_bit_buf = 0; + m_bit_count = cBitBufSize; + + m_output_buf.try_resize(0); + if (!m_output_buf.try_reserve(expected_size)) + return false; + + return true; + } + + bool symbol_codec::put_bits(uint bits, uint num_bits) + { + LZHAM_ASSERT(num_bits <= 25); + LZHAM_ASSERT(m_bit_count >= 25); + + if (!num_bits) + return true; + + m_bit_count -= num_bits; + m_bit_buf |= (static_cast(bits) << m_bit_count); + + m_total_bits_written += num_bits; + + while (m_bit_count <= (cBitBufSize - 8)) + { + if (!m_output_buf.try_push_back(static_cast(m_bit_buf >> (cBitBufSize - 8)))) + return false; + + m_bit_buf <<= 8; + m_bit_count 
+= 8; + } + + return true; + } + + bool symbol_codec::put_bits_align_to_byte() + { + uint num_bits_in = cBitBufSize - m_bit_count; + if (num_bits_in & 7) + { + if (!put_bits(0, 8 - (num_bits_in & 7))) + return false; + } + return true; + } + + bool symbol_codec::flush_bits() + { + return put_bits(0, 7); // to ensure the last bits are flushed + } + + bool symbol_codec::assemble_output_buf() + { + m_total_bits_written = 0; + + uint arith_buf_ofs = 0; + + // Intermix the final Arithmetic, Huffman, or plain bits to a single combined bitstream. + // All bits from each source must be output in exactly the same order that the decompressor will read them. + for (uint sym_index = 0; sym_index < m_output_syms.size(); sym_index++) + { + const output_symbol& sym = m_output_syms[sym_index]; + + if (sym.m_num_bits == output_symbol::cAlignToByteSym) + { + if (!put_bits_align_to_byte()) + return false; + } + else if (sym.m_num_bits == output_symbol::cArithInit) + { + LZHAM_ASSERT(m_arith_output_buf.size()); + + if (m_arith_output_buf.size()) + { + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 0; + for (uint i = 0; i < 4; i++) + { + const uint c = m_arith_output_buf[arith_buf_ofs++]; + m_arith_value = (m_arith_value << 8) | c; + if (!put_bits(c, 8)) + return false; + } + } + } + else if (sym.m_num_bits == output_symbol::cArithSym) + { + // This renorm logic must match the logic used in the arithmetic decoder. + if (m_arith_length < cSymbolCodecArithMinLen) + { + do + { + const uint c = (arith_buf_ofs < m_arith_output_buf.size()) ? m_arith_output_buf[arith_buf_ofs++] : 0; + if (!put_bits(c, 8)) + return false; + m_arith_value = (m_arith_value << 8) | c; + } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); + } + + uint x = sym.m_arith_prob0 * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (bit == 0) + { + m_arith_length = x; + } + else + { + m_arith_value -= x; + m_arith_length -= x; + } + + LZHAM_VERIFY(bit == sym.m_bits); + } + else + { + // Huffman or plain bits + if (!put_bits(sym.m_bits, sym.m_num_bits)) + return false; + } + } + + return flush_bits(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Decoding + //------------------------------------------------------------------------------------------------------------------ + + bool symbol_codec::start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag, need_bytes_func_ptr pNeed_bytes_func, void *pPrivate_data) + { + if (!buf_size) + return false; + + m_total_model_updates = 0; + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + m_pDecode_need_bytes_func = pNeed_bytes_func; + m_pDecode_private_data = pPrivate_data; + m_decode_buf_eof = eof_flag; + + m_bit_buf = 0; + m_bit_count = 0; + + m_mode = cDecoding; + + return true; + } + + uint symbol_codec::decode_bits(uint num_bits) + { + LZHAM_ASSERT(m_mode == cDecoding); + + if (!num_bits) + return 0; + + if (num_bits > 16) + { + uint a = get_bits(num_bits - 16); + uint b = get_bits(16); + + return (a << 16) | b; + } + else + return get_bits(num_bits); + } + + void symbol_codec::decode_remove_bits(uint num_bits) + { + LZHAM_ASSERT(m_mode == cDecoding); + + while (num_bits > 16) + { + remove_bits(16); + num_bits -= 16; + } + + remove_bits(num_bits); + } + + uint symbol_codec::decode_peek_bits(uint num_bits) + { + LZHAM_ASSERT(m_mode == cDecoding); + LZHAM_ASSERT(num_bits <= 25); 
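+      // Top up the bit buffer until it holds at least num_bits, then return the top bits without removing them from the buffer.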
+ + if (!num_bits) + return 0; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + LZHAM_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + return static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + } + + uint symbol_codec::decode(quasi_adaptive_huffman_data_model& model) + { + LZHAM_ASSERT(m_mode == cDecoding); + LZHAM_ASSERT(!model.m_encoding); + + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + while (m_bit_count < (cBitBufSize - 8)) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); + uint sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + + LZHAM_ASSERT(t != UINT32_MAX); + sym = t & UINT16_MAX; + len = t >> 16; + + LZHAM_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for ( ; ; ) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + + if (((uint)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + LZHAM_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); + + LZHAM_ASSERT(freq <= UINT16_MAX); + + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + model.update(); + } + + return sym; + } + + uint64 symbol_codec::stop_decoding() + { + LZHAM_ASSERT(m_mode == cDecoding); + + uint64 n = m_pDecode_buf_next - m_pDecode_buf; + + m_mode = cNull; + + return n; + } + + uint symbol_codec::get_bits(uint num_bits) + { + LZHAM_ASSERT(num_bits <= 25); + + if (!num_bits) + return 0; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + LZHAM_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint result = static_cast(m_bit_buf >> (cBitBufSize - 
num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; + } + + void symbol_codec::remove_bits(uint num_bits) + { + LZHAM_ASSERT(num_bits <= 25); + + if (!num_bits) + return; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + LZHAM_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + } + + void symbol_codec::decode_align_to_byte() + { + LZHAM_ASSERT(m_mode == cDecoding); + + if (m_bit_count & 7) + { + remove_bits(m_bit_count & 7); + } + } + + int symbol_codec::decode_remove_byte_from_bit_buf() + { + if (m_bit_count < 8) + return -1; + int result = static_cast(m_bit_buf >> (cBitBufSize - 8)); + m_bit_buf <<= 8; + m_bit_count -= 8; + return result; + } + + uint symbol_codec::decode(adaptive_bit_model& model, bool update_model) + { + while (m_arith_length < cSymbolCodecArithMinLen) + { + uint c = get_bits(8); + m_arith_value = (m_arith_value << 8) | c; + m_arith_length <<= 8; + } + + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (!bit) + { + if (update_model) + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + + m_arith_length = x; + } + else + { + if (update_model) + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + m_arith_value -= x; + m_arith_length -= x; + } + + return bit; + } + + uint symbol_codec::decode(adaptive_arith_data_model& model) + { + uint node = 1; + + do + { + uint bit = decode(model.m_probs[node]); + + node = (node << 1) + bit; + + } while (node < model.m_total_syms); + + return node - model.m_total_syms; + } + + void symbol_codec::start_arith_decoding() + { + LZHAM_ASSERT(m_mode == cDecoding); + + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 0; + + m_arith_value = (get_bits(8) << 24); + m_arith_value |= (get_bits(8) << 16); + m_arith_value |= (get_bits(8) << 8); + m_arith_value |= get_bits(8); + } + + void symbol_codec::decode_need_bytes() + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + } + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzham_timer.cpp b/r5dev/thirdparty/lzham/lzham_timer.cpp new file mode 100644 index 00000000..8d31e53b --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_timer.cpp @@ -0,0 +1,147 @@ +// File: lzham_timer.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_timer.h" + +#ifndef LZHAM_USE_WIN32_API + #include +#endif + +namespace lzham +{ + unsigned long long lzham_timer::g_init_ticks; + unsigned long long lzham_timer::g_freq; + double lzham_timer::g_inv_freq; + + #if LZHAM_USE_WIN32_API + inline void query_counter(timer_ticks *pTicks) + { + 
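+      // Win32 build: sample the high-resolution performance counter.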
QueryPerformanceCounter(reinterpret_cast(pTicks)); + } + inline void query_counter_frequency(timer_ticks *pTicks) + { + QueryPerformanceFrequency(reinterpret_cast(pTicks)); + } + #else + inline void query_counter(timer_ticks *pTicks) + { + *pTicks = clock(); + } + inline void query_counter_frequency(timer_ticks *pTicks) + { + *pTicks = CLOCKS_PER_SEC; + } + #endif + + lzham_timer::lzham_timer() : + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) + { + if (!g_inv_freq) + init(); + } + + lzham_timer::lzham_timer(timer_ticks start_ticks) + { + if (!g_inv_freq) + init(); + + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void lzham_timer::start(timer_ticks start_ticks) + { + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void lzham_timer::start() + { + query_counter(&m_start_time); + + m_started = true; + m_stopped = false; + } + + void lzham_timer::stop() + { + LZHAM_ASSERT(m_started); + + query_counter(&m_stop_time); + + m_stopped = true; + } + + double lzham_timer::get_elapsed_secs() const + { + LZHAM_ASSERT(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return delta * g_inv_freq; + } + + timer_ticks lzham_timer::get_elapsed_us() const + { + LZHAM_ASSERT(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; + } + + void lzham_timer::init() + { + if (!g_inv_freq) + { + query_counter_frequency(&g_freq); + g_inv_freq = 1.0f / g_freq; + + query_counter(&g_init_ticks); + } + } + + timer_ticks lzham_timer::get_init_ticks() + { + if (!g_inv_freq) + init(); + + return g_init_ticks; + } + + timer_ticks lzham_timer::get_ticks() + { + if (!g_inv_freq) + init(); + + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; + } + + double lzham_timer::ticks_to_secs(timer_ticks ticks) + { + if (!g_inv_freq) + init(); + + return ticks * g_inv_freq; + } + +} // namespace lzham \ No newline at end of file diff --git a/r5dev/thirdparty/lzham/lzham_vector.cpp b/r5dev/thirdparty/lzham/lzham_vector.cpp new file mode 100644 index 00000000..ca3f449d --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_vector.cpp @@ -0,0 +1,74 @@ +// File: lzham_vector.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_vector.h" + +namespace lzham +{ + bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pMover, bool nofail) + { + LZHAM_ASSERT(m_size <= m_capacity); + +#if LZHAM_64BIT_POINTERS + LZHAM_ASSUME(sizeof(void*) == sizeof(uint64)); + LZHAM_ASSERT(min_new_capacity < (0x400000000ULL / element_size)); +#else + LZHAM_ASSUME(sizeof(void*) == sizeof(uint32)); + LZHAM_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); +#endif + + if (m_capacity >= min_new_capacity) + return true; + + size_t new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(static_cast(new_capacity)))) + new_capacity = math::next_pow2(static_cast(new_capacity)); + + LZHAM_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const size_t desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = lzham_realloc(m_p, desired_size, 
&actual_size, true); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; + sprintf_s(buf, sizeof(buf), "vector: lzham_realloc() failed allocating %u bytes", desired_size); + LZHAM_FAIL(buf); + } + m_p = new_p; + } + else + { + void* new_p = lzham_malloc(desired_size, &actual_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; + sprintf_s(buf, sizeof(buf), "vector: lzham_malloc() failed allocating %u bytes", desired_size); + LZHAM_FAIL(buf); + } + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + lzham_free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = static_cast(new_capacity); + + return true; + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzham_win32_threading.cpp b/r5dev/thirdparty/lzham/lzham_win32_threading.cpp new file mode 100644 index 00000000..23b3a273 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzham_win32_threading.cpp @@ -0,0 +1,220 @@ +// File: lzham_task_pool_win32.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "include/lzham_core.h" +#include "include/lzham_win32_threading.h" +#include "include/lzham_timer.h" +#include + +#if LZHAM_USE_WIN32_API + +namespace lzham +{ + task_pool::task_pool() : + m_num_threads(0), + m_tasks_available(0, 32767), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + } + + task_pool::task_pool(uint num_threads) : + m_num_threads(0), + m_tasks_available(0, 32767), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + + bool status = init(num_threads); + LZHAM_VERIFY(status); + } + + task_pool::~task_pool() + { + deinit(); + } + + bool task_pool::init(uint num_threads) + { + LZHAM_ASSERT(num_threads <= cMaxThreads); + num_threads = math::minimum(num_threads, cMaxThreads); + + deinit(); + + bool succeeded = true; + + m_num_threads = 0; + while (m_num_threads < num_threads) + { + m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL); + LZHAM_ASSERT(m_threads[m_num_threads] != 0); + + if (!m_threads[m_num_threads]) + { + succeeded = false; + break; + } + + m_num_threads++; + } + + if (!succeeded) + { + deinit(); + return false; + } + + return true; + } + + void task_pool::deinit() + { + if (m_num_threads) + { + join(); + + atomic_exchange32(&m_exit_flag, true); + + m_tasks_available.release(m_num_threads); + + for (uint i = 0; i < m_num_threads; i++) + { + if (m_threads[i]) + { + for ( ; ; ) + { + DWORD result = WaitForSingleObject(m_threads[i], 30000); + if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED)) + break; + } + + CloseHandle(m_threads[i]); + m_threads[i] = NULL; + } + } + + m_num_threads = 0; + + atomic_exchange32(&m_exit_flag, false); + } + + m_task_stack.clear(); + m_num_outstanding_tasks = 0; + } + + bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pFunc); + + task tsk; + tsk.m_callback = pFunc; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = 0; + + if (!m_task_stack.try_push(tsk)) + return false; + + atomic_increment32(&m_num_outstanding_tasks); + + m_tasks_available.release(1); + + return true; + } + + // It's the object's responsibility to delete pObj within the execute_task() method, if needed! 
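   // Illustrative sketch, not part of the original LZHAM sources: how the callback overload of
   // queue_task() defined above is typically driven (the executable_task overload follows below).
   // The callback signature void(uint64 data, void* pData_ptr) is inferred from the way
   // process_task() invokes tsk.m_callback; the thread count of 2 is just an example value.
   static void example_task(uint64 data, void* pData_ptr)
   {
      (void)data; (void)pData_ptr;               // per-task work would go here
   }

   static void example_task_pool_usage()
   {
      task_pool pool;
      if (!pool.init(2))                         // spins up two workers via _beginthreadex()
         return;

      for (uint64 i = 0; i < 8; i++)
         pool.queue_task(example_task, i, NULL); // pushes onto m_task_stack, releases m_tasks_available

      pool.join();                               // caller helps drain outstanding tasks
      pool.deinit();                             // raises m_exit_flag and waits for the workers
   }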
+ bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) + { + LZHAM_ASSERT(m_num_threads); + LZHAM_ASSERT(pObj); + + task tsk; + tsk.m_pObj = pObj; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + if (!m_task_stack.try_push(tsk)) + return false; + + atomic_increment32(&m_num_outstanding_tasks); + + m_tasks_available.release(1); + + return true; + } + + void task_pool::process_task(task& tsk) + { + if (tsk.m_flags & cTaskFlagObject) + tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); + else + tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); + + atomic_decrement32(&m_num_outstanding_tasks); + } + + void task_pool::join() + { + while (atomic_add32(&m_num_outstanding_tasks, 0) > 0) + { + task tsk; + if (m_task_stack.pop(tsk)) + { + process_task(tsk); + } + else + { + lzham_sleep(1); + } + } + } + + unsigned __stdcall task_pool::thread_func(void* pContext) + { + task_pool* pPool = static_cast(pContext); + + for ( ; ; ) + { + if (!pPool->m_tasks_available.wait()) + break; + + if (pPool->m_exit_flag) + break; + + task tsk; + if (pPool->m_task_stack.pop(tsk)) + { + pPool->process_task(tsk); + } + } + + _endthreadex(0); + return 0; + } + + static uint g_num_processors; + + uint lzham_get_max_helper_threads() + { + if (!g_num_processors) + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + g_num_processors = system_info.dwNumberOfProcessors; + } + + if (g_num_processors > 1) + { + // use all CPU's + return LZHAM_MIN(task_pool::cMaxThreads, g_num_processors - 1); + } + + return 0; + } + +} // namespace lzham + +#endif // LZHAM_USE_WIN32_API diff --git a/r5dev/thirdparty/lzham/lzhamcomp/lzham_comp.h b/r5dev/thirdparty/lzham/lzhamcomp/lzham_comp.h new file mode 100644 index 00000000..05827470 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamcomp/lzham_comp.h @@ -0,0 +1,38 @@ +// File: lzham_comp.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "../include/lzham.h" + +namespace lzham +{ + lzham_compress_state_ptr LZHAM_CDECL lzham_lib_compress_init(const lzham_compress_params *pParams); + + lzham_compress_state_ptr LZHAM_CDECL lzham_lib_compress_reinit(lzham_compress_state_ptr p); + + lzham_compress_checksums* LZHAM_CDECL lzham_lib_compress_deinit(lzham_compress_state_ptr p); + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_bool no_more_input_bytes_flag); + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress2( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_flush_t flush_type); + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress_memory(const lzham_compress_params *pParams, lzham_uint8* pDst_buf, size_t *pDst_len, const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32* pCrc32); + + int lzham_lib_z_deflateInit(lzham_z_streamp pStream, int level); + int lzham_lib_z_deflateInit2(lzham_z_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + int lzham_lib_z_deflateReset(lzham_z_streamp pStream); + int lzham_lib_z_deflate(lzham_z_streamp pStream, int flush); + int lzham_lib_z_deflateEnd(lzham_z_streamp pStream); + lzham_z_ulong lzham_lib_z_deflateBound(lzham_z_streamp pStream, lzham_z_ulong source_len); + int lzham_lib_z_compress2(unsigned char *pDest, lzham_z_ulong *pDest_len, const 
unsigned char *pSource, lzham_z_ulong source_len, int level); + int lzham_lib_z_compress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len); + lzham_z_ulong lzham_lib_z_compressBound(lzham_z_ulong source_len); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp.cpp b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp.cpp new file mode 100644 index 00000000..4e54c736 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp.cpp @@ -0,0 +1,611 @@ +// File: lzham_lzcomp.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "../include/lzham_core.h" +#include "../include/lzham.h" +#include "lzham_comp.h" +#include "lzham_lzcomp_internal.h" + +using namespace lzham; + +namespace lzham +{ + struct lzham_compress_state + { + // task_pool requires 8 or 16 alignment + task_pool m_tp; + lzcompressor m_compressor; + + uint m_dict_size_log2; + + const uint8 *m_pIn_buf; + size_t *m_pIn_buf_size; + uint8 *m_pOut_buf; + size_t *m_pOut_buf_size; + + size_t m_comp_data_ofs; + + bool m_finished_compression; + + lzham_compress_params m_params; + + lzham_compress_status_t m_status; + }; + + static lzham_compress_status_t create_internal_init_params(lzcompressor::init_params &internal_params, const lzham_compress_params *pParams) + { + if ((pParams->m_dict_size_log2 < CLZBase::cMinDictSizeLog2) || (pParams->m_dict_size_log2 > CLZBase::cMaxDictSizeLog2)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + if (pParams->m_cpucache_total_lines) + { + if (!math::is_power_of_2(pParams->m_cpucache_line_size)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + } + + internal_params.m_dict_size_log2 = pParams->m_dict_size_log2; + + if (pParams->m_max_helper_threads < 0) + internal_params.m_max_helper_threads = lzham_get_max_helper_threads(); + else + internal_params.m_max_helper_threads = pParams->m_max_helper_threads; + internal_params.m_max_helper_threads = LZHAM_MIN(LZHAM_MAX_HELPER_THREADS, internal_params.m_max_helper_threads); + + internal_params.m_num_cachelines = pParams->m_cpucache_total_lines; + internal_params.m_cacheline_size = pParams->m_cpucache_line_size; + internal_params.m_lzham_compress_flags = pParams->m_compress_flags; + + if (pParams->m_num_seed_bytes) + { + if ((!pParams->m_pSeed_bytes) || (pParams->m_num_seed_bytes > (1U << pParams->m_dict_size_log2))) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + internal_params.m_num_seed_bytes = pParams->m_num_seed_bytes; + internal_params.m_pSeed_bytes = pParams->m_pSeed_bytes; + } + + switch (pParams->m_level) + { + case LZHAM_COMP_LEVEL_FASTEST: internal_params.m_compression_level = cCompressionLevelFastest; break; + case LZHAM_COMP_LEVEL_FASTER: internal_params.m_compression_level = cCompressionLevelFaster; break; + case LZHAM_COMP_LEVEL_DEFAULT: internal_params.m_compression_level = cCompressionLevelDefault; break; + case LZHAM_COMP_LEVEL_BETTER: internal_params.m_compression_level = cCompressionLevelBetter; break; + case LZHAM_COMP_LEVEL_UBER: internal_params.m_compression_level = cCompressionLevelUber; break; + default: + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + }; + + return LZHAM_COMP_STATUS_SUCCESS; + } + + lzham_compress_state_ptr LZHAM_CDECL lzham_lib_compress_init(const lzham_compress_params *pParams) + { + if ((!pParams) || (pParams->m_struct_size != sizeof(lzham_compress_params))) + return NULL; + + if ((pParams->m_dict_size_log2 < CLZBase::cMinDictSizeLog2) || (pParams->m_dict_size_log2 > CLZBase::cMaxDictSizeLog2)) + return 
NULL; + + lzcompressor::init_params internal_params; + lzham_compress_status_t status = create_internal_init_params(internal_params, pParams); + if (status != LZHAM_COMP_STATUS_SUCCESS) + return NULL; + + lzham_compress_state *pState = lzham_new(); + if (!pState) + return NULL; + + pState->m_params = *pParams; + + pState->m_pIn_buf = NULL; + pState->m_pIn_buf_size = NULL; + pState->m_pOut_buf = NULL; + pState->m_pOut_buf_size = NULL; + pState->m_status = LZHAM_COMP_STATUS_NOT_FINISHED; + pState->m_comp_data_ofs = 0; + pState->m_finished_compression = false; + + if (internal_params.m_max_helper_threads) + { + if (!pState->m_tp.init(internal_params.m_max_helper_threads)) + { + lzham_delete(pState); + return NULL; + } + if (pState->m_tp.get_num_threads() >= internal_params.m_max_helper_threads) + { + internal_params.m_pTask_pool = &pState->m_tp; + } + else + { + internal_params.m_max_helper_threads = 0; + } + } + + if (!pState->m_compressor.init(internal_params)) + { + lzham_delete(pState); + return NULL; + } + + return pState; + } + + lzham_compress_state_ptr LZHAM_CDECL lzham_lib_compress_reinit(lzham_compress_state_ptr p) + { + lzham_compress_state *pState = static_cast(p); + if (pState) + { + if (!pState->m_compressor.reset()) + return NULL; + + pState->m_pIn_buf = NULL; + pState->m_pIn_buf_size = NULL; + pState->m_pOut_buf = NULL; + pState->m_pOut_buf_size = NULL; + pState->m_status = LZHAM_COMP_STATUS_NOT_FINISHED; + pState->m_comp_data_ofs = 0; + pState->m_finished_compression = false; + } + + return pState; + } + + lzham_compress_checksums* LZHAM_CDECL lzham_lib_compress_deinit(lzham_compress_state_ptr p) + { + lzham_compress_state *pState = static_cast(p); + if (!pState) + return nullptr; + + lzham_compress_checksums* checksums = new lzham_compress_checksums(); + checksums->adler32 = pState->m_compressor.get_src_adler32(); + checksums->crc32 = pState->m_compressor.get_src_crc32(); + + printf("checksums->adler32 %zX\n", checksums->adler32); + printf("checksums->crc32 %zX\n", checksums->crc32); + + lzham_delete(pState); + return checksums; + } + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_bool no_more_input_bytes_flag) + { + return lzham_lib_compress2(p, pIn_buf, pIn_buf_size, pOut_buf, pOut_buf_size, no_more_input_bytes_flag ? 
LZHAM_FINISH : LZHAM_NO_FLUSH); + } + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress2( + lzham_compress_state_ptr p, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_flush_t flush_type) + { + lzham_compress_state *pState = static_cast(p); + + if ((!pState) || (!pState->m_params.m_dict_size_log2) || (pState->m_status >= LZHAM_COMP_STATUS_FIRST_SUCCESS_OR_FAILURE_CODE) || (!pIn_buf_size) || (!pOut_buf_size)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + if ((*pIn_buf_size) && (!pIn_buf)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + if ((!*pOut_buf_size) || (!pOut_buf)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + byte_vec &comp_data = pState->m_compressor.get_compressed_data(); + size_t num_bytes_written_to_out_buf = 0; + if (pState->m_comp_data_ofs < comp_data.size()) + { + size_t n = LZHAM_MIN(comp_data.size() - pState->m_comp_data_ofs, *pOut_buf_size); + + memcpy(pOut_buf, comp_data.get_ptr() + pState->m_comp_data_ofs, n); + + pState->m_comp_data_ofs += n; + + const bool has_no_more_output = (pState->m_comp_data_ofs >= comp_data.size()); + if (has_no_more_output) + { + pOut_buf += n; + *pOut_buf_size -= n; + num_bytes_written_to_out_buf += n; + } + else + { + *pIn_buf_size = 0; + *pOut_buf_size = n; + pState->m_status = LZHAM_COMP_STATUS_HAS_MORE_OUTPUT; + return pState->m_status; + } + } + + comp_data.try_resize(0); + pState->m_comp_data_ofs = 0; + + if (pState->m_finished_compression) + { + if ((*pIn_buf_size) || (flush_type != LZHAM_FINISH)) + { + pState->m_status = LZHAM_COMP_STATUS_INVALID_PARAMETER; + return pState->m_status; + } + + *pIn_buf_size = 0; + *pOut_buf_size = num_bytes_written_to_out_buf; + + pState->m_status = LZHAM_COMP_STATUS_SUCCESS; + return pState->m_status; + } + + const size_t cMaxBytesToPutPerIteration = 4*1024*1024; + size_t bytes_to_put = LZHAM_MIN(cMaxBytesToPutPerIteration, *pIn_buf_size); + const bool consumed_entire_input_buf = (bytes_to_put == *pIn_buf_size); + + if (bytes_to_put) + { + if (!pState->m_compressor.put_bytes(pIn_buf, (uint)bytes_to_put)) + { + *pIn_buf_size = 0; + *pOut_buf_size = num_bytes_written_to_out_buf; + pState->m_status = LZHAM_COMP_STATUS_FAILED; + return pState->m_status; + } + } + + if ((consumed_entire_input_buf) && (flush_type != LZHAM_NO_FLUSH)) + { + if ((flush_type == LZHAM_SYNC_FLUSH) || (flush_type == LZHAM_FULL_FLUSH) || (flush_type == LZHAM_TABLE_FLUSH)) + { + if (!pState->m_compressor.flush(flush_type)) + { + *pIn_buf_size = 0; + *pOut_buf_size = num_bytes_written_to_out_buf; + pState->m_status = LZHAM_COMP_STATUS_FAILED; + return pState->m_status; + } + } + else if (!pState->m_finished_compression) + { + if (!pState->m_compressor.put_bytes(NULL, 0)) + { + *pIn_buf_size = 0; + *pOut_buf_size = num_bytes_written_to_out_buf; + pState->m_status = LZHAM_COMP_STATUS_FAILED; + return pState->m_status; + } + pState->m_finished_compression = true; + } + } + + size_t num_comp_bytes_to_output = LZHAM_MIN(comp_data.size() - pState->m_comp_data_ofs, *pOut_buf_size); + if (num_comp_bytes_to_output) + { + memcpy(pOut_buf, comp_data.get_ptr() + pState->m_comp_data_ofs, num_comp_bytes_to_output); + + pState->m_comp_data_ofs += num_comp_bytes_to_output; + } + + *pIn_buf_size = bytes_to_put; + *pOut_buf_size = num_bytes_written_to_out_buf + num_comp_bytes_to_output; + + const bool has_no_more_output = (pState->m_comp_data_ofs >= comp_data.size()); + if ((has_no_more_output) && (flush_type == LZHAM_FINISH) && (pState->m_finished_compression)) + 
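            // Terminal status selection: LZHAM_COMP_STATUS_SUCCESS only once every compressed byte
            // has been handed out after an LZHAM_FINISH request; LZHAM_COMP_STATUS_NEEDS_MORE_INPUT
            // when the caller's input was fully consumed with no flush pending; otherwise
            // NOT_FINISHED / HAS_MORE_OUTPUT depending on whether bytes are still waiting in comp_data.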
pState->m_status = LZHAM_COMP_STATUS_SUCCESS; + else if ((has_no_more_output) && (consumed_entire_input_buf) && (flush_type == LZHAM_NO_FLUSH)) + pState->m_status = LZHAM_COMP_STATUS_NEEDS_MORE_INPUT; + else + pState->m_status = has_no_more_output ? LZHAM_COMP_STATUS_NOT_FINISHED : LZHAM_COMP_STATUS_HAS_MORE_OUTPUT; + + return pState->m_status; + } + + lzham_compress_status_t LZHAM_CDECL lzham_lib_compress_memory(const lzham_compress_params *pParams, lzham_uint8* pDst_buf, size_t *pDst_len, const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32 *pCrc32) + { + if ((!pParams) || (!pDst_len)) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + + if (src_len) + { + if (!pSrc_buf) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + } + + if (sizeof(size_t) > sizeof(uint32)) + { + if (src_len > UINT32_MAX) + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + } + + lzcompressor::init_params internal_params; + lzham_compress_status_t status = create_internal_init_params(internal_params, pParams); + if (status != LZHAM_COMP_STATUS_SUCCESS) + return status; + + task_pool *pTP = NULL; + if (internal_params.m_max_helper_threads) + { + pTP = lzham_new(); + if (!pTP->init(internal_params.m_max_helper_threads)) + return LZHAM_COMP_STATUS_FAILED; + + internal_params.m_pTask_pool = pTP; + } + + lzcompressor *pCompressor = lzham_new(); + if (!pCompressor) + { + lzham_delete(pTP); + return LZHAM_COMP_STATUS_FAILED; + } + + if (!pCompressor->init(internal_params)) + { + lzham_delete(pTP); + lzham_delete(pCompressor); + return LZHAM_COMP_STATUS_INVALID_PARAMETER; + } + + if (src_len) + { + if (!pCompressor->put_bytes(pSrc_buf, static_cast(src_len))) + { + *pDst_len = 0; + lzham_delete(pTP); + lzham_delete(pCompressor); + return LZHAM_COMP_STATUS_FAILED; + } + } + + if (!pCompressor->put_bytes(NULL, 0)) + { + *pDst_len = 0; + lzham_delete(pTP); + lzham_delete(pCompressor); + return LZHAM_COMP_STATUS_FAILED; + } + + const byte_vec &comp_data = pCompressor->get_compressed_data(); + + size_t dst_buf_size = *pDst_len; + *pDst_len = comp_data.size(); + + if (pAdler32) + *pAdler32 = pCompressor->get_src_adler32(); + if (pCrc32) + *pCrc32 = pCompressor->get_src_crc32(); + + if (comp_data.size() > dst_buf_size) + { + lzham_delete(pTP); + lzham_delete(pCompressor); + return LZHAM_COMP_STATUS_OUTPUT_BUF_TOO_SMALL; + } + + memcpy(pDst_buf, comp_data.get_ptr(), comp_data.size()); + + lzham_delete(pTP); + lzham_delete(pCompressor); + return LZHAM_COMP_STATUS_SUCCESS; + } + + // ----------------- zlib-style API's + + int lzham_lib_z_deflateInit(lzham_z_streamp pStream, int level) + { + return lzham_lib_z_deflateInit2(pStream, level, LZHAM_Z_LZHAM, LZHAM_Z_DEFAULT_WINDOW_BITS, 9, LZHAM_Z_DEFAULT_STRATEGY); + } + + int lzham_lib_z_deflateInit2(lzham_z_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) + { + LZHAM_NOTE_UNUSED(strategy); + + if (!pStream) + return LZHAM_Z_STREAM_ERROR; + if ((mem_level < 1) || (mem_level > 9)) + return LZHAM_Z_PARAM_ERROR; + if ((method != LZHAM_Z_DEFLATED) && (method != LZHAM_Z_LZHAM)) + return LZHAM_Z_PARAM_ERROR; + + if (level == LZHAM_Z_DEFAULT_COMPRESSION) + level = 9; + + if (method == LZHAM_Z_DEFLATED) + { + // Force Deflate to LZHAM with default window_bits. + method = LZHAM_Z_LZHAM; + window_bits = LZHAM_Z_DEFAULT_WINDOW_BITS; + } + +#ifdef LZHAM_Z_API_FORCE_WINDOW_BITS + window_bits = LZHAM_Z_API_FORCE_WINDOW_BITS; +#endif + + int max_window_bits = LZHAM_64BIT_POINTERS ? 
LZHAM_MAX_DICT_SIZE_LOG2_X64 : LZHAM_MAX_DICT_SIZE_LOG2_X86; + if ((labs(window_bits) < LZHAM_MIN_DICT_SIZE_LOG2) || (labs(window_bits) > max_window_bits)) + return LZHAM_Z_PARAM_ERROR; + + lzham_compress_params comp_params; + + utils::zero_object(comp_params); + comp_params.m_struct_size = sizeof(lzham_compress_params); + + comp_params.m_level = LZHAM_COMP_LEVEL_UBER; + if (level <= 1) + comp_params.m_level = LZHAM_COMP_LEVEL_FASTEST; + else if (level <= 3) + comp_params.m_level = LZHAM_COMP_LEVEL_FASTER; + else if (level <= 5) + comp_params.m_level = LZHAM_COMP_LEVEL_DEFAULT; + else if (level <= 7) + comp_params.m_level = LZHAM_COMP_LEVEL_BETTER; + + if (level == 10) + comp_params.m_compress_flags |= LZHAM_COMP_FLAG_EXTREME_PARSING; + + // Use all CPU's. TODO: This is not always the best idea depending on the dictionary size and the # of bytes to compress. + comp_params.m_max_helper_threads = -1; + + comp_params.m_dict_size_log2 = labs(window_bits); + + if (window_bits > 0) + comp_params.m_compress_flags |= LZHAM_COMP_FLAG_WRITE_ZLIB_STREAM; + + pStream->data_type = 0; + pStream->adler32 = LZHAM_Z_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + + lzham_compress_state_ptr pComp = lzham_lib_compress_init(&comp_params); + if (!pComp) + return LZHAM_Z_PARAM_ERROR; + + pStream->state = (struct lzham_z_internal_state *)pComp; + + return LZHAM_Z_OK; + } + + int lzham_lib_z_deflateReset(lzham_z_streamp pStream) + { + if (!pStream) + return LZHAM_Z_STREAM_ERROR; + + lzham_compress_state_ptr pComp = (lzham_compress_state_ptr)pStream->state; + if (!pComp) + return LZHAM_Z_STREAM_ERROR; + + pComp = lzham_lib_compress_reinit(pComp); + if (!pComp) + return LZHAM_Z_STREAM_ERROR; + + pStream->state = (struct lzham_z_internal_state *)pComp; + + return LZHAM_Z_OK; + } + + int lzham_lib_z_deflate(lzham_z_streamp pStream, int flush) + { + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > LZHAM_Z_FINISH) || (!pStream->next_out)) + return LZHAM_Z_STREAM_ERROR; + + if (!pStream->avail_out) + return LZHAM_Z_BUF_ERROR; + + if (flush == LZHAM_Z_PARTIAL_FLUSH) + flush = LZHAM_Z_SYNC_FLUSH; + + int lzham_status = LZHAM_Z_OK; + lzham_z_ulong orig_total_in = pStream->total_in, orig_total_out = pStream->total_out; + for ( ; ; ) + { + size_t in_bytes = pStream->avail_in, out_bytes = pStream->avail_out; + + lzham_compress_state_ptr pComp = (lzham_compress_state_ptr)pStream->state; + lzham_compress_state *pState = static_cast(pComp); + + lzham_compress_status_t status = lzham_lib_compress2( + pComp, + pStream->next_in, &in_bytes, + pStream->next_out, &out_bytes, + (lzham_flush_t)flush); + + pStream->next_in += (uint)in_bytes; + pStream->avail_in -= (uint)in_bytes; + pStream->total_in += (uint)in_bytes; + + pStream->next_out += (uint)out_bytes; + pStream->avail_out -= (uint)out_bytes; + pStream->total_out += (uint)out_bytes; + + pStream->adler32 = pState->m_compressor.get_src_adler32(); + pStream->crc32 = pState->m_compressor.get_src_crc32(); + + if (status >= LZHAM_COMP_STATUS_FIRST_FAILURE_CODE) + { + lzham_status = LZHAM_Z_STREAM_ERROR; + break; + } + else if (status == LZHAM_COMP_STATUS_SUCCESS) + { + lzham_status = LZHAM_Z_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != LZHAM_Z_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return LZHAM_Z_BUF_ERROR; // Can't make forward progress without some input. 
+ } + } + return lzham_status; + } + + int lzham_lib_z_deflateEnd(lzham_z_streamp pStream) + { + if (!pStream) + return LZHAM_Z_STREAM_ERROR; + + lzham_compress_state_ptr pComp = (lzham_compress_state_ptr)pStream->state; + if (pComp) + { + lzham_compress_checksums* checksums = lzham_lib_compress_deinit(pComp); + + pStream->adler32 = checksums->adler32; + pStream->crc32 = checksums->crc32; + + pStream->state = NULL; + } + + return LZHAM_Z_OK; + } + + lzham_z_ulong lzham_lib_z_deflateBound(lzham_z_streamp pStream, lzham_z_ulong source_len) + { + LZHAM_NOTE_UNUSED(pStream); + return 64 + source_len + ((source_len + 4095) / 4096) * 4; + } + + int lzham_lib_z_compress2(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len, int level) + { + int status; + lzham_z_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case lzham_z_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) + return LZHAM_Z_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (uint)source_len; + stream.next_out = pDest; + stream.avail_out = (uint)*pDest_len; + + status = lzham_lib_z_deflateInit(&stream, level); + if (status != LZHAM_Z_OK) + return status; + + status = lzham_lib_z_deflate(&stream, LZHAM_Z_FINISH); + if (status != LZHAM_Z_STREAM_END) + { + lzham_lib_z_deflateEnd(&stream); + return (status == LZHAM_Z_OK) ? LZHAM_Z_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return lzham_lib_z_deflateEnd(&stream); + } + + int lzham_lib_z_compress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len) + { + return lzham_lib_z_compress2(pDest, pDest_len, pSource, source_len, (int)LZHAM_Z_DEFAULT_COMPRESSION); + } + + lzham_z_ulong lzham_lib_z_compressBound(lzham_z_ulong source_len) + { + return lzham_lib_z_deflateBound(NULL, source_len); + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.cpp b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.cpp new file mode 100644 index 00000000..55ac4149 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.cpp @@ -0,0 +1,1972 @@ +// File: lzham_lzcomp_internal.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "../include/lzham_core.h" +#include "lzham_lzcomp_internal.h" +#include "../include/lzham_checksum.h" +#include "../include/lzham_timer.h" +#include "../include/lzham_lzbase.h" +#include + +// Update and print high-level coding statistics if set to 1. +// TODO: Add match distance coding statistics. +#define LZHAM_UPDATE_STATS 0 + +// Only parse on the main thread, for easier debugging. +#define LZHAM_FORCE_SINGLE_THREADED_PARSING 0 + +// Verify all computed match costs against the generic/slow state::get_cost() method. +#define LZHAM_VERIFY_MATCH_COSTS 0 + +// Set to 1 to force all blocks to be uncompressed (raw). 
+#define LZHAM_FORCE_ALL_RAW_BLOCKS 0 + +namespace lzham +{ + static comp_settings s_level_settings[cCompressionLevelCount] = + { + // cCompressionLevelFastest + { + 8, // m_fast_bytes + true, // m_fast_adaptive_huffman_updating + true, // m_use_polar_codes + 1, // m_match_accel_max_matches_per_probe + 2, // m_match_accel_max_probes + }, + // cCompressionLevelFaster + { + 24, // m_fast_bytes + true, // m_fast_adaptive_huffman_updating + true, // m_use_polar_codes + 6, // m_match_accel_max_matches_per_probe + 12, // m_match_accel_max_probes + }, + // cCompressionLevelDefault + { + 32, // m_fast_bytes + false, // m_fast_adaptive_huffman_updating + true, // m_use_polar_codes + UINT_MAX, // m_match_accel_max_matches_per_probe + 16, // m_match_accel_max_probes + }, + // cCompressionLevelBetter + { + 48, // m_fast_bytes + false, // m_fast_adaptive_huffman_updating + false, // m_use_polar_codes + UINT_MAX, // m_match_accel_max_matches_per_probe + 32, // m_match_accel_max_probes + }, + // cCompressionLevelUber + { + 64, // m_fast_bytes + false, // m_fast_adaptive_huffman_updating + false, // m_use_polar_codes + UINT_MAX, // m_match_accel_max_matches_per_probe + cMatchAccelMaxSupportedProbes, // m_match_accel_max_probes + } + }; + + lzcompressor::lzcompressor() : + m_src_size(-1), + m_src_adler32(0), + m_src_crc32(0), + m_step(0), + m_block_start_dict_ofs(0), + m_block_index(0), + m_finished(false), + m_num_parse_threads(0), + m_parse_jobs_remaining(0), + m_block_history_size(0), + m_block_history_next(0) + { + LZHAM_VERIFY( ((uint32_ptr)this & (LZHAM_GET_ALIGNMENT(lzcompressor) - 1)) == 0); + } + + bool lzcompressor::init_seed_bytes() + { + uint cur_seed_ofs = 0; + + while (cur_seed_ofs < m_params.m_num_seed_bytes) + { + uint total_bytes_remaining = m_params.m_num_seed_bytes - cur_seed_ofs; + uint num_bytes_to_add = math::minimum(total_bytes_remaining, m_params.m_block_size); + + if (!m_accel.add_bytes_begin(num_bytes_to_add, static_cast(m_params.m_pSeed_bytes) + cur_seed_ofs)) + return false; + m_accel.add_bytes_end(); + + m_accel.advance_bytes(num_bytes_to_add); + + cur_seed_ofs += num_bytes_to_add; + } + + return true; + } + + bool lzcompressor::init(const init_params& params) + { + clear(); + + if ((params.m_dict_size_log2 < CLZBase::cMinDictSizeLog2) || (params.m_dict_size_log2 > CLZBase::cMaxDictSizeLog2)) + return false; + if ((params.m_compression_level < 0) || (params.m_compression_level > cCompressionLevelCount)) + return false; + + m_params = params; + m_use_task_pool = (m_params.m_pTask_pool) && (m_params.m_pTask_pool->get_num_threads() != 0) && (m_params.m_max_helper_threads > 0); + if ((m_params.m_max_helper_threads) && (!m_use_task_pool)) + return false; + m_settings = s_level_settings[params.m_compression_level]; + + const uint dict_size = 1U << m_params.m_dict_size_log2; + + if (params.m_num_seed_bytes) + { + if (!params.m_pSeed_bytes) + return false; + if (params.m_num_seed_bytes > dict_size) + return false; + } + + if (m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_FORCE_POLAR_CODING) + m_settings.m_use_polar_codes = true; + + uint max_block_size = dict_size / 8; + if (m_params.m_block_size > max_block_size) + { + m_params.m_block_size = max_block_size; + } + + m_num_parse_threads = 1; + +#if !LZHAM_FORCE_SINGLE_THREADED_PARSING + if (params.m_max_helper_threads > 0) + { + LZHAM_ASSUME(cMaxParseThreads >= 4); + + if (m_params.m_block_size < 16384) + { + m_num_parse_threads = LZHAM_MIN(cMaxParseThreads, params.m_max_helper_threads + 1); + } + else + { + if 
((params.m_max_helper_threads == 1) || (m_params.m_compression_level == cCompressionLevelFastest)) + { + m_num_parse_threads = 1; + } + else if (params.m_max_helper_threads <= 3) + { + m_num_parse_threads = 2; + } + else if (params.m_max_helper_threads <= 7) + { + if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) && (m_params.m_compression_level == cCompressionLevelUber)) + m_num_parse_threads = 4; + else + m_num_parse_threads = 2; + } + else + { + // 8-16 + m_num_parse_threads = 4; + } + } + } +#endif + + int num_parse_jobs = m_num_parse_threads - 1; + uint match_accel_helper_threads = LZHAM_MAX(0, (int)params.m_max_helper_threads - num_parse_jobs); + + LZHAM_ASSERT(m_num_parse_threads >= 1); + LZHAM_ASSERT(m_num_parse_threads <= cMaxParseThreads); + + if (!m_use_task_pool) + { + LZHAM_ASSERT(!match_accel_helper_threads && (m_num_parse_threads == 1)); + } + else + { + LZHAM_ASSERT((match_accel_helper_threads + (m_num_parse_threads - 1)) <= params.m_max_helper_threads); + } + + if (!m_accel.init(this, params.m_pTask_pool, match_accel_helper_threads, dict_size, m_settings.m_match_accel_max_matches_per_probe, false, m_settings.m_match_accel_max_probes)) + return false; + + init_position_slots(params.m_dict_size_log2); + init_slot_tabs(); + + if (!m_state.init(*this, m_settings.m_fast_adaptive_huffman_updating, m_settings.m_use_polar_codes)) + return false; + + if (!m_block_buf.try_reserve(m_params.m_block_size)) + return false; + + if (!m_comp_buf.try_reserve(m_params.m_block_size*2)) + return false; + + for (uint i = 0; i < m_num_parse_threads; i++) + { + if (!m_parse_thread_state[i].m_initial_state.init(*this, m_settings.m_fast_adaptive_huffman_updating, m_settings.m_use_polar_codes)) + return false; + } + + m_block_history_size = 0; + m_block_history_next = 0; + + if (params.m_num_seed_bytes) + { + if (!init_seed_bytes()) + return false; + } + + if (!send_zlib_header()) + return false; + + m_src_size = 0; + + return true; + } + + // See http://www.gzip.org/zlib/rfc-zlib.html + // Method is set to 14 (LZHAM) and CINFO is (window_size - 15). 
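   // Worked example (illustrative, not part of the original sources): assuming LZHAM_Z_LZHAM == 14
   // and an example m_dict_size_log2 of 26, cmf = 14 | ((26 - 15) << 4) = 0xBE. At the highest
   // compression level the switch below picks FLEVEL = 3, so flg starts at 3 << 6 = 0xC0 and the
   // FCHECK fix-up raises it to 0xD8, making ((0xBE << 8) + 0xD8) = 48856 divisible by 31. Such a
   // stream therefore opens with the bytes BE D8, followed by the 4-byte adler32 of the seed
   // dictionary only when FDICT is set.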
+ bool lzcompressor::send_zlib_header() + { + if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_WRITE_ZLIB_STREAM) == 0) + return true; + + // set CM (method) and CINFO (dictionary size) fields + int cmf = LZHAM_Z_LZHAM | ((m_params.m_dict_size_log2 - 15) << 4); + + // set FLEVEL by mapping LZHAM's compression level to zlib's + int flg = 0; + switch (m_params.m_compression_level) + { + case LZHAM_COMP_LEVEL_FASTEST: + { + flg = 0 << 6; + break; + } + case LZHAM_COMP_LEVEL_FASTER: + { + flg = 1 << 6; + break; + } + case LZHAM_COMP_LEVEL_DEFAULT: + case LZHAM_COMP_LEVEL_BETTER: + { + flg = 2 << 6; + break; + } + default: + { + flg = 3 << 6; + break; + } + } + + // set FDICT flag + if (m_params.m_pSeed_bytes) + flg |= 32; + + int check = ((cmf << 8) + flg) % 31; + if (check) + flg += (31 - check); + + LZHAM_ASSERT(0 == (((cmf << 8) + flg) % 31)); + if (!m_comp_buf.try_push_back(static_cast(cmf))) + return false; + if (!m_comp_buf.try_push_back(static_cast(flg))) + return false; + + if (m_params.m_pSeed_bytes) + { + // send adler32 of DICT + uint dict_adler32 = adler32(m_params.m_pSeed_bytes, m_params.m_num_seed_bytes); + for (uint i = 0; i < 4; i++) + { + if (!m_comp_buf.try_push_back(static_cast(dict_adler32 >> 24))) + return false; + dict_adler32 <<= 8; + } + } + + return true; + } + + void lzcompressor::clear() + { + m_codec.clear(); + m_src_size = -1; + m_src_adler32 = cInitAdler32; + m_src_crc32 = cInitCRC32; + m_block_buf.clear(); + m_comp_buf.clear(); + + m_step = 0; + m_finished = false; + m_use_task_pool = false; + m_block_start_dict_ofs = 0; + m_block_index = 0; + m_state.clear(); + m_num_parse_threads = 0; + m_parse_jobs_remaining = 0; + + for (uint i = 0; i < cMaxParseThreads; i++) + { + parse_thread_state &parse_state = m_parse_thread_state[i]; + parse_state.m_initial_state.clear(); + + for (uint j = 0; j <= cMaxParseGraphNodes; j++) + parse_state.m_nodes[j].clear(); + + parse_state.m_start_ofs = 0; + parse_state.m_bytes_to_match = 0; + parse_state.m_best_decisions.clear(); + parse_state.m_issue_reset_state_partial = false; + parse_state.m_emit_decisions_backwards = false; + parse_state.m_failed = false; + } + + m_block_history_size = 0; + m_block_history_next = 0; + } + + bool lzcompressor::reset() + { + if (m_src_size < 0) + return false; + + m_accel.reset(); + m_codec.reset(); + m_stats.clear(); + m_src_size = 0; + m_src_adler32 = cInitAdler32; + m_src_crc32 = cInitCRC32; + m_block_buf.try_resize(0); + m_comp_buf.try_resize(0); + + m_step = 0; + m_finished = false; + m_block_start_dict_ofs = 0; + m_block_index = 0; + m_state.reset(); + + m_block_history_size = 0; + m_block_history_next = 0; + + if (m_params.m_num_seed_bytes) + { + if (!init_seed_bytes()) + return false; + } + + return send_zlib_header(); + } + + bool lzcompressor::code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match) + { +#ifdef LZHAM_LZDEBUG + if (!m_codec.encode_bits(CLZBase::cLZHAMDebugSyncMarkerValue, CLZBase::cLZHAMDebugSyncMarkerBits)) return false; + if (!m_codec.encode_bits(lzdec.is_match(), 1)) return false; + if (!m_codec.encode_bits(lzdec.get_len(), 17)) return false; + if (!m_codec.encode_bits(m_state.m_cur_state, 4)) return false; +#endif + +#ifdef LZHAM_LZVERIFY + if (lzdec.is_match()) + { + uint match_dist = lzdec.get_match_dist(m_state); + + LZHAM_VERIFY(m_accel[cur_ofs] == m_accel[(cur_ofs - match_dist) & (m_accel.get_max_dict_size() - 1)]); + } +#endif + + const uint len = lzdec.get_len(); + + if (!m_state.encode(m_codec, *this, m_accel, lzdec)) + return false; + + cur_ofs 
+= len; + LZHAM_ASSERT(bytes_to_match >= len); + bytes_to_match -= len; + + m_accel.advance_bytes(len); + + m_step++; + + return true; + } + + bool lzcompressor::send_sync_block(lzham_flush_t flush_type) + { + m_codec.reset(); + + if (!m_codec.start_encoding(128)) + return false; +#ifdef LZHAM_LZDEBUG + if (!m_codec.encode_bits(166, 12)) + return false; +#endif + if (!m_codec.encode_bits(cSyncBlock, cBlockHeaderBits)) + return false; + + int flush_code = 0; + switch (flush_type) + { + case LZHAM_FULL_FLUSH: + flush_code = 2; + break; + case LZHAM_TABLE_FLUSH: + flush_code = 1; + break; + case LZHAM_SYNC_FLUSH: + case LZHAM_NO_FLUSH: + case LZHAM_FINISH: + flush_code = 0; + break; + } + if (!m_codec.encode_bits(flush_code, cBlockFlushTypeBits)) + return false; + + if (!m_codec.encode_align_to_byte()) + return false; + if (!m_codec.encode_bits(0x0000, 16)) + return false; + if (!m_codec.encode_bits(0xFFFF, 16)) + return false; + if (!m_codec.stop_encoding(true)) + return false; + if (!m_comp_buf.append(m_codec.get_encoding_buf())) + return false; + + m_block_index++; + return true; + } + + bool lzcompressor::flush(lzham_flush_t flush_type) + { + LZHAM_ASSERT(!m_finished); + if (m_finished) + return false; + + bool status = true; + if (m_block_buf.size()) + { + status = compress_block(m_block_buf.get_ptr(), m_block_buf.size()); + + m_block_buf.try_resize(0); + } + + if (status) + { + status = send_sync_block(flush_type); + + if (LZHAM_FULL_FLUSH == flush_type) + { + m_accel.flush(); + m_state.reset(); + } + } + + lzham_flush_buffered_printf(); + + return status; + } + + bool lzcompressor::put_bytes(const void* pBuf, uint buf_len) + { + LZHAM_ASSERT(!m_finished); + if (m_finished) + return false; + + bool status = true; + + if (!pBuf) + { + // Last block - flush whatever's left and send the final block. + if (m_block_buf.size()) + { + status = compress_block(m_block_buf.get_ptr(), m_block_buf.size()); + + m_block_buf.try_resize(0); + } + + if (status) + { + if (!send_final_block()) + { + status = false; + } + } + + m_finished = true; + } + else + { + // Compress blocks. + const uint8 *pSrcBuf = static_cast(pBuf); + uint num_src_bytes_remaining = buf_len; + + while (num_src_bytes_remaining) + { + const uint num_bytes_to_copy = LZHAM_MIN(num_src_bytes_remaining, m_params.m_block_size - m_block_buf.size()); + + if (num_bytes_to_copy == m_params.m_block_size) + { + LZHAM_ASSERT(!m_block_buf.size()); + + // Full-block available - compress in-place. + status = compress_block(pSrcBuf, num_bytes_to_copy); + } + else + { + // Less than a full block available - append to already accumulated bytes. 
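               // (put_bytes() accumulates partial input in m_block_buf until a full
               // m_params.m_block_size block is available; only full blocks, or the final partial
               // block at flush/finish time, are handed to compress_block().)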
+ if (!m_block_buf.append(static_cast(pSrcBuf), num_bytes_to_copy)) + return false; + + LZHAM_ASSERT(m_block_buf.size() <= m_params.m_block_size); + + if (m_block_buf.size() == m_params.m_block_size) + { + status = compress_block(m_block_buf.get_ptr(), m_block_buf.size()); + + m_block_buf.try_resize(0); + } + } + + if (!status) + return false; + + pSrcBuf += num_bytes_to_copy; + num_src_bytes_remaining -= num_bytes_to_copy; + } + } + + lzham_flush_buffered_printf(); + + return status; + } + + bool lzcompressor::send_final_block() + { + if (!m_codec.start_encoding(16)) + return false; + +#ifdef LZHAM_LZDEBUG + if (!m_codec.encode_bits(166, 12)) + return false; +#endif + + if (!m_block_index) + { + if (!send_configuration()) + return false; + } + + if (!m_codec.encode_bits(cEOFBlock, cBlockHeaderBits)) + return false; + + if (!m_codec.encode_align_to_byte()) + return false; + + if (!m_codec.encode_bits(m_src_adler32, 32)) + return false; + + if (!m_codec.encode_bits(m_src_crc32, 32)) + return false; + + if (!m_codec.stop_encoding(true)) + return false; + + if (m_comp_buf.empty()) + { + m_comp_buf.swap(m_codec.get_encoding_buf()); + } + else + { + if (!m_comp_buf.append(m_codec.get_encoding_buf())) + return false; + } + + m_block_index++; + +#if LZHAM_UPDATE_STATS + m_stats.print(); +#endif + + return true; + } + + bool lzcompressor::send_configuration() + { + if (!m_codec.encode_bits(m_settings.m_fast_adaptive_huffman_updating, 1)) + return false; + if (!m_codec.encode_bits(m_settings.m_use_polar_codes, 1)) + return false; + + return true; + } + + void lzcompressor::node::add_state( + int parent_index, int parent_state_index, + const lzdecision &lzdec, state &parent_state, + bit_cost_t total_cost, + uint total_complexity) + { + state_base trial_state; + parent_state.save_partial_state(trial_state); + trial_state.partial_advance(lzdec); + + for (int i = m_num_node_states - 1; i >= 0; i--) + { + node_state &cur_node_state = m_node_states[i]; + if (cur_node_state.m_saved_state == trial_state) + { + if ( (total_cost < cur_node_state.m_total_cost) || + ((total_cost == cur_node_state.m_total_cost) && (total_complexity < cur_node_state.m_total_complexity)) ) + { + cur_node_state.m_parent_index = static_cast(parent_index); + cur_node_state.m_parent_state_index = static_cast(parent_state_index); + cur_node_state.m_lzdec = lzdec; + cur_node_state.m_total_cost = total_cost; + cur_node_state.m_total_complexity = total_complexity; + + while (i > 0) + { + if ((m_node_states[i].m_total_cost < m_node_states[i - 1].m_total_cost) || + ((m_node_states[i].m_total_cost == m_node_states[i - 1].m_total_cost) && (m_node_states[i].m_total_complexity < m_node_states[i - 1].m_total_complexity))) + { + std::swap(m_node_states[i], m_node_states[i - 1]); + i--; + } + else + break; + } + } + + return; + } + } + + int insert_index; + for (insert_index = m_num_node_states; insert_index > 0; insert_index--) + { + node_state &cur_node_state = m_node_states[insert_index - 1]; + + if ( (total_cost > cur_node_state.m_total_cost) || + ((total_cost == cur_node_state.m_total_cost) && (total_complexity >= cur_node_state.m_total_complexity)) ) + { + break; + } + } + + if (insert_index == cMaxNodeStates) + return; + + uint num_behind = m_num_node_states - insert_index; + uint num_to_move = (m_num_node_states < cMaxNodeStates) ? 
num_behind : (num_behind - 1); + if (num_to_move) + { + LZHAM_ASSERT((insert_index + 1 + num_to_move) <= cMaxNodeStates); + memmove( &m_node_states[insert_index + 1], &m_node_states[insert_index], sizeof(node_state) * num_to_move); + } + + node_state *pNew_node_state = &m_node_states[insert_index]; + pNew_node_state->m_parent_index = static_cast(parent_index); + pNew_node_state->m_parent_state_index = static_cast(parent_state_index); + pNew_node_state->m_lzdec = lzdec; + pNew_node_state->m_total_cost = total_cost; + pNew_node_state->m_total_complexity = total_complexity; + pNew_node_state->m_saved_state = trial_state; + + m_num_node_states = LZHAM_MIN(m_num_node_states + 1, static_cast(cMaxNodeStates)); + +#ifdef LZHAM_LZVERIFY + for (uint i = 0; i < (m_num_node_states - 1); ++i) + { + node_state &a = m_node_states[i]; + node_state &b = m_node_states[i + 1]; + LZHAM_VERIFY( + (a.m_total_cost < b.m_total_cost) || + ((a.m_total_cost == b.m_total_cost) && (a.m_total_complexity <= b.m_total_complexity)) ); + } +#endif + } + + // The "extreme" parser tracks the best node::cMaxNodeStates (4) candidate LZ decisions per lookahead character. + // This allows the compressor to make locally suboptimal decisions that ultimately result in a better parse. + // It assumes the input statistics are locally stationary over the input block to parse. + bool lzcompressor::extreme_parse(parse_thread_state &parse_state) + { + LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes); + + parse_state.m_failed = false; + parse_state.m_emit_decisions_backwards = true; + + node *pNodes = parse_state.m_nodes; + for (uint i = 0; i <= cMaxParseGraphNodes; i++) + { + pNodes[i].clear(); + } + + state &approx_state = parse_state.m_initial_state; + + pNodes[0].m_num_node_states = 1; + node_state &first_node_state = pNodes[0].m_node_states[0]; + approx_state.save_partial_state(first_node_state.m_saved_state); + first_node_state.m_parent_index = -1; + first_node_state.m_parent_state_index = -1; + first_node_state.m_total_cost = 0; + first_node_state.m_total_complexity = 0; + + const uint bytes_to_parse = parse_state.m_bytes_to_match; + + const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask(); + + uint cur_dict_ofs = parse_state.m_start_ofs; + uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs; + uint cur_node_index = 0; + + enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes }; + uint match_lens[cMaxFullMatches]; + uint match_distances[cMaxFullMatches]; + + bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1]; + + node prev_lit_node; + prev_lit_node.clear(); + + while (cur_node_index < bytes_to_parse) + { + node* pCur_node = &pNodes[cur_node_index]; + + const uint max_admissable_match_len = LZHAM_MIN(static_cast(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index); + const uint find_dict_size = m_accel.get_cur_dict_size() + cur_lookahead_ofs; + + const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1); + + const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs]; + + // full matches + uint max_full_match_len = 0; + uint num_full_matches = 0; + uint len2_match_dist = 0; + + if (max_admissable_match_len >= CLZBase::cMinMatchLen) + { + const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs); + if (pMatches) + { + for ( ; ; ) + { + uint match_len = pMatches->get_len(); + LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size)); + match_len = LZHAM_MIN(match_len, max_admissable_match_len); + + if (match_len > 
max_full_match_len) + { + max_full_match_len = match_len; + + match_lens[num_full_matches] = match_len; + match_distances[num_full_matches] = pMatches->get_dist(); + num_full_matches++; + } + + if (pMatches->is_last()) + break; + pMatches++; + } + } + + len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs); + } + + for (uint cur_node_state_index = 0; cur_node_state_index < pCur_node->m_num_node_states; cur_node_state_index++) + { + node_state &cur_node_state = pCur_node->m_node_states[cur_node_state_index]; + + if (cur_node_index) + { + LZHAM_ASSERT(cur_node_state.m_parent_index >= 0); + + approx_state.restore_partial_state(cur_node_state.m_saved_state); + } + + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, approx_state.m_cur_state); + + const bit_cost_t cur_node_total_cost = cur_node_state.m_total_cost; + const uint cur_node_total_complexity = cur_node_state.m_total_complexity; + + // rep matches + uint match_hist_max_len = 0; + uint match_hist_min_match_len = 1; + for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++) + { + uint hist_match_len = 0; + + uint dist = approx_state.m_match_hist[rep_match_index]; + if (dist <= find_dict_size) + { + const uint comp_pos = static_cast((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask); + const uint8* pComp = &m_accel.m_dict[comp_pos]; + + for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++) + if (pComp[hist_match_len] != pLookahead[hist_match_len]) + break; + } + + if (hist_match_len >= match_hist_min_match_len) + { + match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len); + + approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index); + + uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index); + for (uint l = match_hist_min_match_len; l <= hist_match_len; l++) + { +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1)); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]); + } +#endif + node& dst_node = pCur_node[l]; + + bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l]; + + dst_node.add_state(cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, -((int)rep_match_index + 1)), approx_state, rep_match_total_cost, rep_match_total_complexity); + } + } + + match_hist_min_match_len = CLZBase::cMinMatchLen; + } + + uint min_truncate_match_len = match_hist_max_len; + + // nearest len2 match + if (len2_match_dist) + { + lzdecision lzdec(cur_dict_ofs, 2, len2_match_dist); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, lzdec); + pCur_node[2].add_state(cur_node_index, cur_node_state_index, lzdec, approx_state, cur_node_total_cost + actual_cost, cur_node_total_complexity + cShortMatchComplexity); + + min_truncate_match_len = LZHAM_MAX(min_truncate_match_len, 2); + } + + // full matches + if (max_full_match_len > min_truncate_match_len) + { + uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len); + for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++) + { + uint end_len = match_lens[full_match_index]; + if (end_len <= min_truncate_match_len) + continue; + + uint start_len = prev_max_match_len + 1; + uint match_dist = match_distances[full_match_index]; + + LZHAM_ASSERT(start_len <= 
end_len); + + approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index); + + for (uint l = start_len; l <= end_len; l++) + { + uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? cLongMatchComplexity : cShortMatchComplexity; + +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, l, match_dist); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]); + } +#endif + node& dst_node = pCur_node[l]; + + bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l]; + uint match_total_complexity = cur_node_total_complexity + match_complexity; + + dst_node.add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, match_dist), approx_state, match_total_cost, match_total_complexity); + } + + prev_max_match_len = end_len; + } + } + + // literal + bit_cost_t lit_cost = approx_state.get_lit_cost(m_accel, cur_dict_ofs, lit_pred0, is_match_model_index); + bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost; + uint lit_total_complexity = cur_node_total_complexity + cLitComplexity; +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, 0, 0); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lit_cost); + } +#endif + + pCur_node[1].add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, 0, 0), approx_state, lit_total_cost, lit_total_complexity); + + } // cur_node_state_index + + cur_dict_ofs++; + cur_lookahead_ofs++; + cur_node_index++; + } + + // Now get the optimal decisions by starting from the goal node. + // m_best_decisions is filled backwards. + if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse)) + { + parse_state.m_failed = true; + return false; + } + + bit_cost_t lowest_final_cost = cBitCostMax; //math::cNearlyInfinite; + int node_state_index = 0; + node_state *pLast_node_states = pNodes[bytes_to_parse].m_node_states; + for (uint i = 0; i < pNodes[bytes_to_parse].m_num_node_states; i++) + { + if (pLast_node_states[i].m_total_cost < lowest_final_cost) + { + lowest_final_cost = pLast_node_states[i].m_total_cost; + node_state_index = i; + } + } + + int node_index = bytes_to_parse; + lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr(); + do + { + LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes)); + + node& cur_node = pNodes[node_index]; + const node_state &cur_node_state = cur_node.m_node_states[node_state_index]; + + *pDst_dec++ = cur_node_state.m_lzdec; + + node_index = cur_node_state.m_parent_index; + node_state_index = cur_node_state.m_parent_state_index; + + } while (node_index > 0); + + parse_state.m_best_decisions.try_resize(static_cast(pDst_dec - parse_state.m_best_decisions.get_ptr())); + + return true; + } + + // Parsing notes: + // The regular "optimal" parser only tracks the single cheapest candidate LZ decision per lookahead character. + // This function finds the shortest path through an extremely dense node graph using a streamlined/simplified Dijkstra's algorithm with some coding heuristics. + // Graph edges are LZ "decisions", cost is measured in fractional bits needed to code each graph edge, and graph nodes are lookahead characters. + // There is no need to track visited/unvisted nodes, or find the next cheapest unvisted node in each iteration. The search always proceeds sequentially, visiting each lookahead character in turn from left/right. 
+ // The major CPU expense of this function is the complexity of LZ decision cost evaluation, so a lot of implementation effort is spent here reducing this overhead. + // To simplify the problem, it assumes the input statistics are locally stationary over the input block to parse. (Otherwise, it would need to store, track, and update + // unique symbol statistics for each lookahead character, which would be very costly.) + // This function always sequentially pushes "forward" the unvisited node horizon. This horizon frequently collapses to a single node, which guarantees that the shortest path through the + // graph must pass through this node. LZMA tracks cumulative bitprices relative to this node, while LZHAM currently always tracks cumulative bitprices relative to the first node in the lookahead buffer. + // In very early versions of LZHAM the parse was much more understandable (straight Dijkstra with almost no bit price optimizations or coding heuristics). + bool lzcompressor::optimal_parse(parse_thread_state &parse_state) + { + LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes); + + parse_state.m_failed = false; + parse_state.m_emit_decisions_backwards = true; + + node_state *pNodes = reinterpret_cast(parse_state.m_nodes); + pNodes[0].m_parent_index = -1; + pNodes[0].m_total_cost = 0; + pNodes[0].m_total_complexity = 0; + +#if 0 + for (uint i = 1; i <= cMaxParseGraphNodes; i++) + { + pNodes[i].clear(); + } +#else + memset( &pNodes[1], 0xFF, cMaxParseGraphNodes * sizeof(node_state)); +#endif + + state &approx_state = parse_state.m_initial_state; + + const uint bytes_to_parse = parse_state.m_bytes_to_match; + + const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask(); + + uint cur_dict_ofs = parse_state.m_start_ofs; + uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs; + uint cur_node_index = 0; + + enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes }; + uint match_lens[cMaxFullMatches]; + uint match_distances[cMaxFullMatches]; + + bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1]; + + while (cur_node_index < bytes_to_parse) + { + node_state* pCur_node = &pNodes[cur_node_index]; + + const uint max_admissable_match_len = LZHAM_MIN(static_cast(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index); + const uint find_dict_size = m_accel.m_cur_dict_size + cur_lookahead_ofs; + + if (cur_node_index) + { + LZHAM_ASSERT(pCur_node->m_parent_index >= 0); + + // Move to this node's state using the lowest cost LZ decision found. + approx_state.restore_partial_state(pCur_node->m_saved_state); + approx_state.partial_advance(pCur_node->m_lzdec); + } + + const bit_cost_t cur_node_total_cost = pCur_node->m_total_cost; + // This assert includes a fudge factor - make sure we don't overflow our scaled costs. 
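         // (Costs are fixed-point values scaled by cBitCostScale, so the headroom checked here is
         // roughly 64 bits' worth of coding cost. Each candidate decision evaluated below - rep
         // match, len-2 match, full match, literal - relaxes pCur_node[len] only when it is cheaper
         // than the best decision recorded so far, with total complexity breaking cost ties.)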
+ LZHAM_ASSERT((cBitCostMax - cur_node_total_cost) > (cBitCostScale * 64)); + const uint cur_node_total_complexity = pCur_node->m_total_complexity; + + const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1); + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, approx_state.m_cur_state); + + const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs]; + + // rep matches + uint match_hist_max_len = 0; + uint match_hist_min_match_len = 1; + for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++) + { + uint hist_match_len = 0; + + uint dist = approx_state.m_match_hist[rep_match_index]; + if (dist <= find_dict_size) + { + const uint comp_pos = static_cast((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask); + const uint8* pComp = &m_accel.m_dict[comp_pos]; + + for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++) + if (pComp[hist_match_len] != pLookahead[hist_match_len]) + break; + } + + if (hist_match_len >= match_hist_min_match_len) + { + match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len); + + approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index); + + uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index); + for (uint l = match_hist_min_match_len; l <= hist_match_len; l++) + { +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1)); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]); + } +#endif + node_state& dst_node = pCur_node[l]; + + bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l]; + + if ((rep_match_total_cost > dst_node.m_total_cost) || ((rep_match_total_cost == dst_node.m_total_cost) && (rep_match_total_complexity >= dst_node.m_total_complexity))) + continue; + + dst_node.m_total_cost = rep_match_total_cost; + dst_node.m_total_complexity = rep_match_total_complexity; + dst_node.m_parent_index = (uint16)cur_node_index; + approx_state.save_partial_state(dst_node.m_saved_state); + dst_node.m_lzdec.init(cur_dict_ofs, l, -((int)rep_match_index + 1)); + dst_node.m_lzdec.m_len = l; + } + } + + match_hist_min_match_len = CLZBase::cMinMatchLen; + } + + uint max_match_len = match_hist_max_len; + + if (max_match_len >= m_settings.m_fast_bytes) + { + cur_dict_ofs += max_match_len; + cur_lookahead_ofs += max_match_len; + cur_node_index += max_match_len; + continue; + } + + // full matches + if (max_admissable_match_len >= CLZBase::cMinMatchLen) + { + uint num_full_matches = 0; + + if (match_hist_max_len < 2) + { + // Get the nearest len2 match if we didn't find a rep len2. 
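                  // (Reached only when match_hist_max_len < 2, i.e. an explicit len-2 match is
                  // considered solely when no repeat-distance match of length >= 2 was found.)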
+ uint len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs); + if (len2_match_dist) + { + bit_cost_t cost = approx_state.get_len2_match_cost(*this, cur_dict_ofs, len2_match_dist, is_match_model_index); + +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, 2, len2_match_dist); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == cost); + } +#endif + + node_state& dst_node = pCur_node[2]; + + bit_cost_t match_total_cost = cur_node_total_cost + cost; + uint match_total_complexity = cur_node_total_complexity + cShortMatchComplexity; + + if ((match_total_cost < dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity < dst_node.m_total_complexity))) + { + dst_node.m_total_cost = match_total_cost; + dst_node.m_total_complexity = match_total_complexity; + dst_node.m_parent_index = (uint16)cur_node_index; + approx_state.save_partial_state(dst_node.m_saved_state); + dst_node.m_lzdec.init(cur_dict_ofs, 2, len2_match_dist); + } + + max_match_len = 2; + } + } + + const uint min_truncate_match_len = max_match_len; + + // Now get all full matches: the nearest matches at each match length. (Actually, we don't + // always get the nearest match. The match finder favors those matches which have the lowest value + // in the nibble of each match distance, all other things being equal, to help exploit how the lowest + // nibble of match distances is separately coded.) + const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs); + if (pMatches) + { + for ( ; ; ) + { + uint match_len = pMatches->get_len(); + LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size)); + match_len = LZHAM_MIN(match_len, max_admissable_match_len); + + if (match_len > max_match_len) + { + max_match_len = match_len; + + match_lens[num_full_matches] = match_len; + match_distances[num_full_matches] = pMatches->get_dist(); + num_full_matches++; + } + + if (pMatches->is_last()) + break; + pMatches++; + } + } + + if (num_full_matches) + { + uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len); + for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++) + { + uint start_len = prev_max_match_len + 1; + uint end_len = match_lens[full_match_index]; + uint match_dist = match_distances[full_match_index]; + + LZHAM_ASSERT(start_len <= end_len); + + approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index); + + for (uint l = start_len; l <= end_len; l++) + { + uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? 
cLongMatchComplexity : cShortMatchComplexity; + +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, l, match_dist); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]); + } +#endif + node_state& dst_node = pCur_node[l]; + + bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l]; + uint match_total_complexity = cur_node_total_complexity + match_complexity; + + if ((match_total_cost > dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity >= dst_node.m_total_complexity))) + continue; + + dst_node.m_total_cost = match_total_cost; + dst_node.m_total_complexity = match_total_complexity; + dst_node.m_parent_index = (uint16)cur_node_index; + approx_state.save_partial_state(dst_node.m_saved_state); + dst_node.m_lzdec.init(cur_dict_ofs, l, match_dist); + } + + prev_max_match_len = end_len; + } + } + } + + if (max_match_len >= m_settings.m_fast_bytes) + { + cur_dict_ofs += max_match_len; + cur_lookahead_ofs += max_match_len; + cur_node_index += max_match_len; + continue; + } + + // literal + bit_cost_t lit_cost = approx_state.get_lit_cost(m_accel, cur_dict_ofs, lit_pred0, is_match_model_index); + bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost; + uint lit_total_complexity = cur_node_total_complexity + cLitComplexity; +#if LZHAM_VERIFY_MATCH_COSTS + { + lzdecision actual_dec(cur_dict_ofs, 0, 0); + bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec); + LZHAM_ASSERT(actual_cost == lit_cost); + } +#endif + if ((lit_total_cost < pCur_node[1].m_total_cost) || ((lit_total_cost == pCur_node[1].m_total_cost) && (lit_total_complexity < pCur_node[1].m_total_complexity))) + { + pCur_node[1].m_total_cost = lit_total_cost; + pCur_node[1].m_total_complexity = lit_total_complexity; + pCur_node[1].m_parent_index = (int16)cur_node_index; + approx_state.save_partial_state(pCur_node[1].m_saved_state); + pCur_node[1].m_lzdec.init(cur_dict_ofs, 0, 0); + } + + cur_dict_ofs++; + cur_lookahead_ofs++; + cur_node_index++; + + } // graph search + + // Now get the optimal decisions by starting from the goal node. + // m_best_decisions is filled backwards. 
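The backward walk that follows recovers the winning decisions by chasing parent indices from the goal node, which is why m_best_decisions comes out reversed. A tiny self-contained sketch of the same forward-relax / backward-collect pattern over made-up costs:

// Illustrative sketch (made-up data): forward shortest-path parse over a tiny
// cost graph, then backward collection of the winning decisions.
#include <cstdio>
#include <vector>
#include <climits>

struct Edge { int from, len; long cost; };     // len == 1 can stand for a literal

int main()
{
   const int n = 6;                             // bytes to parse
   std::vector<long> best(n + 1, LONG_MAX);
   std::vector<int>  parent(n + 1, -1);
   best[0] = 0;

   // Literal edges cost 9 "bits"; one match edge covers bytes 1..4 for 14.
   std::vector<Edge> edges;
   for (int i = 0; i < n; i++) edges.push_back({ i, 1, 9 });
   edges.push_back({ 1, 4, 14 });

   for (int i = 0; i <= n; i++)                 // forward pass
      for (const Edge& e : edges)
         if (e.from == i && best[i] != LONG_MAX && best[i] + e.cost < best[i + e.len])
         {
            best[i + e.len] = best[i] + e.cost;
            parent[i + e.len] = i;
         }

   // Backward pass: decisions come out in reverse order, like m_best_decisions
   // when m_emit_decisions_backwards is set.
   for (int pos = n; pos > 0; pos = parent[pos])
      std::printf("decision covering [%d, %d)\n", parent[pos], pos);
   std::printf("total cost %ld\n", best[n]);
   return 0;
}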
+ if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse)) + { + parse_state.m_failed = true; + return false; + } + + int node_index = bytes_to_parse; + lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr(); + do + { + LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes)); + node_state& cur_node = pNodes[node_index]; + + *pDst_dec++ = cur_node.m_lzdec; + + node_index = cur_node.m_parent_index; + + } while (node_index > 0); + + parse_state.m_best_decisions.try_resize(static_cast(pDst_dec - parse_state.m_best_decisions.get_ptr())); + + return true; + } + + void lzcompressor::parse_job_callback(uint64 data, void* pData_ptr) + { + const uint parse_job_index = (uint)data; + scoped_perf_section parse_job_timer(cVarArgs, "parse_job_callback %u", parse_job_index); + + (void)pData_ptr; + + parse_thread_state &parse_state = m_parse_thread_state[parse_job_index]; + + if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) && (m_params.m_compression_level == cCompressionLevelUber)) + extreme_parse(parse_state); + else + optimal_parse(parse_state); + + LZHAM_MEMORY_EXPORT_BARRIER + + if (atomic_decrement32(&m_parse_jobs_remaining) == 0) + { + m_parse_jobs_complete.release(); + } + } + + // ofs is the absolute dictionary offset, must be >= the lookahead offset. + // TODO: Doesn't find len2 matches + int lzcompressor::enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector& decisions, uint min_match_len, uint max_match_len) + { + LZHAM_ASSERT(min_match_len >= 1); + + uint start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask(); + LZHAM_ASSERT(ofs >= start_ofs); + const uint lookahead_ofs = ofs - start_ofs; + + uint largest_index = 0; + uint largest_len; + bit_cost_t largest_cost; + + if (min_match_len <= 1) + { + if (!decisions.try_resize(1)) + return -1; + + lzpriced_decision& lit_dec = decisions[0]; + lit_dec.init(ofs, 0, 0, 0); + lit_dec.m_cost = cur_state.get_cost(*this, m_accel, lit_dec); + largest_cost = lit_dec.m_cost; + + largest_len = 1; + } + else + { + if (!decisions.try_resize(0)) + return -1; + + largest_len = 0; + largest_cost = cBitCostMax; + } + + uint match_hist_max_len = 0; + + // Add rep matches. + for (uint i = 0; i < cMatchHistSize; i++) + { + uint hist_match_len = m_accel.get_match_len(lookahead_ofs, cur_state.m_match_hist[i], max_match_len); + if (hist_match_len < min_match_len) + continue; + + if ( ((hist_match_len == 1) && (i == 0)) || (hist_match_len >= CLZBase::cMinMatchLen) ) + { + match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len); + + lzpriced_decision dec(ofs, hist_match_len, -((int)i + 1)); + dec.m_cost = cur_state.get_cost(*this, m_accel, dec); + + if (!decisions.try_push_back(dec)) + return -1; + + if ( (hist_match_len > largest_len) || ((hist_match_len == largest_len) && (dec.m_cost < largest_cost)) ) + { + largest_index = decisions.size() - 1; + largest_len = hist_match_len; + largest_cost = dec.m_cost; + } + } + } + + // Now add full matches. + if ((max_match_len >= CLZBase::cMinMatchLen) && (match_hist_max_len < m_settings.m_fast_bytes)) + { + const dict_match* pMatches = m_accel.find_matches(lookahead_ofs); + + if (pMatches) + { + for ( ; ; ) + { + uint match_len = math::minimum(pMatches->get_len(), max_match_len); + LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size)); + + // Full matches are very likely to be more expensive than rep matches of the same length, so don't bother evaluating them. 
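Further up in this hunk, parse_job_callback signals completion by atomically decrementing a counter and releasing a semaphore when it reaches zero. A portable approximation using std::atomic and a condition variable in place of LZHAM's task-pool primitives:

// Illustrative sketch (std::thread stand-ins for LZHAM's task pool): the last
// worker to decrement the counter wakes the waiting thread.
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>
#include <cstdio>

static std::atomic<int>        g_jobs_remaining{0};
static std::mutex              g_mutex;
static std::condition_variable g_done_cv;
static bool                    g_done = false;

static void parse_job(int index)
{
   std::printf("parse job %d\n", index);        // stand-in for optimal_parse()
   if (g_jobs_remaining.fetch_sub(1) == 1)      // last job out
   {
      std::lock_guard<std::mutex> lock(g_mutex);
      g_done = true;
      g_done_cv.notify_one();
   }
}

int main()
{
   const int num_jobs = 4;
   g_jobs_remaining = num_jobs;

   std::vector<std::thread> workers;
   for (int i = 1; i < num_jobs; i++)
      workers.emplace_back(parse_job, i);
   parse_job(0);                                // the calling thread runs job 0 itself

   std::unique_lock<std::mutex> lock(g_mutex);
   g_done_cv.wait(lock, [] { return g_done; }); // like m_parse_jobs_complete.wait()
   lock.unlock();
   for (auto& t : workers) t.join();
   return 0;
}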
+ if ((match_len >= min_match_len) && (match_len > match_hist_max_len)) + { + if ((max_match_len > CLZBase::cMaxMatchLen) && (match_len == CLZBase::cMaxMatchLen)) + { + match_len = m_accel.get_match_len(lookahead_ofs, pMatches->get_dist(), max_match_len, CLZBase::cMaxMatchLen); + } + + lzpriced_decision dec(ofs, match_len, pMatches->get_dist()); + dec.m_cost = cur_state.get_cost(*this, m_accel, dec); + + if (!decisions.try_push_back(dec)) + return -1; + + if ( (match_len > largest_len) || ((match_len == largest_len) && (dec.get_cost() < largest_cost)) ) + { + largest_index = decisions.size() - 1; + largest_len = match_len; + largest_cost = dec.get_cost(); + } + } + if (pMatches->is_last()) + break; + pMatches++; + } + } + } + + return largest_index; + } + + bool lzcompressor::greedy_parse(parse_thread_state &parse_state) + { + parse_state.m_failed = true; + parse_state.m_emit_decisions_backwards = false; + + const uint bytes_to_parse = parse_state.m_bytes_to_match; + + const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask(); + + uint cur_dict_ofs = parse_state.m_start_ofs; + uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs; + uint cur_ofs = 0; + + state &approx_state = parse_state.m_initial_state; + + lzham::vector &decisions = parse_state.m_temp_decisions; + + if (!decisions.try_reserve(384)) + return false; + + if (!parse_state.m_best_decisions.try_resize(0)) + return false; + + while (cur_ofs < bytes_to_parse) + { + const uint max_admissable_match_len = LZHAM_MIN(static_cast(CLZBase::cMaxHugeMatchLen), bytes_to_parse - cur_ofs); + + int largest_dec_index = enumerate_lz_decisions(cur_dict_ofs, approx_state, decisions, 1, max_admissable_match_len); + if (largest_dec_index < 0) + return false; + + const lzpriced_decision &dec = decisions[largest_dec_index]; + + if (!parse_state.m_best_decisions.try_push_back(dec)) + return false; + + approx_state.partial_advance(dec); + + uint match_len = dec.get_len(); + LZHAM_ASSERT(match_len <= max_admissable_match_len); + cur_dict_ofs += match_len; + cur_lookahead_ofs += match_len; + cur_ofs += match_len; + + if (parse_state.m_best_decisions.size() >= parse_state.m_max_greedy_decisions) + { + parse_state.m_greedy_parse_total_bytes_coded = cur_ofs; + parse_state.m_greedy_parse_gave_up = true; + return false; + } + } + + parse_state.m_greedy_parse_total_bytes_coded = cur_ofs; + + LZHAM_ASSERT(cur_ofs == bytes_to_parse); + + parse_state.m_failed = false; + + return true; + } + + bool lzcompressor::compress_block(const void* pBuf, uint buf_len) + { + uint cur_ofs = 0; + uint bytes_remaining = buf_len; + while (bytes_remaining) + { + uint bytes_to_compress = math::minimum(m_accel.get_max_add_bytes(), bytes_remaining); + if (!compress_block_internal(static_cast(pBuf) + cur_ofs, bytes_to_compress)) + return false; + + cur_ofs += bytes_to_compress; + bytes_remaining -= bytes_to_compress; + } + return true; + } + + void lzcompressor::update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate) + { + block_history& cur_block_history = m_block_history[m_block_history_next]; + m_block_history_next++; + m_block_history_next %= cMaxBlockHistorySize; + + cur_block_history.m_comp_size = comp_size; + cur_block_history.m_src_size = src_size; + cur_block_history.m_ratio = ratio; + cur_block_history.m_raw_block = raw_block; + cur_block_history.m_reset_update_rate = reset_update_rate; + + m_block_history_size = LZHAM_MIN(m_block_history_size + 1, 
static_cast(cMaxBlockHistorySize)); + } + + uint lzcompressor::get_recent_block_ratio() + { + if (!m_block_history_size) + return 0; + + uint64 total_scaled_ratio = 0; + for (uint i = 0; i < m_block_history_size; i++) + total_scaled_ratio += m_block_history[i].m_ratio; + total_scaled_ratio /= m_block_history_size; + + return static_cast(total_scaled_ratio); + } + + uint lzcompressor::get_min_block_ratio() + { + if (!m_block_history_size) + return 0; + uint min_scaled_ratio = UINT_MAX; + for (uint i = 0; i < m_block_history_size; i++) + min_scaled_ratio = LZHAM_MIN(m_block_history[i].m_ratio, min_scaled_ratio); + return min_scaled_ratio; + } + + uint lzcompressor::get_max_block_ratio() + { + if (!m_block_history_size) + return 0; + uint max_scaled_ratio = 0; + for (uint i = 0; i < m_block_history_size; i++) + max_scaled_ratio = LZHAM_MAX(m_block_history[i].m_ratio, max_scaled_ratio); + return max_scaled_ratio; + } + + uint lzcompressor::get_total_recent_reset_update_rate() + { + uint total_resets = 0; + for (uint i = 0; i < m_block_history_size; i++) + total_resets += m_block_history[i].m_reset_update_rate; + return total_resets; + } + + bool lzcompressor::compress_block_internal(const void* pBuf, uint buf_len) + { + scoped_perf_section compress_block_timer(cVarArgs, "****** compress_block %u", m_block_index); + + LZHAM_ASSERT(pBuf); + LZHAM_ASSERT(buf_len <= m_params.m_block_size); + + LZHAM_ASSERT(m_src_size >= 0); + if (m_src_size < 0) + return false; + + m_src_size += buf_len; + + // Important: Don't do any expensive work until after add_bytes_begin() is called, to increase parallelism. + if (!m_accel.add_bytes_begin(buf_len, static_cast(pBuf))) + return false; + + m_start_of_block_state = m_state; + + m_src_adler32 = adler32(pBuf, buf_len, m_src_adler32); + m_src_crc32 = crc32(m_src_adler32, (const lzham_uint8*)pBuf, buf_len); + + m_block_start_dict_ofs = m_accel.get_lookahead_pos() & (m_accel.get_max_dict_size() - 1); + + uint cur_dict_ofs = m_block_start_dict_ofs; + + uint bytes_to_match = buf_len; + + if (!m_codec.start_encoding((buf_len * 9) / 8)) + return false; + + if (!m_block_index) + { + if (!send_configuration()) + return false; + } + +#ifdef LZHAM_LZDEBUG + m_codec.encode_bits(166, 12); +#endif + + if (!m_codec.encode_bits(cCompBlock, cBlockHeaderBits)) + return false; + + if (!m_codec.encode_arith_init()) + return false; + + m_state.start_of_block(m_accel, cur_dict_ofs, m_block_index); + + bool emit_reset_update_rate_command = false; + + // Determine if it makes sense to reset the Huffman table update frequency back to their initial (maximum) rates. + if ((m_block_history_size) && (m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_TRADEOFF_DECOMPRESSION_RATE_FOR_COMP_RATIO)) + { + const block_history& prev_block_history = m_block_history[m_block_history_next ? (m_block_history_next - 1) : (cMaxBlockHistorySize - 1)]; + + if (prev_block_history.m_raw_block) + emit_reset_update_rate_command = true; + else if (get_total_recent_reset_update_rate() == 0) + { + if (get_recent_block_ratio() > (cBlockHistoryCompRatioScale * 95U / 100U)) + emit_reset_update_rate_command = true; + else + { + uint recent_min_block_ratio = get_min_block_ratio(); + //uint recent_max_block_ratio = get_max_block_ratio(); + + // Compression ratio has recently dropped quite a bit - slam the table update rates back up. 
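The heuristic around this point works off a small ring of per-block ratios scaled by cBlockHistoryCompRatioScale. A standalone sketch of how the scaled ratio, the recent minimum, and the 1.5x reset trigger are derived (the block sizes here are made up):

// Illustrative sketch: per-block ratio history scaled by 1000, feeding the
// update-rate-reset heuristic. Constants mirror, but are not, the real ones.
#include <cstdio>
#include <climits>
#include <algorithm>

const unsigned kHistorySize = 6;      // stands in for cMaxBlockHistorySize
const unsigned kRatioScale  = 1000;   // stands in for cBlockHistoryCompRatioScale

int main()
{
   unsigned ratios[kHistorySize];
   unsigned count = 0, next = 0;

   const unsigned comp_size[] = { 200, 210, 480 };  // compressed bytes per block
   const unsigned src_size    = 512;                // source bytes per block

   for (unsigned i = 0; i < 3; i++)
   {
      ratios[next] = comp_size[i] * kRatioScale / src_size; // e.g. 200*1000/512 = 390
      next = (next + 1) % kHistorySize;
      count = std::min(count + 1, kHistorySize);
   }

   unsigned long long avg = 0;
   unsigned min_ratio = UINT_MAX;
   for (unsigned i = 0; i < count; i++)
   {
      avg += ratios[i];
      min_ratio = std::min(min_ratio, ratios[i]);
   }
   avg /= count;

   // Mirrors the heuristic: if the newest block's ratio exceeds 1.5x the recent
   // minimum, the Huffman table update rates get slammed back up.
   unsigned newest = ratios[(next + kHistorySize - 1) % kHistorySize];
   std::printf("avg=%llu min=%u newest=%u reset=%d\n",
               avg, min_ratio, newest, newest > (min_ratio * 3u) / 2u);
   return 0;
}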
+ if (prev_block_history.m_ratio > (recent_min_block_ratio * 3U) / 2U) + { + //printf("Emitting reset: %u %u\n", prev_block_history.m_ratio, recent_min_block_ratio); + emit_reset_update_rate_command = true; + } + } + } + } + + if (emit_reset_update_rate_command) + m_state.reset_update_rate(); + + m_codec.encode_bits(emit_reset_update_rate_command ? 1 : 0, cBlockFlushTypeBits); + + //coding_stats initial_stats(m_stats); + + uint initial_step = m_step; + + while (bytes_to_match) + { + const uint cAvgAcceptableGreedyMatchLen = 384; + if ((m_params.m_pSeed_bytes) && (bytes_to_match >= cAvgAcceptableGreedyMatchLen)) + { + parse_thread_state &greedy_parse_state = m_parse_thread_state[cMaxParseThreads]; + + greedy_parse_state.m_initial_state = m_state; + greedy_parse_state.m_initial_state.m_cur_ofs = cur_dict_ofs; + + greedy_parse_state.m_issue_reset_state_partial = false; + greedy_parse_state.m_start_ofs = cur_dict_ofs; + greedy_parse_state.m_bytes_to_match = LZHAM_MIN(bytes_to_match, static_cast(CLZBase::cMaxHugeMatchLen)); + + greedy_parse_state.m_max_greedy_decisions = LZHAM_MAX((bytes_to_match / cAvgAcceptableGreedyMatchLen), 2); + greedy_parse_state.m_greedy_parse_gave_up = false; + greedy_parse_state.m_greedy_parse_total_bytes_coded = 0; + + if (!greedy_parse(greedy_parse_state)) + { + if (!greedy_parse_state.m_greedy_parse_gave_up) + return false; + } + + uint num_greedy_decisions_to_code = 0; + + const lzham::vector &best_decisions = greedy_parse_state.m_best_decisions; + + if (!greedy_parse_state.m_greedy_parse_gave_up) + num_greedy_decisions_to_code = best_decisions.size(); + else + { + uint num_small_decisions = 0; + uint total_match_len = 0; + uint max_match_len = 0; + + uint i; + for (i = 0; i < best_decisions.size(); i++) + { + const lzdecision &dec = best_decisions[i]; + if (dec.get_len() <= CLZBase::cMaxMatchLen) + { + num_small_decisions++; + if (num_small_decisions > 16) + break; + } + + total_match_len += dec.get_len(); + max_match_len = LZHAM_MAX(max_match_len, dec.get_len()); + } + + if (max_match_len > CLZBase::cMaxMatchLen) + { + if ((total_match_len / i) >= cAvgAcceptableGreedyMatchLen) + { + num_greedy_decisions_to_code = i; + } + } + } + + if (num_greedy_decisions_to_code) + { + for (uint i = 0; i < num_greedy_decisions_to_code; i++) + { + LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs); + //LZHAM_ASSERT(i >= 0); + LZHAM_ASSERT(i < best_decisions.size()); + +#if LZHAM_UPDATE_STATS + bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]); + m_stats.update(best_decisions[i], m_state, m_accel, cost); +#endif + + if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match)) + return false; + } + + if ((!greedy_parse_state.m_greedy_parse_gave_up) || (!bytes_to_match)) + continue; + } + } + + uint num_parse_jobs = LZHAM_MIN(m_num_parse_threads, (bytes_to_match + cMaxParseGraphNodes - 1) / cMaxParseGraphNodes); + if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_DETERMINISTIC_PARSING) == 0) + { + if (m_use_task_pool && m_accel.get_max_helper_threads()) + { + // Increase the number of active parse jobs as the match finder finishes up to keep CPU utilization up. + num_parse_jobs += m_accel.get_num_completed_helper_threads(); + num_parse_jobs = LZHAM_MIN(num_parse_jobs, cMaxParseThreads); + } + } + if (bytes_to_match < 1536) + num_parse_jobs = 1; + + // Reduce block size near the beginning of the file so statistical models get going a bit faster. 
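Just below, bytes_to_match is carved into per-job slices capped at cMaxParseGraphNodes, with the last job taking the remainder. A stripped-down version of that split, using an assumed thread count:

// Illustrative sketch: dividing a byte range into per-parse-job slices capped
// at the parse graph size (3072, matching cMaxParseGraphNodes in this header).
#include <cstdio>
#include <algorithm>

int main()
{
   const unsigned kMaxParseGraphNodes = 3072;
   unsigned bytes_to_match = 10000;                 // assumed amount left in the block
   unsigned num_jobs = std::min(4u,
      (bytes_to_match + kMaxParseGraphNodes - 1) / kMaxParseGraphNodes);

   unsigned total     = std::min(bytes_to_match, kMaxParseGraphNodes * num_jobs);
   unsigned remaining = total;
   unsigned start_ofs = 0;

   for (unsigned job = 0; job < num_jobs; job++)
   {
      unsigned bytes = (job == num_jobs - 1) ? remaining : total / num_jobs;
      bytes = std::min(bytes, kMaxParseGraphNodes); // never exceed one graph's worth

      std::printf("job %u: ofs %u, %u bytes\n", job, start_ofs, bytes);
      start_ofs += bytes;
      remaining -= bytes;
   }
   return 0;
}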
+ bool force_small_block = false; + if ((!m_block_index) && ((cur_dict_ofs - m_block_start_dict_ofs) < cMaxParseGraphNodes)) + { + num_parse_jobs = 1; + force_small_block = true; + } + + uint parse_thread_start_ofs = cur_dict_ofs; + uint parse_thread_total_size = LZHAM_MIN(bytes_to_match, cMaxParseGraphNodes * num_parse_jobs); + if (force_small_block) + { + parse_thread_total_size = LZHAM_MIN(parse_thread_total_size, 1536); + } + + uint parse_thread_remaining = parse_thread_total_size; + for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++) + { + parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index]; + + parse_thread.m_initial_state = m_state; + parse_thread.m_initial_state.m_cur_ofs = parse_thread_start_ofs; + + if (parse_thread_index > 0) + { + parse_thread.m_initial_state.reset_state_partial(); + parse_thread.m_issue_reset_state_partial = true; + } + else + { + parse_thread.m_issue_reset_state_partial = false; + } + + parse_thread.m_start_ofs = parse_thread_start_ofs; + if (parse_thread_index == (num_parse_jobs - 1)) + parse_thread.m_bytes_to_match = parse_thread_remaining; + else + parse_thread.m_bytes_to_match = parse_thread_total_size / num_parse_jobs; + + parse_thread.m_bytes_to_match = LZHAM_MIN(parse_thread.m_bytes_to_match, cMaxParseGraphNodes); + LZHAM_ASSERT(parse_thread.m_bytes_to_match > 0); + + parse_thread.m_max_greedy_decisions = UINT_MAX; + parse_thread.m_greedy_parse_gave_up = false; + + parse_thread_start_ofs += parse_thread.m_bytes_to_match; + parse_thread_remaining -= parse_thread.m_bytes_to_match; + } + + { + scoped_perf_section parse_timer("parsing"); + + if ((m_use_task_pool) && (num_parse_jobs > 1)) + { + m_parse_jobs_remaining = num_parse_jobs; + + { + scoped_perf_section queue_task_timer("queuing parse tasks"); + + if (!m_params.m_pTask_pool->queue_multiple_object_tasks(this, &lzcompressor::parse_job_callback, 1, num_parse_jobs - 1)) + return false; + } + + parse_job_callback(0, NULL); + + { + scoped_perf_section wait_timer("waiting for jobs"); + + m_parse_jobs_complete.wait(); + } + } + else + { + m_parse_jobs_remaining = INT_MAX; + for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++) + { + parse_job_callback(parse_thread_index, NULL); + } + } + } + + { + scoped_perf_section coding_timer("coding"); + + for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++) + { + parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index]; + if (parse_thread.m_failed) + return false; + + const lzham::vector &best_decisions = parse_thread.m_best_decisions; + + if (parse_thread.m_issue_reset_state_partial) + { + if (!m_state.encode_reset_state_partial(m_codec, m_accel, cur_dict_ofs)) + return false; + m_step++; + } + + if (best_decisions.size()) + { + int i = 0; + int end_dec_index = static_cast(best_decisions.size()) - 1; + int dec_step = 1; + if (parse_thread.m_emit_decisions_backwards) + { + i = static_cast(best_decisions.size()) - 1; + end_dec_index = 0; + dec_step = -1; + LZHAM_ASSERT(best_decisions.back().m_pos == (int)parse_thread.m_start_ofs); + } + else + { + LZHAM_ASSERT(best_decisions.front().m_pos == (int)parse_thread.m_start_ofs); + } + + // Loop rearranged to avoid bad x64 codegen problem with MSVC2008. 
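The loop that follows walks best_decisions either forwards or backwards with a single index-plus-step loop (the MSVC2008 workaround noted above). The same traversal in isolation:

// Illustrative sketch: visiting a decision list forwards or backwards with one
// loop, as the coder does when a parse thread emitted its decisions in reverse.
#include <cstdio>
#include <vector>

static void visit(const std::vector<int>& decisions, bool backwards)
{
   if (decisions.empty())
      return;

   int i    = backwards ? (int)decisions.size() - 1 : 0;
   int end  = backwards ? 0 : (int)decisions.size() - 1;
   int step = backwards ? -1 : 1;

   for ( ; ; )
   {
      std::printf("code decision %d\n", decisions[i]);
      if (i == end)
         break;
      i += step;
   }
}

int main()
{
   visit({ 10, 20, 30 }, false);   // 10 20 30
   visit({ 10, 20, 30 }, true);    // 30 20 10
   return 0;
}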
+ for ( ; ; ) + { + LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs); + LZHAM_ASSERT(i >= 0); + LZHAM_ASSERT(i < (int)best_decisions.size()); + +#if LZHAM_UPDATE_STATS + bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]); + m_stats.update(best_decisions[i], m_state, m_accel, cost); + //m_state.print(m_codec, *this, m_accel, best_decisions[i]); +#endif + + if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match)) + return false; + if (i == end_dec_index) + break; + i += dec_step; + } + + LZHAM_NOTE_UNUSED(i); + } + + LZHAM_ASSERT(cur_dict_ofs == parse_thread.m_start_ofs + parse_thread.m_bytes_to_match); + + } // parse_thread_index + + } + } + + { + scoped_perf_section add_bytes_timer("add_bytes_end"); + m_accel.add_bytes_end(); + } + + if (!m_state.encode_eob(m_codec, m_accel, cur_dict_ofs)) + return false; + +#ifdef LZHAM_LZDEBUG + if (!m_codec.encode_bits(366, 12)) return false; +#endif + + { + scoped_perf_section stop_encoding_timer("stop_encoding"); + if (!m_codec.stop_encoding(true)) return false; + } + + // Coded the entire block - now see if it makes more sense to just send a raw/uncompressed block. + + uint compressed_size = m_codec.get_encoding_buf().size(); + LZHAM_NOTE_UNUSED(compressed_size); + + bool used_raw_block = false; + +#if !LZHAM_FORCE_ALL_RAW_BLOCKS + #if (defined(LZHAM_DISABLE_RAW_BLOCKS) || defined(LZHAM_LZDEBUG)) + if (0) + #else + if (compressed_size >= buf_len) + #endif +#endif + { + // Failed to compress the block, so go back to our original state and just code a raw block. + m_state = m_start_of_block_state; + m_step = initial_step; + //m_stats = initial_stats; + + m_codec.reset(); + + if (!m_codec.start_encoding(buf_len + 16)) + return false; + + if (!m_block_index) + { + if (!send_configuration()) + return false; + } + +#ifdef LZHAM_LZDEBUG + if (!m_codec.encode_bits(166, 12)) + return false; +#endif + + if (!m_codec.encode_bits(cRawBlock, cBlockHeaderBits)) + return false; + + LZHAM_ASSERT(buf_len <= 0x1000000); + if (!m_codec.encode_bits(buf_len - 1, 24)) + return false; + + // Write buf len check bits, to help increase the probability of detecting corrupted data more early. 
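The raw-block header written below stores buf_len - 1 in 24 bits followed by an 8-bit XOR of its three bytes as a cheap corruption check. Worked through for an assumed 512 KiB block:

// Illustrative sketch: the 8-bit check value written after the 24-bit raw
// block length (buf_len - 1), as in the raw-block fallback path below.
#include <cstdio>

int main()
{
   const unsigned buf_len = 512 * 1024;          // assumed block size
   const unsigned v = buf_len - 1;               // 0x07FFFF

   unsigned b0 = v         & 0xFF;               // 0xFF
   unsigned b1 = (v >> 8)  & 0xFF;               // 0xFF
   unsigned b2 = (v >> 16) & 0xFF;               // 0x07

   unsigned check = (b0 ^ b1) ^ b2;              // 0x07
   std::printf("len-1 = 0x%06X, check byte = 0x%02X\n", v, check);
   return 0;
}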
+ uint buf_len0 = (buf_len - 1) & 0xFF; + uint buf_len1 = ((buf_len - 1) >> 8) & 0xFF; + uint buf_len2 = ((buf_len - 1) >> 16) & 0xFF; + if (!m_codec.encode_bits((buf_len0 ^ buf_len1) ^ buf_len2, 8)) + return false; + + if (!m_codec.encode_align_to_byte()) + return false; + + const uint8* pSrc = m_accel.get_ptr(m_block_start_dict_ofs); + + for (uint i = 0; i < buf_len; i++) + { + if (!m_codec.encode_bits(*pSrc++, 8)) + return false; + } + + if (!m_codec.stop_encoding(true)) + return false; + + used_raw_block = true; + emit_reset_update_rate_command = false; + } + + uint comp_size = m_codec.get_encoding_buf().size(); + uint scaled_ratio = (comp_size * cBlockHistoryCompRatioScale) / buf_len; + update_block_history(comp_size, buf_len, scaled_ratio, used_raw_block, emit_reset_update_rate_command); + + //printf("\n%u, %u, %u, %u\n", m_block_index, 500*emit_reset_update_rate_command, scaled_ratio, get_recent_block_ratio()); + + { + scoped_perf_section append_timer("append"); + + if (m_comp_buf.empty()) + { + m_comp_buf.swap(m_codec.get_encoding_buf()); + } + else + { + if (!m_comp_buf.append(m_codec.get_encoding_buf())) + return false; + } + } +#if LZHAM_UPDATE_STATS + LZHAM_VERIFY(m_stats.m_total_bytes == m_src_size); + if (emit_reset_update_rate_command) + m_stats.m_total_update_rate_resets++; +#endif + + m_block_index++; + + return true; + } + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.h b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.h new file mode 100644 index 00000000..b2991570 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_internal.h @@ -0,0 +1,481 @@ +// File: lzham_lzcomp_internal.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "../include/lzham_match_accel.h" +#include "../include/lzham_symbol_codec.h" +#include "../include/lzham_lzbase.h" + +namespace lzham +{ + typedef lzham::vector byte_vec; + + const uint cMaxParseGraphNodes = 3072; + const uint cMaxParseThreads = 8; + + enum compression_level + { + cCompressionLevelFastest, + cCompressionLevelFaster, + cCompressionLevelDefault, + cCompressionLevelBetter, + cCompressionLevelUber, + + cCompressionLevelCount + }; + + struct comp_settings + { + uint m_fast_bytes; + bool m_fast_adaptive_huffman_updating; + bool m_use_polar_codes; + uint m_match_accel_max_matches_per_probe; + uint m_match_accel_max_probes; + }; + + class lzcompressor : public CLZBase + { + public: + lzcompressor(); + + struct init_params + { + enum + { + cMinDictSizeLog2 = CLZBase::cMinDictSizeLog2, + cMaxDictSizeLog2 = CLZBase::cMaxDictSizeLog2, + cDefaultBlockSize = 1024U*512U + }; + + init_params() : + m_pTask_pool(NULL), + m_max_helper_threads(0), + m_compression_level(cCompressionLevelDefault), + m_dict_size_log2(22), + m_block_size(cDefaultBlockSize), + m_num_cachelines(0), + m_cacheline_size(0), + m_lzham_compress_flags(0), + m_pSeed_bytes(0), + m_num_seed_bytes(0) + { + } + + task_pool* m_pTask_pool; + uint m_max_helper_threads; + + compression_level m_compression_level; + uint m_dict_size_log2; + + uint m_block_size; + + uint m_num_cachelines; + uint m_cacheline_size; + + uint m_lzham_compress_flags; + + const void *m_pSeed_bytes; + uint m_num_seed_bytes; + }; + + bool init(const init_params& params); + void clear(); + + // sync, or sync+dictionary flush + bool flush(lzham_flush_t flush_type); + + bool reset(); + + bool put_bytes(const void* pBuf, uint buf_len); + + const byte_vec& get_compressed_data() const { return m_comp_buf; } + 
byte_vec& get_compressed_data() { return m_comp_buf; } + + uint32 get_src_adler32() const { return m_src_adler32; } + uint32 get_src_crc32() const { return m_src_crc32; } + + private: + class state; + + enum + { + cLitComplexity = 1, + cRep0Complexity = 2, + cRep3Complexity = 5, + + cLongMatchComplexity = 6, + cLongMatchComplexityLenThresh = 9, + + cShortMatchComplexity = 7 + }; + + struct lzdecision + { + int m_pos; // dict position where decision was evaluated + int m_len; // 0 if literal, 1+ if match + int m_dist; // <0 if match rep, else >=1 is match dist + + inline lzdecision() { } + inline lzdecision(int pos, int len, int dist) : m_pos(pos), m_len(len), m_dist(dist) { } + + inline void init(int pos, int len, int dist) { m_pos = pos; m_len = len; m_dist = dist; } + + inline bool is_lit() const { return !m_len; } + inline bool is_match() const { return m_len > 0; } // may be a rep or full match + inline bool is_full_match() const { return (m_len > 0) && (m_dist >= 1); } + inline uint get_len() const { return math::maximum(m_len, 1); } + inline bool is_rep() const { return m_dist < 0; } + inline bool is_rep0() const { return m_dist == -1; } + + uint get_match_dist(const state& s) const; + + inline uint get_complexity() const + { + if (is_lit()) + return cLitComplexity; + else if (is_rep()) + { + LZHAM_ASSUME(cRep0Complexity == 2); + return 1 + -m_dist; // 2, 3, 4, or 5 + } + else if (get_len() >= cLongMatchComplexityLenThresh) + return cLongMatchComplexity; + else + return cShortMatchComplexity; + } + + inline uint get_min_codable_len() const + { + if (is_lit() || is_rep0()) + return 1; + else + return CLZBase::cMinMatchLen; + } + }; + + struct lzpriced_decision : lzdecision + { + lzpriced_decision() { } + + inline lzpriced_decision(int pos, int len, int dist) : lzdecision(pos, len, dist) { } + inline lzpriced_decision(int pos, int len, int dist, bit_cost_t cost) : lzdecision(pos, len, dist), m_cost(cost) { } + + inline void init(int pos, int len, int dist, bit_cost_t cost) { lzdecision::init(pos, len, dist); m_cost = cost; } + + inline bit_cost_t get_cost() const { return m_cost; } + + bit_cost_t m_cost; + }; + + struct state_base + { + uint m_cur_ofs; + uint m_cur_state; + uint m_match_hist[CLZBase::cMatchHistSize]; + + inline bool operator== (const state_base &rhs) const + { + if (m_cur_state != rhs.m_cur_state) + return false; + for (uint i = 0; i < CLZBase::cMatchHistSize; i++) + if (m_match_hist[i] != rhs.m_match_hist[i]) + return false; + return true; + } + + void partial_advance(const lzdecision& lzdec); + + inline void save_partial_state(state_base& dst) + { + dst.m_cur_ofs = m_cur_ofs; + dst.m_cur_state = m_cur_state; + memcpy(dst.m_match_hist, m_match_hist, sizeof(m_match_hist)); + } + + inline void restore_partial_state(const state_base& src) + { + m_cur_ofs = src.m_cur_ofs; + m_cur_state = src.m_cur_state; + memcpy(m_match_hist, src.m_match_hist, sizeof(m_match_hist)); + } + }; + + class state : public state_base + { + public: + state(); + + void clear(); + + bool init(CLZBase& lzbase, bool fast_adaptive_huffman_updating, bool use_polar_codes); + void reset(); + + bit_cost_t get_cost(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) const; + bit_cost_t get_len2_match_cost(CLZBase& lzbase, uint dict_pos, uint len2_match_dist, uint is_match_model_index); + bit_cost_t get_lit_cost(const search_accelerator& dict, uint dict_pos, uint lit_pred0, uint is_match_model_index) const; + + // Returns actual cost. 
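The lzdecision struct above folds three cases into (len, dist): len == 0 is a literal, a negative dist selects one of the four rep-history slots, and dist >= 1 is an explicit match distance. A small classifier using the same convention (a hypothetical struct, not the real class):

// Illustrative sketch of the (len, dist) decision convention:
//   len == 0            -> literal
//   dist <  0           -> rep match, history slot (-dist - 1)
//   dist >= 1, len >= 2 -> full match at that distance
#include <cstdio>

struct Decision { int pos, len, dist; };

static void describe(const Decision& d)
{
   if (d.len == 0)
      std::printf("pos %d: literal\n", d.pos);
   else if (d.dist < 0)
      std::printf("pos %d: rep%d match, len %d\n", d.pos, -d.dist - 1, d.len);
   else
      std::printf("pos %d: full match, len %d, dist %d\n", d.pos, d.len, d.dist);
}

int main()
{
   describe({ 100, 0, 0 });        // literal
   describe({ 101, 5, -1 });       // rep0 match of length 5
   describe({ 106, 8, 4096 });     // full match, distance 4096
   return 0;
}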
+ void get_rep_match_costs(uint dict_pos, bit_cost_t *pBitcosts, uint match_hist_index, int min_len, int max_len, uint is_match_model_index) const; + void get_full_match_costs(CLZBase& lzbase, uint dict_pos, bit_cost_t *pBitcosts, uint match_dist, int min_len, int max_len, uint is_match_model_index) const; + + bit_cost_t update_stats(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); + + bool advance(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); + bool encode(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); + + void print(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); + + bool encode_eob(symbol_codec& codec, const search_accelerator& dict, uint dict_pos); + bool encode_reset_state_partial(symbol_codec& codec, const search_accelerator& dict, uint dict_pos); + + void update_match_hist(uint match_dist); + int find_match_dist(uint match_hist) const; + + void reset_state_partial(); + void start_of_block(const search_accelerator& dict, uint cur_ofs, uint block_index); + + void reset_update_rate(); + + uint get_pred_char(const search_accelerator& dict, int pos, int backward_ofs) const; + + inline bool will_reference_last_match(const lzdecision& lzdec) const + { + return (!lzdec.is_match()) && (m_cur_state >= CLZBase::cNumLitStates); + } + + uint m_block_start_dict_ofs; + + adaptive_bit_model m_is_match_model[CLZBase::cNumStates * (1 << CLZBase::cNumIsMatchContextBits)]; + + adaptive_bit_model m_is_rep_model[CLZBase::cNumStates]; + adaptive_bit_model m_is_rep0_model[CLZBase::cNumStates]; + adaptive_bit_model m_is_rep0_single_byte_model[CLZBase::cNumStates]; + adaptive_bit_model m_is_rep1_model[CLZBase::cNumStates]; + adaptive_bit_model m_is_rep2_model[CLZBase::cNumStates]; + +#if LZHAM_USE_ALL_ARITHMETIC_CODING + typedef adaptive_arith_data_model sym_data_model; +#else + typedef quasi_adaptive_huffman_data_model sym_data_model; +#endif + + sym_data_model m_lit_table[1 << CLZBase::cNumLitPredBits]; + sym_data_model m_delta_lit_table[1 << CLZBase::cNumDeltaLitPredBits]; + + sym_data_model m_main_table; + sym_data_model m_rep_len_table[2]; + sym_data_model m_large_len_table[2]; + sym_data_model m_dist_lsb_table; + }; + + class tracked_stat + { + public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0.0f; m_total2 = 0.0f; m_min_val = 9e+99; m_max_val = -9e+99; } + + void update(double val) { m_num++; m_total += val; m_total2 += val * val; m_min_val = LZHAM_MIN(m_min_val, val); m_max_val = LZHAM_MAX(m_max_val, val); } + + tracked_stat &operator += (double val) { update(val); return *this; } + operator double() const { return m_total; } + + uint64 get_number_of_values() { return m_num; } + uint32 get_number_of_values32() { return static_cast(LZHAM_MIN(UINT_MAX, m_num)); } + double get_total() const { return m_total; } + double get_average() const { return m_num ? m_total / m_num : 0.0f; }; + double get_std_dev() const { return m_num ? sqrt( m_num * m_total2 - m_total * m_total ) / m_num: 0.0f; } + double get_min_val() const { return m_num ? m_min_val : 0.0f; } + double get_max_val() const { return m_num ? 
m_max_val : 0.0f; } + + private: + uint64 m_num; + double m_total; + double m_total2; + double m_min_val; + double m_max_val; + }; + + struct coding_stats + { + coding_stats() { clear(); } + + void clear(); + + void update(const lzdecision& lzdec, const state& cur_state, const search_accelerator& dict, bit_cost_t cost); + void print(); + + uint m_total_bytes; + uint m_total_contexts; + double m_total_cost; + + tracked_stat m_context_stats; + + double m_total_match_bits_cost; + double m_worst_match_bits_cost; + double m_total_is_match0_bits_cost; + double m_total_is_match1_bits_cost; + + uint m_total_truncated_matches; + uint m_match_truncation_len_hist[CLZBase::cMaxMatchLen + 1]; + uint m_match_truncation_hist[CLZBase::cMaxMatchLen + 1]; + uint m_match_type_truncation_hist[CLZBase::cNumStates][5]; + uint m_match_type_was_not_truncated_hist[CLZBase::cNumStates][5]; + + uint m_total_nonmatches; + uint m_total_matches; + + tracked_stat m_lit_stats; + tracked_stat m_delta_lit_stats; + + tracked_stat m_rep_stats[CLZBase::cMatchHistSize]; + tracked_stat m_rep0_len1_stats; + tracked_stat m_rep0_len2_plus_stats; + + tracked_stat m_full_match_stats[cMaxMatchLen + 1]; + + uint m_total_far_len2_matches; + uint m_total_near_len2_matches; + + uint m_total_update_rate_resets; + + uint m_max_len2_dist; + }; + + init_params m_params; + comp_settings m_settings; + + int64 m_src_size; + uint32 m_src_adler32; + uint32 m_src_crc32; + + search_accelerator m_accel; + + symbol_codec m_codec; + + coding_stats m_stats; + + byte_vec m_block_buf; + byte_vec m_comp_buf; + + uint m_step; + + uint m_block_start_dict_ofs; + + uint m_block_index; + + bool m_finished; + bool m_use_task_pool; + + struct node_state + { + LZHAM_FORCE_INLINE void clear() + { + m_total_cost = cBitCostMax; //math::cNearlyInfinite; + m_total_complexity = UINT_MAX; + } + + // the lzdecision that led from parent to this node_state + lzdecision m_lzdec; + + // This is either the state of the parent node (optimal parsing), or the state of the child node (extreme parsing). + state::state_base m_saved_state; + + // Total cost to arrive at this node state. + bit_cost_t m_total_cost; + uint m_total_complexity; + + // Parent node index. + int16 m_parent_index; + + // Parent node state index (only valid when extreme parsing). 
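tracked_stat, declared earlier in this header, keeps only a count, a sum, and a sum of squares, which is enough to recover the mean and standard deviation on demand. A compact standalone equivalent:

// Illustrative sketch of the tracked_stat idea: mean and standard deviation
// from a running count, sum, and sum of squares.
#include <cstdio>
#include <cmath>

struct RunningStat
{
   unsigned long long n = 0;
   double sum = 0.0, sum2 = 0.0;

   void update(double v) { n++; sum += v; sum2 += v * v; }
   double average() const { return n ? sum / n : 0.0; }
   double std_dev() const { return n ? std::sqrt(n * sum2 - sum * sum) / n : 0.0; }
};

int main()
{
   RunningStat s;
   const double vals[] = { 4.0, 7.0, 13.0, 16.0 };
   for (double v : vals)
      s.update(v);
   std::printf("avg %.2f, stddev %.2f\n", s.average(), s.std_dev()); // 10.00, 4.74
   return 0;
}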
+ int8 m_parent_state_index; + }; + + struct node + { + LZHAM_FORCE_INLINE void clear() + { + m_num_node_states = 0; + } + + uint m_num_node_states; + enum { cMaxNodeStates = 4 }; + node_state m_node_states[cMaxNodeStates]; + + void add_state(int parent_index, int parent_state_index, const lzdecision &lzdec, state &parent_state, bit_cost_t total_cost, uint total_complexity); + }; + + state m_start_of_block_state; // state at start of block + + state m_state; // main thread's current coding state + + struct raw_parse_thread_state + { + uint m_start_ofs; + uint m_bytes_to_match; + + state m_initial_state; + + node m_nodes[cMaxParseGraphNodes + 1]; + + lzham::vector m_best_decisions; + bool m_emit_decisions_backwards; + + lzham::vector m_temp_decisions; + + uint m_max_greedy_decisions; + uint m_greedy_parse_total_bytes_coded; + bool m_greedy_parse_gave_up; + + bool m_issue_reset_state_partial; + bool m_failed; + }; + + struct parse_thread_state : raw_parse_thread_state + { + uint8 m_unused_alignment_array[128 - (sizeof(raw_parse_thread_state) & 127)]; + }; + + uint m_num_parse_threads; + parse_thread_state m_parse_thread_state[cMaxParseThreads + 1]; // +1 extra for the greedy parser thread (only used for delta compression) + + volatile atomic32_t m_parse_jobs_remaining; + semaphore m_parse_jobs_complete; + + enum { cMaxBlockHistorySize = 6, cBlockHistoryCompRatioScale = 1000U }; + struct block_history + { + uint m_comp_size; + uint m_src_size; + uint m_ratio; + bool m_raw_block; + bool m_reset_update_rate; + }; + block_history m_block_history[cMaxBlockHistorySize]; + uint m_block_history_size; + uint m_block_history_next; + void update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate); + uint get_recent_block_ratio(); + uint get_min_block_ratio(); + uint get_max_block_ratio(); + uint get_total_recent_reset_update_rate(); + + bool send_zlib_header(); + bool init_seed_bytes(); + bool send_final_block(); + bool send_configuration(); + bool extreme_parse(parse_thread_state &parse_state); + bool optimal_parse(parse_thread_state &parse_state); + int enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector& decisions, uint min_match_len, uint max_match_len); + bool greedy_parse(parse_thread_state &parse_state); + void parse_job_callback(uint64 data, void* pData_ptr); + bool compress_block(const void* pBuf, uint buf_len); + bool compress_block_internal(const void* pBuf, uint buf_len); + bool code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match); + bool send_sync_block(lzham_flush_t flush_type); + }; + +} // namespace lzham + + + diff --git a/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_state.cpp b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_state.cpp new file mode 100644 index 00000000..b6d6798c --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamcomp/lzham_lzcomp_state.cpp @@ -0,0 +1,1463 @@ +// File: lzham_lzcomp_state.cpp +// See Copyright Notice and license at the end of include/lzham.h +#include "../include/lzham_core.h" +#include "lzham_lzcomp_internal.h" + +namespace lzham +{ + static uint get_huge_match_code_len(uint len) + { + LZHAM_ASSERT((len > CLZBase::cMaxMatchLen) && (len <= CLZBase::cMaxHugeMatchLen)); + len -= (CLZBase::cMaxMatchLen + 1); + + if (len < 256) + return 1 + 8; + else if (len < (256 + 1024)) + return 2 + 10; + else if (len < (256 + 1024 + 4096)) + return 3 + 12; + else + return 3 + 16; + } + + static uint get_huge_match_code_bits(uint len) + { + LZHAM_ASSERT((len > CLZBase::cMaxMatchLen) && (len <= 
CLZBase::cMaxHugeMatchLen)); + len -= (CLZBase::cMaxMatchLen + 1); + + uint c; + if (len < 256) + c = len; + else if (len < (256 + 1024)) + { + uint r = (len - 256); + LZHAM_ASSERT(r <= 1023); + c = r | (2 << 10); + } + else if (len < (256 + 1024 + 4096)) + { + uint r = (len - (256 + 1024)); + LZHAM_ASSERT(r <= 4095); + c = r | (6 << 12); + } + else + { + uint r = (len - (256 + 1024 + 4096)); + LZHAM_ASSERT(r <= 65535); + c = r | (7 << 16); + } + + return c; + } + + uint lzcompressor::lzdecision::get_match_dist(const state& cur_state) const + { + if (!is_match()) + return 0; + else if (is_rep()) + { + int index = -m_dist - 1; + LZHAM_ASSERT(index < CLZBase::cMatchHistSize); + return cur_state.m_match_hist[index]; + } + else + return m_dist; + } + + lzcompressor::state::state() + { + clear(); + } + + void lzcompressor::state::clear() + { + m_cur_ofs = 0; + m_cur_state = 0; + m_block_start_dict_ofs = 0; + + for (uint i = 0; i < 2; i++) + { + m_rep_len_table[i].clear(); + m_large_len_table[i].clear(); + } + m_main_table.clear(); + m_dist_lsb_table.clear(); + + for (uint i = 0; i < (1 << CLZBase::cNumLitPredBits); i++) + m_lit_table[i].clear(); + + for (uint i = 0; i < (1 << CLZBase::cNumDeltaLitPredBits); i++) + m_delta_lit_table[i].clear(); + + m_match_hist[0] = 1; + m_match_hist[1] = 1; + m_match_hist[2] = 1; + m_match_hist[3] = 1; + } + + void lzcompressor::state::reset() + { + m_cur_ofs = 0; + m_cur_state = 0; + m_block_start_dict_ofs = 0; + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_match_model); i++) + m_is_match_model[i].clear(); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_rep_model); i++) + m_is_rep_model[i].clear(); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_rep0_model); i++) + m_is_rep0_model[i].clear(); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_rep0_single_byte_model); i++) + m_is_rep0_single_byte_model[i].clear(); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_rep1_model); i++) + m_is_rep1_model[i].clear(); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_rep2_model); i++) + m_is_rep2_model[i].clear(); + + for (uint i = 0; i < 2; i++) + { + m_rep_len_table[i].reset(); + m_large_len_table[i].reset(); + } + m_main_table.reset(); + m_dist_lsb_table.reset(); + + // Only reset the first table in the array, then just clone it to the others because they're all the same when reset. (This is ~9x faster than resetting each one.) 
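get_huge_match_code_len() and get_huge_match_code_bits() at the top of this file code lengths beyond cMaxMatchLen with a short escape prefix plus 8, 10, 12, or 16 extra bits depending on the excess. A worked sketch of that bucketing (the base of 258 is an assumption standing in for cMaxMatchLen + 1):

// Illustrative sketch of the huge-match escape buckets: the length excess over
// cMaxMatchLen is coded with 8, 10, 12, or 16 extra bits after a short prefix.
#include <cstdio>

static unsigned huge_match_code_bits(unsigned len, unsigned base)
{
   unsigned excess = len - base;
   if (excess < 256)                 return 1 + 8;   // prefix + 8-bit excess
   if (excess < 256 + 1024)          return 2 + 10;
   if (excess < 256 + 1024 + 4096)   return 3 + 12;
   return 3 + 16;
}

int main()
{
   const unsigned kBase = 258;        // assumed cMaxMatchLen + 1
   const unsigned lens[] = { 300, 1000, 5000, 60000 };
   for (unsigned len : lens)
      std::printf("len %u -> %u code bits\n", len, huge_match_code_bits(len, kBase));
   return 0;
}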
+ m_lit_table[0].reset(); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_lit_table); i++) + m_lit_table[i] = m_lit_table[0]; + + m_delta_lit_table[0].reset(); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_delta_lit_table); i++) + m_delta_lit_table[i] = m_delta_lit_table[0]; + + m_match_hist[0] = 1; + m_match_hist[1] = 1; + m_match_hist[2] = 1; + m_match_hist[3] = 1; + } + + bool lzcompressor::state::init(CLZBase& lzbase, bool fast_adaptive_huffman_updating, bool use_polar_codes) + { + m_cur_ofs = 0; + m_cur_state = 0; + + if (!m_rep_len_table[0].init(true, CLZBase::cNumHugeMatchCodes + (CLZBase::cMaxMatchLen - CLZBase::cMinMatchLen + 1), fast_adaptive_huffman_updating, use_polar_codes)) + return false; + if (!m_rep_len_table[1].assign(m_rep_len_table[0])) + return false; + + if (!m_large_len_table[0].init(true, CLZBase::cNumHugeMatchCodes + CLZBase::cLZXNumSecondaryLengths, fast_adaptive_huffman_updating, use_polar_codes)) + return false; + if (!m_large_len_table[1].assign(m_large_len_table[0])) + return false; + + if (!m_main_table.init(true, CLZBase::cLZXNumSpecialLengths + (lzbase.m_num_lzx_slots - CLZBase::cLZXLowestUsableMatchSlot) * 8, fast_adaptive_huffman_updating, use_polar_codes)) + return false; + if (!m_dist_lsb_table.init(true, 16, fast_adaptive_huffman_updating, use_polar_codes)) + return false; + + if (!m_lit_table[0].init(true, 256, fast_adaptive_huffman_updating, use_polar_codes)) + return false; + for (uint i = 1; i < (1 << CLZBase::cNumLitPredBits); i++) + if (!m_lit_table[i].assign(m_lit_table[0])) + return false; + + if (!m_delta_lit_table[0].init(true, 256, fast_adaptive_huffman_updating, use_polar_codes)) + return false; + for (uint i = 1; i < (1 << CLZBase::cNumDeltaLitPredBits); i++) + if (!m_delta_lit_table[i].assign(m_delta_lit_table[0])) + return false; + + m_match_hist[0] = 1; + m_match_hist[1] = 1; + m_match_hist[2] = 1; + m_match_hist[3] = 1; + + return true; + } + + void lzcompressor::state_base::partial_advance(const lzdecision& lzdec) + { + if (lzdec.m_len == 0) + { + if (m_cur_state < 4) m_cur_state = 0; else if (m_cur_state < 10) m_cur_state -= 3; else m_cur_state -= 6; + } + else + { + if (lzdec.m_dist < 0) + { + int match_hist_index = -lzdec.m_dist - 1; + + if (!match_hist_index) + { + if (lzdec.m_len == 1) + { + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 9 : 11; + } + else + { + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 8 : 11; + } + } + else + { + if (match_hist_index == 1) + { + std::swap(m_match_hist[0], m_match_hist[1]); + } + else if (match_hist_index == 2) + { + int dist = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + else + { + LZHAM_ASSERT(match_hist_index == 3); + + int dist = m_match_hist[3]; + m_match_hist[3] = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 8 : 11; + } + } + else + { + // full + LZHAM_ASSUME(CLZBase::cMatchHistSize == 4); + m_match_hist[3] = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = lzdec.m_dist; + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 
CLZBase::cNumLitStates : CLZBase::cNumLitStates + 3; + } + } + + m_cur_ofs = lzdec.m_pos + lzdec.get_len(); + } + + uint lzcompressor::state::get_pred_char(const search_accelerator& dict, int pos, int backward_ofs) const + { + LZHAM_ASSERT(pos >= (int)m_block_start_dict_ofs); + int limit = pos - m_block_start_dict_ofs; + if (backward_ofs > limit) + return 0; + return dict[pos - backward_ofs]; + } + + bit_cost_t lzcompressor::state::get_cost(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) const + { + const uint lit_pred0 = get_pred_char(dict, lzdec.m_pos, 1); + + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, m_cur_state); + LZHAM_ASSERT(is_match_model_index < LZHAM_ARRAY_SIZE(m_is_match_model)); + bit_cost_t cost = m_is_match_model[is_match_model_index].get_cost(lzdec.is_match()); + + if (!lzdec.is_match()) + { + const uint lit = dict[lzdec.m_pos]; + + if (m_cur_state < CLZBase::cNumLitStates) + { + const uint lit_pred1 = get_pred_char(dict, lzdec.m_pos, 2); + + uint lit_pred = (lit_pred0 >> (8 - CLZBase::cNumLitPredBits/2)) | + (((lit_pred1 >> (8 - CLZBase::cNumLitPredBits/2)) << CLZBase::cNumLitPredBits/2)); + + // literal + cost += m_lit_table[lit_pred].get_cost(lit); + } + else + { + // delta literal + const uint rep_lit0 = dict[(lzdec.m_pos - m_match_hist[0]) & dict.m_max_dict_size_mask]; + const uint rep_lit1 = dict[(lzdec.m_pos - m_match_hist[0] - 1) & dict.m_max_dict_size_mask]; + + uint delta_lit = rep_lit0 ^ lit; + + uint lit_pred = (rep_lit0 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) | + ((rep_lit1 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) << CLZBase::cNumDeltaLitPredBits/2); + + cost += m_delta_lit_table[lit_pred].get_cost(delta_lit); + } + } + else + { + // match + if (lzdec.m_dist < 0) + { + // rep match + cost += m_is_rep_model[m_cur_state].get_cost(1); + + int match_hist_index = -lzdec.m_dist - 1; + + if (!match_hist_index) + { + // rep0 match + cost += m_is_rep0_model[m_cur_state].get_cost(1); + + if (lzdec.m_len == 1) + { + // single byte rep0 + cost += m_is_rep0_single_byte_model[m_cur_state].get_cost(1); + } + else + { + // normal rep0 + cost += m_is_rep0_single_byte_model[m_cur_state].get_cost(0); + + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + cost += get_huge_match_code_len(lzdec.m_len) + m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen); + } + else + { + cost += m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost(lzdec.m_len - CLZBase::cMinMatchLen); + } + } + } + else + { + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + cost += get_huge_match_code_len(lzdec.m_len) + m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen); + } + else + { + cost += m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost(lzdec.m_len - CLZBase::cMinMatchLen); + } + + // rep1-rep3 match + cost += m_is_rep0_model[m_cur_state].get_cost(0); + + if (match_hist_index == 1) + { + // rep1 + cost += m_is_rep1_model[m_cur_state].get_cost(1); + } + else + { + cost += m_is_rep1_model[m_cur_state].get_cost(0); + + if (match_hist_index == 2) + { + // rep2 + cost += m_is_rep2_model[m_cur_state].get_cost(1); + } + else + { + LZHAM_ASSERT(match_hist_index == 3); + // rep3 + cost += m_is_rep2_model[m_cur_state].get_cost(0); + } + } + } + } + else + { + cost += m_is_rep_model[m_cur_state].get_cost(0); + + LZHAM_ASSERT(lzdec.m_len >= CLZBase::cMinMatchLen); + + // full match + uint match_slot, match_extra; + 
lzbase.compute_lzx_position_slot(lzdec.m_dist, match_slot, match_extra); + + uint match_low_sym = 0; + if (lzdec.m_len >= 9) + { + match_low_sym = 7; + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + cost += get_huge_match_code_len(lzdec.m_len) + m_large_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost((CLZBase::cMaxMatchLen + 1) - 9); + } + else + { + cost += m_large_len_table[m_cur_state >= CLZBase::cNumLitStates].get_cost(lzdec.m_len - 9); + } + } + else + match_low_sym = lzdec.m_len - 2; + + uint match_high_sym = 0; + + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + uint main_sym = match_low_sym | (match_high_sym << 3); + + cost += m_main_table.get_cost(CLZBase::cLZXNumSpecialLengths + main_sym); + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + if (num_extra_bits < 3) + cost += convert_to_scaled_bitcost(num_extra_bits); + else + { + if (num_extra_bits > 4) + cost += convert_to_scaled_bitcost(num_extra_bits - 4); + + cost += m_dist_lsb_table.get_cost(match_extra & 15); + } + } + } + + return cost; + } + + bit_cost_t lzcompressor::state::get_len2_match_cost(CLZBase& lzbase, uint dict_pos, uint len2_match_dist, uint is_match_model_index) + { + LZHAM_NOTE_UNUSED(dict_pos); + + bit_cost_t cost = m_is_match_model[is_match_model_index].get_cost(1); + + cost += m_is_rep_model[m_cur_state].get_cost(0); + + // full match + uint match_slot, match_extra; + lzbase.compute_lzx_position_slot(len2_match_dist, match_slot, match_extra); + + const uint match_len = 2; + uint match_low_sym = match_len - 2; + + uint match_high_sym = 0; + + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + uint main_sym = match_low_sym | (match_high_sym << 3); + + cost += m_main_table.get_cost(CLZBase::cLZXNumSpecialLengths + main_sym); + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + if (num_extra_bits < 3) + cost += convert_to_scaled_bitcost(num_extra_bits); + else + { + if (num_extra_bits > 4) + cost += convert_to_scaled_bitcost(num_extra_bits - 4); + + cost += m_dist_lsb_table.get_cost(match_extra & 15); + } + + return cost; + } + + bit_cost_t lzcompressor::state::get_lit_cost(const search_accelerator& dict, uint dict_pos, uint lit_pred0, uint is_match_model_index) const + { + bit_cost_t cost = m_is_match_model[is_match_model_index].get_cost(0); + + const uint lit = dict[dict_pos]; + + if (m_cur_state < CLZBase::cNumLitStates) + { + // literal + const uint lit_pred1 = get_pred_char(dict, dict_pos, 2); + + uint lit_pred = (lit_pred0 >> (8 - CLZBase::cNumLitPredBits/2)) | + (((lit_pred1 >> (8 - CLZBase::cNumLitPredBits/2)) << CLZBase::cNumLitPredBits/2)); + + cost += m_lit_table[lit_pred].get_cost(lit); + } + else + { + // delta literal + const uint rep_lit0 = dict[(dict_pos - m_match_hist[0]) & dict.m_max_dict_size_mask]; + const uint rep_lit1 = dict[(dict_pos - m_match_hist[0] - 1) & dict.m_max_dict_size_mask]; + + uint delta_lit = rep_lit0 ^ lit; + + uint lit_pred = (rep_lit0 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) | + ((rep_lit1 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) << CLZBase::cNumDeltaLitPredBits/2); + + cost += m_delta_lit_table[lit_pred].get_cost(delta_lit); + } + + return cost; + } + + void lzcompressor::state::get_rep_match_costs(uint dict_pos, bit_cost_t *pBitcosts, uint match_hist_index, int 
min_len, int max_len, uint is_match_model_index) const + { + LZHAM_NOTE_UNUSED(dict_pos); + // match + const sym_data_model &rep_len_table = m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates]; + + bit_cost_t base_cost = m_is_match_model[is_match_model_index].get_cost(1); + + base_cost += m_is_rep_model[m_cur_state].get_cost(1); + + if (!match_hist_index) + { + // rep0 match + base_cost += m_is_rep0_model[m_cur_state].get_cost(1); + } + else + { + // rep1-rep3 matches + base_cost += m_is_rep0_model[m_cur_state].get_cost(0); + + if (match_hist_index == 1) + { + // rep1 + base_cost += m_is_rep1_model[m_cur_state].get_cost(1); + } + else + { + base_cost += m_is_rep1_model[m_cur_state].get_cost(0); + + if (match_hist_index == 2) + { + // rep2 + base_cost += m_is_rep2_model[m_cur_state].get_cost(1); + } + else + { + // rep3 + base_cost += m_is_rep2_model[m_cur_state].get_cost(0); + } + } + } + + // rep match + if (!match_hist_index) + { + if (min_len == 1) + { + // single byte rep0 + pBitcosts[1] = base_cost + m_is_rep0_single_byte_model[m_cur_state].get_cost(1); + min_len++; + } + + bit_cost_t rep0_match_base_cost = base_cost + m_is_rep0_single_byte_model[m_cur_state].get_cost(0); + for (int match_len = min_len; match_len <= max_len; match_len++) + { + // normal rep0 + if (match_len > CLZBase::cMaxMatchLen) + { + pBitcosts[match_len] = get_huge_match_code_len(match_len) + rep0_match_base_cost + rep_len_table.get_cost((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen); + } + else + { + pBitcosts[match_len] = rep0_match_base_cost + rep_len_table.get_cost(match_len - CLZBase::cMinMatchLen); + } + } + } + else + { + for (int match_len = min_len; match_len <= max_len; match_len++) + { + if (match_len > CLZBase::cMaxMatchLen) + { + pBitcosts[match_len] = get_huge_match_code_len(match_len) + base_cost + rep_len_table.get_cost((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen); + } + else + { + pBitcosts[match_len] = base_cost + rep_len_table.get_cost(match_len - CLZBase::cMinMatchLen); + } + } + } + } + + void lzcompressor::state::get_full_match_costs(CLZBase& lzbase, uint dict_pos, bit_cost_t *pBitcosts, uint match_dist, int min_len, int max_len, uint is_match_model_index) const + { + LZHAM_NOTE_UNUSED(dict_pos); + LZHAM_ASSERT(min_len >= CLZBase::cMinMatchLen); + + bit_cost_t cost = m_is_match_model[is_match_model_index].get_cost(1); + + cost += m_is_rep_model[m_cur_state].get_cost(0); + + uint match_slot, match_extra; + lzbase.compute_lzx_position_slot(match_dist, match_slot, match_extra); + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + + if (num_extra_bits < 3) + cost += convert_to_scaled_bitcost(num_extra_bits); + else + { + if (num_extra_bits > 4) + cost += convert_to_scaled_bitcost(num_extra_bits - 4); + + cost += m_dist_lsb_table.get_cost(match_extra & 15); + } + + uint match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + const sym_data_model &large_len_table = m_large_len_table[m_cur_state >= CLZBase::cNumLitStates]; + + for (int match_len = min_len; match_len <= max_len; match_len++) + { + bit_cost_t len_cost = cost; + + uint match_low_sym = 0; + if (match_len >= 9) + { + match_low_sym = 7; + if (match_len > CLZBase::cMaxMatchLen) + { + len_cost += get_huge_match_code_len(match_len) + large_len_table.get_cost((CLZBase::cMaxMatchLen + 1) - 9); + } + else + { + len_cost += large_len_table.get_cost(match_len - 9); + } + } + else + 
match_low_sym = match_len - 2; + + uint main_sym = match_low_sym | (match_high_sym << 3); + + pBitcosts[match_len] = len_cost + m_main_table.get_cost(CLZBase::cLZXNumSpecialLengths + main_sym); + } + } + + bool lzcompressor::state::advance(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) + { + const uint lit_pred0 = get_pred_char(dict, lzdec.m_pos, 1); + + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, m_cur_state); + m_is_match_model[is_match_model_index].update(lzdec.is_match()); + + if (!lzdec.is_match()) + { + const uint lit = dict[lzdec.m_pos]; + + if (m_cur_state < CLZBase::cNumLitStates) + { + const uint lit_pred1 = get_pred_char(dict, lzdec.m_pos, 2); + + uint lit_pred = (lit_pred0 >> (8 - CLZBase::cNumLitPredBits/2)) | + (((lit_pred1 >> (8 - CLZBase::cNumLitPredBits/2)) << CLZBase::cNumLitPredBits/2)); + + // literal + if (!m_lit_table[lit_pred].update(lit)) return false; + } + else + { + // delta literal + const uint rep_lit0 = dict[(lzdec.m_pos - m_match_hist[0]) & dict.m_max_dict_size_mask]; + const uint rep_lit1 = dict[(lzdec.m_pos - m_match_hist[0] - 1) & dict.m_max_dict_size_mask]; + + uint delta_lit = rep_lit0 ^ lit; + + uint lit_pred = (rep_lit0 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) | + ((rep_lit1 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) << CLZBase::cNumDeltaLitPredBits/2); + + if (!m_delta_lit_table[lit_pred].update(delta_lit)) return false; + } + + if (m_cur_state < 4) m_cur_state = 0; else if (m_cur_state < 10) m_cur_state -= 3; else m_cur_state -= 6; + } + else + { + // match + if (lzdec.m_dist < 0) + { + // rep match + m_is_rep_model[m_cur_state].update(1); + + int match_hist_index = -lzdec.m_dist - 1; + + if (!match_hist_index) + { + // rep0 match + m_is_rep0_model[m_cur_state].update(1); + + if (lzdec.m_len == 1) + { + // single byte rep0 + m_is_rep0_single_byte_model[m_cur_state].update(1); + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 9 : 11; + } + else + { + // normal rep0 + m_is_rep0_single_byte_model[m_cur_state].update(0); + + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].update((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen)) return false; + } + else + { + if (!m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].update(lzdec.m_len - CLZBase::cMinMatchLen)) return false; + } + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 
8 : 11; + } + } + else + { + // rep1-rep3 match + m_is_rep0_model[m_cur_state].update(0); + + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].update((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen)) return false; + } + else + { + if (!m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates].update(lzdec.m_len - CLZBase::cMinMatchLen)) return false; + } + + if (match_hist_index == 1) + { + // rep1 + m_is_rep1_model[m_cur_state].update(1); + + std::swap(m_match_hist[0], m_match_hist[1]); + } + else + { + m_is_rep1_model[m_cur_state].update(0); + + if (match_hist_index == 2) + { + // rep2 + m_is_rep2_model[m_cur_state].update(1); + + int dist = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + else + { + // rep3 + m_is_rep2_model[m_cur_state].update(0); + + int dist = m_match_hist[3]; + m_match_hist[3] = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + } + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 8 : 11; + } + } + else + { + m_is_rep_model[m_cur_state].update(0); + + LZHAM_ASSERT(lzdec.m_len >= CLZBase::cMinMatchLen); + + // full match + uint match_slot, match_extra; + lzbase.compute_lzx_position_slot(lzdec.m_dist, match_slot, match_extra); + + uint match_low_sym = 0; + int large_len_sym = -1; + if (lzdec.m_len >= 9) + { + match_low_sym = 7; + + large_len_sym = lzdec.m_len - 9; + } + else + match_low_sym = lzdec.m_len - 2; + + uint match_high_sym = 0; + + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + uint main_sym = match_low_sym | (match_high_sym << 3); + + if (!m_main_table.update(CLZBase::cLZXNumSpecialLengths + main_sym)) return false; + + if (large_len_sym >= 0) + { + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!m_large_len_table[m_cur_state >= CLZBase::cNumLitStates].update((CLZBase::cMaxMatchLen + 1) - 9)) return false; + } + else + { + if (!m_large_len_table[m_cur_state >= CLZBase::cNumLitStates].update(large_len_sym)) return false; + } + } + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + if (num_extra_bits >= 3) + { + if (!m_dist_lsb_table.update(match_extra & 15)) return false; + } + + update_match_hist(lzdec.m_dist); + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 
CLZBase::cNumLitStates : CLZBase::cNumLitStates + 3; + } + } + + m_cur_ofs = lzdec.m_pos + lzdec.get_len(); + return true; + } + + bool lzcompressor::state::encode(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) + { + const uint lit_pred0 = get_pred_char(dict, lzdec.m_pos, 1); + + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, m_cur_state); + if (!codec.encode(lzdec.is_match(), m_is_match_model[is_match_model_index])) return false; + + if (!lzdec.is_match()) + { + const uint lit = dict[lzdec.m_pos]; + +#ifdef LZHAM_LZDEBUG + if (!codec.encode_bits(lit, 8)) return false; +#endif + + if (m_cur_state < CLZBase::cNumLitStates) + { + const uint lit_pred1 = get_pred_char(dict, lzdec.m_pos, 2); + + uint lit_pred = (lit_pred0 >> (8 - CLZBase::cNumLitPredBits/2)) | + (((lit_pred1 >> (8 - CLZBase::cNumLitPredBits/2)) << CLZBase::cNumLitPredBits/2)); + + // literal + if (!codec.encode(lit, m_lit_table[lit_pred])) return false; + } + else + { + // delta literal + const uint rep_lit0 = dict[(lzdec.m_pos - m_match_hist[0]) & dict.m_max_dict_size_mask]; + const uint rep_lit1 = dict[(lzdec.m_pos - m_match_hist[0] - 1) & dict.m_max_dict_size_mask]; + + uint delta_lit = rep_lit0 ^ lit; + + uint lit_pred = (rep_lit0 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) | + ((rep_lit1 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) << CLZBase::cNumDeltaLitPredBits/2); + +#ifdef LZHAM_LZDEBUG + if (!codec.encode_bits(rep_lit0, 8)) return false; +#endif + + if (!codec.encode(delta_lit, m_delta_lit_table[lit_pred])) return false; + } + + if (m_cur_state < 4) m_cur_state = 0; else if (m_cur_state < 10) m_cur_state -= 3; else m_cur_state -= 6; + } + else + { + // match + if (lzdec.m_dist < 0) + { + // rep match + if (!codec.encode(1, m_is_rep_model[m_cur_state])) return false; + + int match_hist_index = -lzdec.m_dist - 1; + + if (!match_hist_index) + { + // rep0 match + if (!codec.encode(1, m_is_rep0_model[m_cur_state])) return false; + + if (lzdec.m_len == 1) + { + // single byte rep0 + if (!codec.encode(1, m_is_rep0_single_byte_model[m_cur_state])) return false; + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 9 : 11; + } + else + { + // normal rep0 + if (!codec.encode(0, m_is_rep0_single_byte_model[m_cur_state])) return false; + + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!codec.encode((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen, m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + if (!codec.encode_bits(get_huge_match_code_bits(lzdec.m_len), get_huge_match_code_len(lzdec.m_len))) return false; + } + else + { + if (!codec.encode(lzdec.m_len - CLZBase::cMinMatchLen, m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + } + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 
8 : 11; + } + } + else + { + // rep1-rep3 match + if (!codec.encode(0, m_is_rep0_model[m_cur_state])) return false; + + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!codec.encode((CLZBase::cMaxMatchLen + 1) - CLZBase::cMinMatchLen, m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + if (!codec.encode_bits(get_huge_match_code_bits(lzdec.m_len), get_huge_match_code_len(lzdec.m_len))) return false; + } + else + { + if (!codec.encode(lzdec.m_len - CLZBase::cMinMatchLen, m_rep_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + } + + if (match_hist_index == 1) + { + // rep1 + if (!codec.encode(1, m_is_rep1_model[m_cur_state])) return false; + + std::swap(m_match_hist[0], m_match_hist[1]); + } + else + { + if (!codec.encode(0, m_is_rep1_model[m_cur_state])) return false; + + if (match_hist_index == 2) + { + // rep2 + if (!codec.encode(1, m_is_rep2_model[m_cur_state])) return false; + + int dist = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + else + { + // rep3 + if (!codec.encode(0, m_is_rep2_model[m_cur_state])) return false; + + int dist = m_match_hist[3]; + m_match_hist[3] = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = dist; + } + } + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 8 : 11; + } + } + else + { + if (!codec.encode(0, m_is_rep_model[m_cur_state])) return false; + + LZHAM_ASSERT(lzdec.m_len >= CLZBase::cMinMatchLen); + + // full match + uint match_slot, match_extra; + lzbase.compute_lzx_position_slot(lzdec.m_dist, match_slot, match_extra); + + uint match_low_sym = 0; + int large_len_sym = -1; + if (lzdec.m_len >= 9) + { + match_low_sym = 7; + + large_len_sym = lzdec.m_len - 9; + } + else + match_low_sym = lzdec.m_len - 2; + + uint match_high_sym = 0; + + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + uint main_sym = match_low_sym | (match_high_sym << 3); + + if (!codec.encode(CLZBase::cLZXNumSpecialLengths + main_sym, m_main_table)) return false; + + if (large_len_sym >= 0) + { + if (lzdec.m_len > CLZBase::cMaxMatchLen) + { + if (!codec.encode((CLZBase::cMaxMatchLen + 1) - 9, m_large_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + if (!codec.encode_bits(get_huge_match_code_bits(lzdec.m_len), get_huge_match_code_len(lzdec.m_len))) return false; + } + else + { + if (!codec.encode(large_len_sym, m_large_len_table[m_cur_state >= CLZBase::cNumLitStates])) return false; + } + } + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + if (num_extra_bits < 3) + { + if (!codec.encode_bits(match_extra, num_extra_bits)) return false; + } + else + { + if (num_extra_bits > 4) + { + if (!codec.encode_bits((match_extra >> 4), num_extra_bits - 4)) return false; + } + + if (!codec.encode(match_extra & 15, m_dist_lsb_table)) return false; + } + + update_match_hist(lzdec.m_dist); + + m_cur_state = (m_cur_state < CLZBase::cNumLitStates) ? 
CLZBase::cNumLitStates : CLZBase::cNumLitStates + 3; + } + +#ifdef LZHAM_LZDEBUG + if (!codec.encode_bits(m_match_hist[0], 29)) return false; +#endif + } + + m_cur_ofs = lzdec.m_pos + lzdec.get_len(); + return true; + } + + void lzcompressor::state::print(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) + { + LZHAM_NOTE_UNUSED(codec), LZHAM_NOTE_UNUSED(lzbase), LZHAM_NOTE_UNUSED(dict); + + const uint lit_pred0 = get_pred_char(dict, lzdec.m_pos, 1); + + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(lit_pred0, m_cur_state); + + printf(" pos: %u, state: %u, match_pred: %u, is_match_model_index: %u, is_match: %u, cost: %f\n", + lzdec.m_pos, + m_cur_state, + lit_pred0, is_match_model_index, lzdec.is_match(), get_cost(lzbase, dict, lzdec) / (float)cBitCostScale); + + if (!lzdec.is_match()) + { + const uint lit = dict[lzdec.m_pos]; + + if (m_cur_state < CLZBase::cNumLitStates) + { + const uint lit_pred1 = get_pred_char(dict, lzdec.m_pos, 2); + + uint lit_pred = (lit_pred0 >> (8 - CLZBase::cNumLitPredBits/2)) | + (((lit_pred1 >> (8 - CLZBase::cNumLitPredBits/2)) << CLZBase::cNumLitPredBits/2)); + + printf("---Regular lit: %u '%c', lit_pred: %u '%c'\n", + lit, ((lit >= 32) && (lit <= 127)) ? lit : '.', + lit_pred, ((lit_pred >= 32) && (lit_pred <= 127)) ? lit_pred : '.'); + } + else + { + // delta literal + const uint rep_lit0 = dict[(lzdec.m_pos - m_match_hist[0]) & dict.m_max_dict_size_mask]; + const uint rep_lit1 = dict[(lzdec.m_pos - m_match_hist[0] - 1) & dict.m_max_dict_size_mask]; + + uint delta_lit = rep_lit0 ^ lit; + + uint lit_pred = (rep_lit0 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) | + ((rep_lit1 >> (8 - CLZBase::cNumDeltaLitPredBits/2)) << CLZBase::cNumDeltaLitPredBits/2); + + printf("***Delta lit: %u '%c', Mismatch: %u '%c', Delta: 0x%02X, lit_pred: %u\n", + lit, ((lit >= 32) && (lit <= 127)) ? lit : '.', + rep_lit0, ((rep_lit0 >= 32) && (rep_lit0 <= 127)) ? rep_lit0 : '.', + delta_lit, lit_pred); + } + } + else + { + uint actual_match_len = dict.get_match_len(0, lzdec.get_match_dist(*this), CLZBase::cMaxMatchLen); + LZHAM_ASSERT(actual_match_len >= lzdec.get_len()); + + // match + if (lzdec.m_dist < 0) + { + int match_hist_index = -lzdec.m_dist - 1; + + if (!match_hist_index) + { + if (lzdec.m_len == 1) + { + printf("!!!Rep 0 len1\n"); + } + else + { + printf("!!!Rep 0 full len %u\n", lzdec.m_len); + } + } + else + { + printf("!!!Rep %u full len %u\n", match_hist_index, lzdec.m_len); + } + } + else + { + LZHAM_ASSERT(lzdec.m_len >= CLZBase::cMinMatchLen); + + // full match + uint match_slot, match_extra; + lzbase.compute_lzx_position_slot(lzdec.m_dist, match_slot, match_extra); + + uint match_low_sym = 0; LZHAM_NOTE_UNUSED(match_low_sym); + int large_len_sym = -1; LZHAM_NOTE_UNUSED(large_len_sym); + if (lzdec.m_len >= 9) + { + match_low_sym = 7; + + large_len_sym = lzdec.m_len - 9; + } + else + match_low_sym = lzdec.m_len - 2; + + uint match_high_sym = 0; LZHAM_NOTE_UNUSED(match_high_sym); + + LZHAM_ASSERT(match_slot >= CLZBase::cLZXLowestUsableMatchSlot && (match_slot < lzbase.m_num_lzx_slots)); + match_high_sym = match_slot - CLZBase::cLZXLowestUsableMatchSlot; + + //uint main_sym = match_low_sym | (match_high_sym << 3); + + uint num_extra_bits = lzbase.m_lzx_position_extra_bits[match_slot]; + printf("^^^Full match Len %u Dist %u, Slot %u, ExtraBits: %u", lzdec.m_len, lzdec.m_dist, match_slot, num_extra_bits); + + if (num_extra_bits < 3) + { + } + else + { + printf(" (Low 4 bits: %u vs. 
%u)", lzdec.m_dist & 15, match_extra & 15); + } + printf("\n"); + } + + if (actual_match_len > lzdec.get_len()) + { + printf(" TRUNCATED match, actual len is %u, shortened by %u\n", actual_match_len, actual_match_len - lzdec.get_len()); + } + } + } + + bool lzcompressor::state::encode_eob(symbol_codec& codec, const search_accelerator& dict, uint dict_pos) + { +#ifdef LZHAM_LZDEBUG + if (!codec.encode_bits(CLZBase::cLZHAMDebugSyncMarkerValue, CLZBase::cLZHAMDebugSyncMarkerBits)) return false; + if (!codec.encode_bits(1, 1)) return false; + if (!codec.encode_bits(0, 17)) return false; + if (!codec.encode_bits(m_cur_state, 4)) return false; +#endif + + const uint match_pred = get_pred_char(dict, dict_pos, 1); + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(match_pred, m_cur_state); + if (!codec.encode(1, m_is_match_model[is_match_model_index])) return false; + + // full match + if (!codec.encode(0, m_is_rep_model[m_cur_state])) return false; + + return codec.encode(CLZBase::cLZXSpecialCodeEndOfBlockCode, m_main_table); + } + + bool lzcompressor::state::encode_reset_state_partial(symbol_codec& codec, const search_accelerator& dict, uint dict_pos) + { +#ifdef LZHAM_LZDEBUG + if (!codec.encode_bits(CLZBase::cLZHAMDebugSyncMarkerValue, CLZBase::cLZHAMDebugSyncMarkerBits)) return false; + if (!codec.encode_bits(1, 1)) return false; + if (!codec.encode_bits(0, 17)) return false; + if (!codec.encode_bits(m_cur_state, 4)) return false; +#endif + + const uint match_pred = get_pred_char(dict, dict_pos, 1); + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(match_pred, m_cur_state); + if (!codec.encode(1, m_is_match_model[is_match_model_index])) return false; + + // full match + if (!codec.encode(0, m_is_rep_model[m_cur_state])) return false; + + if (!codec.encode(CLZBase::cLZXSpecialCodePartialStateReset, m_main_table)) + return false; + + reset_state_partial(); + return true; + } + + void lzcompressor::state::update_match_hist(uint match_dist) + { + LZHAM_ASSUME(CLZBase::cMatchHistSize == 4); + m_match_hist[3] = m_match_hist[2]; + m_match_hist[2] = m_match_hist[1]; + m_match_hist[1] = m_match_hist[0]; + m_match_hist[0] = match_dist; + } + + int lzcompressor::state::find_match_dist(uint match_dist) const + { + for (uint match_hist_index = 0; match_hist_index < CLZBase::cMatchHistSize; match_hist_index++) + if (match_dist == m_match_hist[match_hist_index]) + return match_hist_index; + + return -1; + } + + void lzcompressor::state::reset_state_partial() + { + LZHAM_ASSUME(CLZBase::cMatchHistSize == 4); + m_match_hist[0] = 1; + m_match_hist[1] = 1; + m_match_hist[2] = 1; + m_match_hist[3] = 1; + m_cur_state = 0; + } + + void lzcompressor::state::start_of_block(const search_accelerator& dict, uint cur_ofs, uint block_index) + { + LZHAM_NOTE_UNUSED(dict), LZHAM_NOTE_UNUSED(block_index); + + reset_state_partial(); + + m_cur_ofs = cur_ofs; + m_block_start_dict_ofs = cur_ofs; + } + + void lzcompressor::state::reset_update_rate() + { + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_lit_table); i++) + m_lit_table[i].reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_delta_lit_table); i++) + m_delta_lit_table[i].reset_update_rate(); + + m_main_table.reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_rep_len_table); i++) + m_rep_len_table[i].reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_large_len_table); i++) + m_large_len_table[i].reset_update_rate(); + + m_dist_lsb_table.reset_update_rate(); + } + + void lzcompressor::coding_stats::clear() + { + 
m_total_bytes = 0; + m_total_contexts = 0; + m_total_match_bits_cost = 0; + m_worst_match_bits_cost = 0; + m_total_is_match0_bits_cost = 0; + m_total_is_match1_bits_cost = 0; + m_context_stats.clear(); + + m_total_nonmatches = 0; + m_total_matches = 0; + m_total_cost = 0.0f; + + m_lit_stats.clear(); + m_delta_lit_stats.clear(); + + m_rep0_len1_stats.clear(); + for (uint i = 0; i < CLZBase::cMatchHistSize; i++) + m_rep_stats[i].clear(); + m_rep0_len1_stats.clear(); + m_rep0_len2_plus_stats.clear(); + + for (uint i = 0; i <= CLZBase::cMaxMatchLen; i++) + m_full_match_stats[i].clear(); + + m_total_far_len2_matches = 0; + m_total_near_len2_matches = 0; + + m_total_truncated_matches = 0; + utils::zero_object(m_match_truncation_len_hist); + utils::zero_object(m_match_truncation_hist); + utils::zero_object(m_match_type_truncation_hist); + utils::zero_object(m_match_type_was_not_truncated_hist); + + m_total_update_rate_resets = 0; + + m_max_len2_dist = 0; + } + + void lzcompressor::coding_stats::print() + { + if (!m_total_contexts) + return; + + printf("-----------\n"); + printf("Coding statistics:\n"); + printf("Total update rate resets: %u\n", m_total_update_rate_resets); + printf("Total Bytes: %u, Total Contexts: %u, Total Cost: %f bits (%f bytes)\nContext ave cost: %f StdDev: %f Min: %f Max: %f\n", m_total_bytes, m_total_contexts, m_total_cost, m_total_cost / 8.0f, m_context_stats.get_average(), m_context_stats.get_std_dev(), m_context_stats.get_min_val(), m_context_stats.get_max_val()); + printf("Ave bytes per context: %f\n", m_total_bytes / (float)m_total_contexts); + + printf("IsMatch:\n"); + printf(" Total: %u, Cost: %f (%f bytes), Ave. Cost: %f, Worst Cost: %f\n", + m_total_contexts, m_total_match_bits_cost, m_total_match_bits_cost / 8.0f, m_total_match_bits_cost / math::maximum(1, m_total_contexts), m_worst_match_bits_cost); + + printf(" IsMatch(0): %u, Cost: %f (%f bytes), Ave. Cost: %f\n", + m_total_nonmatches, m_total_is_match0_bits_cost, m_total_is_match0_bits_cost / 8.0f, m_total_is_match0_bits_cost / math::maximum(1, m_total_nonmatches)); + + printf(" IsMatch(1): %u, Cost: %f (%f bytes), Ave. Cost: %f\n", + m_total_matches, m_total_is_match1_bits_cost, m_total_is_match1_bits_cost / 8.0f, m_total_is_match1_bits_cost / math::maximum(1, m_total_matches)); + + printf("Literal stats:\n"); + printf(" Count: %u, Cost: %f (%f bytes), Ave: %f StdDev: %f Min: %f Max: %f\n", m_lit_stats.get_number_of_values32(), m_lit_stats.get_total(), m_lit_stats.get_total() / 8.0f, m_lit_stats.get_average(), m_lit_stats.get_std_dev(), m_lit_stats.get_min_val(), m_lit_stats.get_max_val()); + + printf("Delta literal stats:\n"); + printf(" Count: %u, Cost: %f (%f bytes), Ave: %f StdDev: %f Min: %f Max: %f\n", m_delta_lit_stats.get_number_of_values32(), m_delta_lit_stats.get_total(), m_delta_lit_stats.get_total() / 8.0f, m_delta_lit_stats.get_average(), m_delta_lit_stats.get_std_dev(), m_delta_lit_stats.get_min_val(), m_delta_lit_stats.get_max_val()); + + printf("Rep0 Len1 stats:\n"); + printf(" Count: %u, Cost: %f (%f bytes), Ave. Cost: %f StdDev: %f Min: %f Max: %f\n", m_rep0_len1_stats.get_number_of_values32(), m_rep0_len1_stats.get_total(), m_rep0_len1_stats.get_total() / 8.0f, m_rep0_len1_stats.get_average(), m_rep0_len1_stats.get_std_dev(), m_rep0_len1_stats.get_min_val(), m_rep0_len1_stats.get_max_val()); + + printf("Rep0 Len2+ stats:\n"); + printf(" Count: %u, Cost: %f (%f bytes), Ave. 
Cost: %f StdDev: %f Min: %f Max: %f\n", m_rep0_len2_plus_stats.get_number_of_values32(), m_rep0_len2_plus_stats.get_total(), m_rep0_len2_plus_stats.get_total() / 8.0f, m_rep0_len2_plus_stats.get_average(), m_rep0_len2_plus_stats.get_std_dev(), m_rep0_len2_plus_stats.get_min_val(), m_rep0_len2_plus_stats.get_max_val()); + + for (uint i = 0; i < CLZBase::cMatchHistSize; i++) + { + printf("Rep %u stats:\n", i); + printf(" Count: %u, Cost: %f (%f bytes), Ave. Cost: %f StdDev: %f Min: %f Max: %f\n", m_rep_stats[i].get_number_of_values32(), m_rep_stats[i].get_total(), m_rep_stats[i].get_total() / 8.0f, m_rep_stats[i].get_average(), m_rep_stats[i].get_std_dev(), m_rep_stats[i].get_min_val(), m_rep_stats[i].get_max_val()); + } + + for (uint i = CLZBase::cMinMatchLen; i <= CLZBase::cMaxMatchLen; i++) + { + printf("Match %u: Total: %u, Cost: %f (%f bytes), Ave: %f StdDev: %f Min: %f Max: %f\n", i, + m_full_match_stats[i].get_number_of_values32(), m_full_match_stats[i].get_total(), m_full_match_stats[i].get_total() / 8.0f, + m_full_match_stats[i].get_average(), m_full_match_stats[i].get_std_dev(), m_full_match_stats[i].get_min_val(), m_full_match_stats[i].get_max_val()); + } + + printf("Total near len2 matches: %u, total far len2 matches: %u\n", m_total_near_len2_matches, m_total_far_len2_matches); + printf("Total matches: %u, truncated matches: %u\n", m_total_matches, m_total_truncated_matches); + printf("Max full match len2 distance: %u\n", m_max_len2_dist); + +#if 0 + printf("Size of truncation histogram:\n"); + for (uint i = 0; i <= CLZBase::cMaxMatchLen; i++) + { + printf("%05u ", m_match_truncation_len_hist[i]); + if ((i & 15) == 15) printf("\n"); + } + printf("\n"); + + printf("Number of truncations per encoded match length histogram:\n"); + for (uint i = 0; i <= CLZBase::cMaxMatchLen; i++) + { + printf("%05u ", m_match_truncation_hist[i]); + if ((i & 15) == 15) printf("\n"); + } + printf("\n"); + + for (uint s = 0; s < CLZBase::cNumStates; s++) + { + printf("-- Match type truncation hist for state %u:\n", s); + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_match_type_truncation_hist[s]); i++) + { + printf("%u truncated (%3.1f%%), %u not truncated\n", m_match_type_truncation_hist[s][i], 100.0f * (float)m_match_type_truncation_hist[s][i] / (m_match_type_truncation_hist[s][i] + m_match_type_was_not_truncated_hist[s][i]), m_match_type_was_not_truncated_hist[s][i]); + } + } +#endif + } + + void lzcompressor::coding_stats::update(const lzdecision& lzdec, const state& cur_state, const search_accelerator& dict, bit_cost_t cost) + { + m_total_bytes += lzdec.get_len(); + m_total_contexts++; + + float cost_in_bits = cost / (float)cBitCostScale; + LZHAM_ASSERT(cost_in_bits > 0.0f); + m_total_cost += cost_in_bits; + + m_context_stats.update(cost_in_bits); + + uint match_pred = cur_state.get_pred_char(dict, lzdec.m_pos, 1); + uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(match_pred, cur_state.m_cur_state); + + if (lzdec.m_len == 0) + { + float match_bit_cost = cur_state.m_is_match_model[is_match_model_index].get_cost(0) / (float)cBitCostScale; + + m_total_is_match0_bits_cost += match_bit_cost; + m_total_match_bits_cost += match_bit_cost; + m_worst_match_bits_cost = math::maximum(m_worst_match_bits_cost, static_cast(match_bit_cost)); + m_total_nonmatches++; + + if (cur_state.m_cur_state < CLZBase::cNumLitStates) + { + m_lit_stats.update(cost_in_bits); + } + else + { + m_delta_lit_stats.update(cost_in_bits); + } + } + else if (lzdec.m_len <= CLZBase::cMaxMatchLen) + { + const uint match_len = 
lzdec.get_len(); + + { + uint match_dist = lzdec.get_match_dist(cur_state); + + uint cur_lookahead_size = dict.get_lookahead_size(); + + uint actual_match_len = dict.get_match_len(0, match_dist, LZHAM_MIN(cur_lookahead_size, static_cast(CLZBase::cMaxMatchLen))); + LZHAM_VERIFY(match_len <= actual_match_len); + + m_total_truncated_matches += match_len < actual_match_len; + m_match_truncation_len_hist[math::maximum(0, actual_match_len - match_len)]++; + + uint type_index = 4; + if (!lzdec.is_full_match()) + { + LZHAM_ASSUME(CLZBase::cMatchHistSize == 4); + type_index = -lzdec.m_dist - 1; + } + + if (actual_match_len > match_len) + { + m_match_truncation_hist[match_len]++; + + m_match_type_truncation_hist[cur_state.m_cur_state][type_index]++; + } + else + { + m_match_type_was_not_truncated_hist[cur_state.m_cur_state][type_index]++; + } + } + + float match_bit_cost = cur_state.m_is_match_model[is_match_model_index].get_cost(1) / (float)cBitCostScale; + m_total_is_match1_bits_cost += match_bit_cost; + m_total_match_bits_cost += match_bit_cost; + m_worst_match_bits_cost = math::maximum(m_worst_match_bits_cost, static_cast(match_bit_cost)); + m_total_matches++; + + if (lzdec.m_dist < 0) + { + // rep match + int match_hist_index = -lzdec.m_dist - 1; + LZHAM_ASSERT(match_hist_index < CLZBase::cMatchHistSize); + + m_rep_stats[match_hist_index].update(cost_in_bits); + + if (!match_hist_index) + { + // rep0 match + if (lzdec.m_len == 1) + { + m_rep0_len1_stats.update(cost_in_bits); + } + else + { + m_rep0_len2_plus_stats.update(cost_in_bits); + } + } + } + else + { + m_full_match_stats[math::minimum(cMaxMatchLen, match_len)].update(cost_in_bits); + + if (match_len == 2) + { + if (lzdec.m_dist <= 512) + m_total_near_len2_matches++; + else + m_total_far_len2_matches++; + + m_max_len2_dist = LZHAM_MAX((int)m_max_len2_dist, lzdec.m_dist); + } + } + } + else + { + // TODO: Handle huge matches. 
+ } + } +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzhamdecomp/lzham_decomp.h b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_decomp.h new file mode 100644 index 00000000..eb9a74d7 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_decomp.h @@ -0,0 +1,37 @@ +// File: lzham_decomp.h +// See Copyright Notice and license at the end of include/lzham.h +#pragma once +#include "../include/lzham.h" + +namespace lzham +{ + void LZHAM_CDECL lzham_lib_set_memory_callbacks(lzham_realloc_func pRealloc, lzham_msize_func pMSize, void* pUser_data); + + lzham_decompress_state_ptr LZHAM_CDECL lzham_lib_decompress_init(const lzham_decompress_params *pParams); + + lzham_decompress_state_ptr LZHAM_CDECL lzham_lib_decompress_reinit(lzham_decompress_state_ptr pState, const lzham_decompress_params *pParams); + + lzham_decompress_checksums* LZHAM_CDECL lzham_lib_decompress_deinit(lzham_decompress_state_ptr pState); + + lzham_decompress_status_t LZHAM_CDECL lzham_lib_decompress( + lzham_decompress_state_ptr pState, + const lzham_uint8 *pIn_buf, size_t *pIn_buf_size, + lzham_uint8 *pOut_buf, size_t *pOut_buf_size, + lzham_bool no_more_input_bytes_flag); + + lzham_decompress_status_t LZHAM_CDECL lzham_lib_decompress_memory(const lzham_decompress_params *pParams, + lzham_uint8* pDst_buf, size_t *pDst_len, + const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32* pCrc32); + + int LZHAM_CDECL lzham_lib_z_inflateInit2(lzham_z_streamp pStream, int window_bits); + int LZHAM_CDECL lzham_lib_z_inflateInit(lzham_z_streamp pStream); + int LZHAM_CDECL lzham_lib_z_inflateReset(lzham_z_streamp pStream); + int LZHAM_CDECL lzham_lib_z_inflate(lzham_z_streamp pStream, int flush); + int LZHAM_CDECL lzham_lib_z_inflateEnd(lzham_z_streamp pStream); + int LZHAM_CDECL lzham_lib_z_uncompress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len); + + const char * LZHAM_CDECL lzham_lib_z_error(int err); + lzham_z_ulong lzham_lib_z_adler32(lzham_z_ulong adler, const unsigned char *ptr, size_t buf_len); + lzham_z_ulong LZHAM_CDECL lzham_lib_z_crc32(lzham_z_ulong crc, const lzham_uint8 *ptr, size_t buf_len); + +} // namespace lzham diff --git a/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecomp.cpp b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecomp.cpp new file mode 100644 index 00000000..53deceb1 --- /dev/null +++ b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecomp.cpp @@ -0,0 +1,1590 @@ +// File: lzham_lzdecomp.cpp +// See Copyright Notice and license at the end of include/lzham.h +// +// See "Coroutines in C": +// http://www.chiark.greenend.org.uk/~sgtatham/coroutines.html +// Also see "Protothreads - Lightweight, Stackless Threads in C": +// http://www.sics.se/~adam/pt/ +#include "../include/lzham_core.h" +#include "lzham_decomp.h" +#include "../include/lzham_symbol_codec.h" +#include "../include/lzham_checksum.h" +#include "lzham_lzdecompbase.h" + +using namespace lzham; + +namespace lzham +{ + static const uint8 s_literal_next_state[24] = + { + 0, 0, 0, 0, 1, 2, 3, // 0-6: literal states + 4, 5, 6, 4, 5, // 7-11: match states + 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 // 12-23: unused + }; + + static const uint s_huge_match_base_len[4] = { CLZDecompBase::cMaxMatchLen + 1, CLZDecompBase::cMaxMatchLen + 1 + 256, CLZDecompBase::cMaxMatchLen + 1 + 256 + 1024, CLZDecompBase::cMaxMatchLen + 1 + 256 + 1024 + 4096 }; + static const uint8 s_huge_match_code_len[4] = { 8, 10, 12, 16 }; + + struct lzham_decompressor + { + void init(); + + template 
lzham_decompress_status_t decompress(); + + void reset_all_tables(); + void reset_huffman_table_update_rates(); + + int m_state; + + CLZDecompBase m_lzBase; + symbol_codec m_codec; + + uint32 m_raw_decomp_buf_size; + uint8 *m_pRaw_decomp_buf; + uint8 *m_pDecomp_buf; + uint32 m_decomp_adler32; + uint32 m_decomp_crc32; + + const uint8 *m_pIn_buf; + size_t *m_pIn_buf_size; + uint8 *m_pOut_buf; + size_t *m_pOut_buf_size; + bool m_no_more_input_bytes_flag; + + uint8 *m_pOrig_out_buf; + size_t m_orig_out_buf_size; + + lzham_decompress_params m_params; + + lzham_decompress_status_t m_status; + +#if LZHAM_USE_ALL_ARITHMETIC_CODING + typedef adaptive_arith_data_model sym_data_model; +#else + typedef quasi_adaptive_huffman_data_model sym_data_model; +#endif + + sym_data_model m_lit_table[1 << CLZDecompBase::cNumLitPredBits]; + sym_data_model m_delta_lit_table[1 << CLZDecompBase::cNumDeltaLitPredBits]; + sym_data_model m_main_table; + sym_data_model m_rep_len_table[2]; + sym_data_model m_large_len_table[2]; + sym_data_model m_dist_lsb_table; + + adaptive_bit_model m_is_match_model[CLZDecompBase::cNumStates * (1 << CLZDecompBase::cNumIsMatchContextBits)]; + adaptive_bit_model m_is_rep_model[CLZDecompBase::cNumStates]; + adaptive_bit_model m_is_rep0_model[CLZDecompBase::cNumStates]; + adaptive_bit_model m_is_rep0_single_byte_model[CLZDecompBase::cNumStates]; + adaptive_bit_model m_is_rep1_model[CLZDecompBase::cNumStates]; + adaptive_bit_model m_is_rep2_model[CLZDecompBase::cNumStates]; + + uint m_dst_ofs; + + uint m_step; + uint m_block_step; + uint m_initial_step; + + uint m_block_index; + + int m_match_hist0; + int m_match_hist1; + int m_match_hist2; + int m_match_hist3; + uint m_cur_state; + + uint m_start_block_dst_ofs; + uint m_prev_char; + uint m_prev_prev_char; + + uint m_block_type; + + const uint8 *m_pFlush_src; + size_t m_flush_num_bytes_remaining; + size_t m_flush_n; + + uint m_seed_bytes_to_ignore_when_flushing; + + uint m_file_src_file_adler32; + uint m_file_src_file_crc32; + + uint m_rep_lit0; + uint m_match_len; + uint m_match_slot; + uint m_extra_bits; + uint m_num_extra_bits; + + uint m_src_ofs; + const uint8* m_pCopy_src; + uint m_num_raw_bytes_remaining; + + uint m_debug_is_match; + uint m_debug_match_len; + uint m_debug_match_dist; + uint m_debug_lit; + + lzham_decompress_status_t m_z_last_status; + uint m_z_first_call; + uint m_z_has_flushed; + uint m_z_cmf; + uint m_z_flg; + uint m_z_dict_adler32; + + uint m_tmp; + }; + + // Ordinarily I dislike macros like this, but in this case I think using them makes the decompression function easier to follow. + + // Coroutine helpers. + #define LZHAM_CR_INITIAL_STATE 0 + #define LZHAM_CR_BEGIN(state) switch( state ) { case LZHAM_CR_INITIAL_STATE: + #define LZHAM_CR_RETURN(state, result) do { state = __LINE__; return (result); case __LINE__:; } while (0) + #define LZHAM_CR_FINISH } + + // Helpers to save/restore local variables (hopefully CPU registers) to memory. 
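For reference, the LZHAM_CR_* macros above implement the switch-based coroutine pattern described in the articles linked at the top of this file: m_state stores a __LINE__ value, so decompress() can return to the caller mid-stream and later resume exactly where it left off. A minimal standalone sketch of the same pattern (illustrative only, with hypothetical names, not code from this diff):

    // Sketch of the switch/__LINE__ coroutine trick behind LZHAM_CR_BEGIN/RETURN/FINISH.
    struct byte_source
    {
        int m_state = 0; // 0 = initial state, otherwise the __LINE__ to resume at
        int m_i = 0;     // locals that must survive a return are kept as members

        int next()       // yields 0, 1, 2 across three calls, then -1
        {
            switch (m_state)
            {
                case 0:
                    for (m_i = 0; m_i < 3; m_i++)
                    {
                        // "yield": record the resume point and return; the case label on
                        // the same line is where the next call jumps back in.
                        m_state = __LINE__; return m_i; case __LINE__:;
                    }
            }
            return -1;
        }
    };

The LZHAM_RESTORE_STATE / LZHAM_SAVE_STATE helpers defined next do the equivalent bookkeeping for the decompressor's hot locals (match history, current state, output offset), spilling them to member variables around every such return.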
+ #define LZHAM_RESTORE_STATE LZHAM_RESTORE_LOCAL_STATE \ + match_hist0 = m_match_hist0; match_hist1 = m_match_hist1; match_hist2 = m_match_hist2; match_hist3 = m_match_hist3; \ + cur_state = m_cur_state; prev_char = m_prev_char; prev_prev_char = m_prev_prev_char; dst_ofs = m_dst_ofs; + + #define LZHAM_SAVE_STATE LZHAM_SAVE_LOCAL_STATE \ + m_match_hist0 = match_hist0; m_match_hist1 = match_hist1; m_match_hist2 = match_hist2; m_match_hist3 = match_hist3; \ + m_cur_state = cur_state; m_prev_char = prev_char; m_prev_prev_char = prev_prev_char; m_dst_ofs = dst_ofs; + + // Helper that coroutine returns to the caller with a request for more input bytes. + #define LZHAM_DECODE_NEEDS_BYTES \ + LZHAM_SAVE_STATE \ + for ( ; ; ) \ + { \ + *m_pIn_buf_size = static_cast(m_codec.decode_get_bytes_consumed()); \ + *m_pOut_buf_size = 0; \ + LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_NEEDS_MORE_INPUT); \ + m_codec.decode_set_input_buffer(m_pIn_buf, *m_pIn_buf_size, m_pIn_buf, m_no_more_input_bytes_flag); \ + if ((m_codec.m_decode_buf_eof) || (m_codec.m_decode_buf_size)) break; \ + } \ + LZHAM_RESTORE_STATE + + #if LZHAM_PLATFORM_X360 + #define LZHAM_BULK_MEMCPY XMemCpy + #define LZHAM_MEMCPY memcpy + #else + #define LZHAM_BULK_MEMCPY memcpy + #define LZHAM_MEMCPY memcpy + #endif + // Flush the output buffer/dictionary by doing a coroutine return to the caller. + // The caller must permit the decompressor to flush total_bytes from the dictionary, or (in the + // case of corrupted data, or a bug) we must report a DEST_BUF_TOO_SMALL error. + #define LZHAM_FLUSH_OUTPUT_BUFFER(total_bytes) \ + LZHAM_SAVE_STATE \ + m_pFlush_src = m_pDecomp_buf + m_seed_bytes_to_ignore_when_flushing; \ + m_flush_num_bytes_remaining = total_bytes - m_seed_bytes_to_ignore_when_flushing; \ + m_seed_bytes_to_ignore_when_flushing = 0; \ + while (m_flush_num_bytes_remaining) \ + { \ + m_flush_n = LZHAM_MIN(m_flush_num_bytes_remaining, *m_pOut_buf_size); \ + if (0 == (m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_COMPUTE_ADLER32)) \ + { \ + LZHAM_BULK_MEMCPY(m_pOut_buf, m_pFlush_src, m_flush_n); \ + } \ + else \ + { \ + size_t copy_ofs = 0; \ + while (copy_ofs < m_flush_n) \ + { \ + const uint cBytesToMemCpyPerIteration = 8192U; \ + size_t bytes_to_copy = LZHAM_MIN((size_t)(m_flush_n - copy_ofs), cBytesToMemCpyPerIteration); \ + LZHAM_MEMCPY(m_pOut_buf + copy_ofs, m_pFlush_src + copy_ofs, bytes_to_copy); \ + m_decomp_adler32 = adler32(m_pFlush_src + copy_ofs, bytes_to_copy, m_decomp_adler32); \ + m_decomp_crc32 = crc32(m_decomp_crc32, m_pFlush_src + copy_ofs, bytes_to_copy); \ + copy_ofs += bytes_to_copy; \ + } \ + } \ + *m_pIn_buf_size = static_cast(m_codec.decode_get_bytes_consumed()); \ + *m_pOut_buf_size = m_flush_n; \ + LZHAM_CR_RETURN(m_state, m_flush_n ? 
LZHAM_DECOMP_STATUS_NOT_FINISHED : LZHAM_DECOMP_STATUS_HAS_MORE_OUTPUT); \ + m_codec.decode_set_input_buffer(m_pIn_buf, *m_pIn_buf_size, m_pIn_buf, m_no_more_input_bytes_flag); \ + m_pFlush_src += m_flush_n; \ + m_flush_num_bytes_remaining -= m_flush_n; \ + } \ + LZHAM_RESTORE_STATE \ + + #if LZHAM_USE_ALL_ARITHMETIC_CODING + #define LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, result, model) LZHAM_SYMBOL_CODEC_DECODE_ADAPTIVE_ARITHMETIC(codec, result, model) + #else + #define LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, result, model) LZHAM_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) + #endif + + //------------------------------------------------------------------------------------------------------------------ + void lzham_decompressor::init() + { + m_lzBase.init_position_slots(m_params.m_dict_size_log2); + +#ifdef LZHAM_LZDEBUG + if (m_pDecomp_buf) + memset(m_pDecomp_buf, 0xCE, 1U << m_params.m_dict_size_log2); +#endif + + m_state = LZHAM_CR_INITIAL_STATE; + m_step = 0; + m_block_step = 0; + m_block_index = 0; + m_initial_step = 0; + + m_dst_ofs = 0; + + m_pIn_buf = NULL; + m_pIn_buf_size = NULL; + m_pOut_buf = NULL; + m_pOut_buf_size = NULL; + m_no_more_input_bytes_flag = false; + m_status = LZHAM_DECOMP_STATUS_NOT_FINISHED; + m_pOrig_out_buf = NULL; + m_orig_out_buf_size = 0; + m_decomp_adler32 = cInitAdler32; + m_decomp_crc32 = cInitCRC32; + m_seed_bytes_to_ignore_when_flushing = 0; + + m_z_last_status = LZHAM_DECOMP_STATUS_NOT_FINISHED; + m_z_first_call = 1; + m_z_has_flushed = 0; + m_z_cmf = 0; + m_z_flg = 0; + m_z_dict_adler32 = 0; + + m_tmp = 0; + } + + void lzham_decompressor::reset_all_tables() + { + m_lit_table[0].reset(); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_lit_table); i++) + m_lit_table[i] = m_lit_table[0]; + + m_delta_lit_table[0].reset(); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_delta_lit_table); i++) + m_delta_lit_table[i] = m_delta_lit_table[0]; + + m_main_table.reset(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_rep_len_table); i++) + m_rep_len_table[i].reset(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_large_len_table); i++) + m_large_len_table[i].reset(); + + m_dist_lsb_table.reset(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_match_model); i++) + m_is_match_model[i].clear(); + + for (uint i = 0; i < CLZDecompBase::cNumStates; i++) + { + m_is_rep_model[i].clear(); + m_is_rep0_model[i].clear(); + m_is_rep0_single_byte_model[i].clear(); + m_is_rep1_model[i].clear(); + m_is_rep2_model[i].clear(); + } + } + + void lzham_decompressor::reset_huffman_table_update_rates() + { + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_lit_table); i++) + m_lit_table[i].reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_delta_lit_table); i++) + m_delta_lit_table[i].reset_update_rate(); + + m_main_table.reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_rep_len_table); i++) + m_rep_len_table[i].reset_update_rate(); + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_large_len_table); i++) + m_large_len_table[i].reset_update_rate(); + + m_dist_lsb_table.reset_update_rate(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Decompression method. Implemented as a coroutine so it can be paused and resumed to support streaming. + //------------------------------------------------------------------------------------------------------------------ + template + lzham_decompress_status_t lzham_decompressor::decompress() + { + // Important: This function is a coroutine. 
ANY locals variables that need to be preserved across coroutine + // returns must be either be a member variable, or a local which is saved/restored to a member variable at + // the right times. (This makes this function difficult to follow and freaking ugly due to the macros of doom - but hey it works.) + // The most often used variables are in locals so the compiler hopefully puts them into CPU registers. + symbol_codec &codec = m_codec; + const uint dict_size = 1U << m_params.m_dict_size_log2; + const uint dict_size_mask = unbuffered ? UINT_MAX : (dict_size - 1); + + int match_hist0 = 0, match_hist1 = 0, match_hist2 = 0, match_hist3 = 0; + uint cur_state = 0, prev_char = 0, prev_prev_char = 0, dst_ofs = 0; + + const size_t out_buf_size = *m_pOut_buf_size; + + uint8* pDst = unbuffered ? reinterpret_cast(m_pOut_buf) : reinterpret_cast(m_pDecomp_buf); + uint8* pDst_end = unbuffered ? (reinterpret_cast(m_pOut_buf) + out_buf_size) : (reinterpret_cast(m_pDecomp_buf) + dict_size); + + LZHAM_SYMBOL_CODEC_DECODE_DECLARE(codec); + +#define LZHAM_SAVE_LOCAL_STATE +#define LZHAM_RESTORE_LOCAL_STATE + + // Important: Do not use any switch() statements below here. + LZHAM_CR_BEGIN(m_state) + + if ((!unbuffered) && (m_params.m_num_seed_bytes)) + { + LZHAM_BULK_MEMCPY(pDst, m_params.m_pSeed_bytes, m_params.m_num_seed_bytes); + dst_ofs += m_params.m_num_seed_bytes; + if (dst_ofs >= dict_size) + dst_ofs = 0; + else + m_seed_bytes_to_ignore_when_flushing = dst_ofs; + } + + if (!m_codec.start_decoding(m_pIn_buf, *m_pIn_buf_size, m_no_more_input_bytes_flag, NULL, NULL)) + return LZHAM_DECOMP_STATUS_FAILED_INITIALIZING; + + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + + { + bool fast_table_updating, use_polar_codes; + + if (m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_READ_ZLIB_STREAM) + { + uint check; + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_z_cmf, 8); + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_z_flg, 8); + check = ((m_z_cmf << 8) + m_z_flg) % 31; + if ((check != 0) || ((m_z_cmf & 15) != LZHAM_Z_LZHAM)) + return LZHAM_DECOMP_STATUS_FAILED_BAD_ZLIB_HEADER; + if (m_z_flg & 32) + { + if ((!m_params.m_pSeed_bytes) || (unbuffered)) + return LZHAM_DECOMP_STATUS_FAILED_NEED_SEED_BYTES; + m_z_dict_adler32 = 0; + for (m_tmp = 0; m_tmp < 4; ++m_tmp) + { + uint n; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, n, 8); + m_z_dict_adler32 = (m_z_dict_adler32 << 8) | n; + } + if (adler32(m_params.m_pSeed_bytes, m_params.m_num_seed_bytes) != m_z_dict_adler32) + return LZHAM_DECOMP_STATUS_FAILED_BAD_SEED_BYTES; + } + } + + { + uint tmp; + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, tmp, 2); + fast_table_updating = (tmp & 2) != 0; + use_polar_codes = (tmp & 1) != 0; + } + + bool succeeded = m_lit_table[0].init(false, 256, fast_table_updating, use_polar_codes); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_lit_table); i++) + succeeded = succeeded && m_lit_table[i].assign(m_lit_table[0]); + + succeeded = succeeded && m_delta_lit_table[0].init(false, 256, fast_table_updating, use_polar_codes); + for (uint i = 1; i < LZHAM_ARRAY_SIZE(m_delta_lit_table); i++) + succeeded = succeeded && m_delta_lit_table[i].assign(m_delta_lit_table[0]); + + succeeded = succeeded && m_main_table.init(false, CLZDecompBase::cLZXNumSpecialLengths + (m_lzBase.m_num_lzx_slots - CLZDecompBase::cLZXLowestUsableMatchSlot) * 8, fast_table_updating, use_polar_codes); + + for (uint i = 0; i < 2; i++) + { + succeeded = succeeded && m_rep_len_table[i].init(false, CLZDecompBase::cNumHugeMatchCodes + (CLZDecompBase::cMaxMatchLen - CLZDecompBase::cMinMatchLen + 1), 
fast_table_updating, use_polar_codes); + succeeded = succeeded && m_large_len_table[i].init(false, CLZDecompBase::cNumHugeMatchCodes + CLZDecompBase::cLZXNumSecondaryLengths, fast_table_updating, use_polar_codes); + } + + succeeded = succeeded && m_dist_lsb_table.init(false, 16, fast_table_updating, use_polar_codes); + if (!succeeded) + return LZHAM_DECOMP_STATUS_FAILED_INITIALIZING; + + for (uint i = 0; i < LZHAM_ARRAY_SIZE(m_is_match_model); i++) + m_is_match_model[i].clear(); + + for (uint i = 0; i < CLZDecompBase::cNumStates; i++) + { + m_is_rep_model[i].clear(); + m_is_rep0_model[i].clear(); + m_is_rep0_single_byte_model[i].clear(); + m_is_rep1_model[i].clear(); + m_is_rep2_model[i].clear(); + } + } + + // Output block loop. + do + { +#ifdef LZHAM_LZDEBUG + uint outer_sync_marker; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, k, 12); + LZHAM_VERIFY(outer_sync_marker == 166); +#endif + + // Decode block type. + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_block_type, CLZDecompBase::cBlockHeaderBits); + + if (m_block_type == CLZDecompBase::cSyncBlock) + { + // Sync block + // Reset either the symbol table update rates, or all statistics, then force a coroutine return to give the caller a chance to handle the output right now. + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_tmp, CLZDecompBase::cBlockFlushTypeBits); + if (m_tmp == 1) + reset_huffman_table_update_rates(); + else if (m_tmp == 2) + reset_all_tables(); + + LZHAM_SYMBOL_CODEC_DECODE_ALIGN_TO_BYTE(codec); + + uint n; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, n, 16); + if (n != 0) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_BAD_SYNC_BLOCK); } + } + + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, n, 16); + if (n != 0xFFFF) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_BAD_SYNC_BLOCK); } + } + + if (m_tmp == 2) + { + // It's a full flush, so immediately give caller whatever output we have. Also gives the caller a chance to reposition the input stream ptr somewhere else before continuing. + // It would be nice to do this with partial flushes too, but the current way the output buffer is flushed makes this tricky. + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + + if ((!unbuffered) && (dst_ofs)) + { + LZHAM_FLUSH_OUTPUT_BUFFER(dst_ofs); + } + else + { + *m_pIn_buf_size = static_cast(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = dst_ofs; + + LZHAM_SAVE_STATE + LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_NOT_FINISHED); + LZHAM_RESTORE_STATE + + m_codec.decode_set_input_buffer(m_pIn_buf, *m_pIn_buf_size, m_pIn_buf, m_no_more_input_bytes_flag); + } + + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + + dst_ofs = 0; + } + } + else if (m_block_type == CLZDecompBase::cRawBlock) + { + // Raw block handling is complex because we ultimately want to (safely) handle as many bytes as possible using a small number of memcpy()'s. + uint num_raw_bytes_remaining; + num_raw_bytes_remaining = 0; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_num_raw_bytes_remaining = num_raw_bytes_remaining; +#define LZHAM_RESTORE_LOCAL_STATE num_raw_bytes_remaining = m_num_raw_bytes_remaining; + + // Determine how large this raw block is. 
+ LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, num_raw_bytes_remaining, 24); + + // Get and verify raw block length check bits. + uint num_raw_bytes_check_bits; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, num_raw_bytes_check_bits, 8); + uint raw_bytes_remaining0, raw_bytes_remaining1, raw_bytes_remaining2; + raw_bytes_remaining0 = num_raw_bytes_remaining & 0xFF; + raw_bytes_remaining1 = (num_raw_bytes_remaining >> 8) & 0xFF; + raw_bytes_remaining2 = (num_raw_bytes_remaining >> 16) & 0xFF; + if (num_raw_bytes_check_bits != ((raw_bytes_remaining0 ^ raw_bytes_remaining1) ^ raw_bytes_remaining2)) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast<size_t>(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_BAD_RAW_BLOCK); } + } + + num_raw_bytes_remaining++; + + // Discard any partial bytes from the bit buffer (align up to the next byte). + LZHAM_SYMBOL_CODEC_DECODE_ALIGN_TO_BYTE(codec); + + // Flush any full bytes from the bit buffer. + do + { + int b; + LZHAM_SYMBOL_CODEC_DECODE_REMOVE_BYTE_FROM_BIT_BUF(codec, b); + if (b < 0) + break; + + if ((unbuffered) && (dst_ofs >= out_buf_size)) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast<size_t>(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_DEST_BUF_TOO_SMALL); } + } + + pDst[dst_ofs++] = static_cast<uint8>(b); + + if ((!unbuffered) && (dst_ofs > dict_size_mask)) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + LZHAM_FLUSH_OUTPUT_BUFFER(dict_size); + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + dst_ofs = 0; + } + + num_raw_bytes_remaining--; + } while (num_raw_bytes_remaining); + + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + + // Now handle the bulk of the raw data with memcpy(). + while (num_raw_bytes_remaining) + { + uint64 in_buf_ofs, in_buf_remaining; + in_buf_ofs = codec.decode_get_bytes_consumed(); + in_buf_remaining = *m_pIn_buf_size - in_buf_ofs; + + while (!in_buf_remaining) + { + // We need more bytes from the caller. + *m_pIn_buf_size = static_cast<size_t>(in_buf_ofs); + *m_pOut_buf_size = 0; + + if (m_no_more_input_bytes_flag) + { + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_EXPECTED_MORE_RAW_BYTES); } + } + + LZHAM_SAVE_STATE + LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_NEEDS_MORE_INPUT); + LZHAM_RESTORE_STATE + + m_codec.decode_set_input_buffer(m_pIn_buf, *m_pIn_buf_size, m_pIn_buf, m_no_more_input_bytes_flag); + + in_buf_ofs = 0; + in_buf_remaining = *m_pIn_buf_size; + } + + // Determine how many bytes we can safely memcpy() in a single call. + uint num_bytes_to_copy; + num_bytes_to_copy = static_cast<uint>(LZHAM_MIN(num_raw_bytes_remaining, in_buf_remaining)); + if (!unbuffered) + num_bytes_to_copy = LZHAM_MIN(num_bytes_to_copy, dict_size - dst_ofs); + + if ((unbuffered) && ((dst_ofs + num_bytes_to_copy) > out_buf_size)) + { + // Output buffer is not large enough. + *m_pIn_buf_size = static_cast<size_t>(in_buf_ofs); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_DEST_BUF_TOO_SMALL); } + } + + // Copy the raw bytes.
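In other words, the raw-block header parsed above is a 24-bit byte count (stored as count - 1) followed by an 8-bit check value that must equal the XOR of the three count bytes; any mismatch aborts with LZHAM_DECOMP_STATUS_FAILED_BAD_RAW_BLOCK. A small sketch of that validation (illustrative only, hypothetical helper name):

    // Mirrors the raw-block length check performed above.
    static bool raw_block_header_ok(unsigned len_minus_1, unsigned check_byte)
    {
        const unsigned b0 = len_minus_1 & 0xFF;
        const unsigned b1 = (len_minus_1 >> 8) & 0xFF;
        const unsigned b2 = (len_minus_1 >> 16) & 0xFF;
        return check_byte == (b0 ^ b1 ^ b2);
    }

The copy that follows is then done with memcpy() in chunks bounded by the bytes still owed, the input the caller has supplied so far, and (in buffered mode) the space left before the dictionary wraps, which is why num_bytes_to_copy is recomputed on every pass through the loop.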
+ LZHAM_BULK_MEMCPY(pDst + dst_ofs, m_pIn_buf + in_buf_ofs, num_bytes_to_copy); + + in_buf_ofs += num_bytes_to_copy; + num_raw_bytes_remaining -= num_bytes_to_copy; + + codec.decode_set_input_buffer(m_pIn_buf, *m_pIn_buf_size, m_pIn_buf + in_buf_ofs, m_no_more_input_bytes_flag); + + dst_ofs += num_bytes_to_copy; + + if ((!unbuffered) && (dst_ofs > dict_size_mask)) + { + LZHAM_ASSERT(dst_ofs == dict_size); + + LZHAM_FLUSH_OUTPUT_BUFFER(dict_size); + + dst_ofs = 0; + } + } + + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE +#define LZHAM_RESTORE_LOCAL_STATE + } + else if (m_block_type == CLZDecompBase::cCompBlock) + { + LZHAM_SYMBOL_CODEC_DECODE_ARITH_START(codec) + + match_hist0 = 1; + match_hist1 = 1; + match_hist2 = 1; + match_hist3 = 1; + cur_state = 0; + prev_char = 0; + prev_prev_char = 0; + + m_start_block_dst_ofs = dst_ofs; + + { + uint block_flush_type; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, block_flush_type, CLZDecompBase::cBlockFlushTypeBits); + if (block_flush_type == 1) + reset_huffman_table_update_rates(); + else if (block_flush_type == 2) + reset_all_tables(); + } + +#ifdef LZHAM_LZDEBUG + m_initial_step = m_step; + m_block_step = 0; + for ( ; ; m_step++, m_block_step++) +#else + for ( ; ; ) +#endif + { +#ifdef LZHAM_LZDEBUG + uint sync_marker; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, x, CLZDecompBase::cLZHAMDebugSyncMarkerBits); + LZHAM_VERIFY(sync_marker == CLZDecompBase::cLZHAMDebugSyncMarkerValue); + + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_debug_is_match, 1); + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_debug_match_len, 17); + + uint debug_cur_state; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, debug_cur_state, 4); + LZHAM_VERIFY(cur_state == debug_cur_state); +#endif + +#ifdef _DEBUG +{ + uint total_block_bytes = ((dst_ofs - m_start_block_dst_ofs) & dict_size_mask); + if (total_block_bytes > 0) + { + LZHAM_ASSERT(prev_char == pDst[(dst_ofs - 1) & dict_size_mask]); + } + else + { + LZHAM_ASSERT(prev_char == 0); + } + + if (total_block_bytes > 1) + { + LZHAM_ASSERT(prev_prev_char == pDst[(dst_ofs - 2) & dict_size_mask]); + } + else + { + LZHAM_ASSERT(prev_prev_char == 0); + } +} +#endif + // Read "is match" bit. + uint match_model_index; + match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(prev_char, cur_state); + LZHAM_ASSERT(match_model_index < LZHAM_ARRAY_SIZE(m_is_match_model)); + + uint is_match_bit; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_match_bit, m_is_match_model[match_model_index]); + +#ifdef LZHAM_LZDEBUG + LZHAM_VERIFY(is_match_bit == m_debug_is_match); +#endif + + if (LZHAM_BUILTIN_EXPECT(!is_match_bit, 0)) + { + // Handle literal. 
+ +#ifdef LZHAM_LZDEBUG + LZHAM_VERIFY(m_debug_match_len == 1); +#endif + +#ifdef LZHAM_LZDEBUG + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_debug_lit, 8); +#endif + + if ((unbuffered) && (LZHAM_BUILTIN_EXPECT(dst_ofs >= out_buf_size, 0))) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_DEST_BUF_TOO_SMALL); } + } + + if (LZHAM_BUILTIN_EXPECT(cur_state < CLZDecompBase::cNumLitStates, 1)) + { + // Regular literal + uint lit_pred; + lit_pred = (prev_char >> (8 - CLZDecompBase::cNumLitPredBits / 2)) | (prev_prev_char >> (8 - CLZDecompBase::cNumLitPredBits / 2)) << (CLZDecompBase::cNumLitPredBits / 2); + + uint r; LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, r, m_lit_table[lit_pred]); + pDst[dst_ofs] = static_cast(r); + prev_prev_char = prev_char; + prev_char = r; + +#ifdef LZHAM_LZDEBUG + LZHAM_VERIFY(pDst[dst_ofs] == m_debug_lit); +#endif + } + else + { + // Delta literal + uint match_hist0_ofs, rep_lit0, rep_lit1; + + // Determine delta literal's partial context. + match_hist0_ofs = dst_ofs - match_hist0; + rep_lit0 = pDst[match_hist0_ofs & dict_size_mask]; + rep_lit1 = pDst[(match_hist0_ofs - 1) & dict_size_mask]; + + uint lit_pred; + lit_pred = (rep_lit0 >> (8 - CLZDecompBase::cNumDeltaLitPredBits / 2)) | + ((rep_lit1 >> (8 - CLZDecompBase::cNumDeltaLitPredBits / 2)) << CLZDecompBase::cNumDeltaLitPredBits / 2); + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_rep_lit0 = rep_lit0; +#define LZHAM_RESTORE_LOCAL_STATE rep_lit0 = m_rep_lit0; + +#ifdef LZHAM_LZDEBUG + uint debug_rep_lit0; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, debug_rep_lit0, 8); + LZHAM_VERIFY(debug_rep_lit0 == rep_lit0); +#endif + + uint r; LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, r, m_delta_lit_table[lit_pred]); + r ^= rep_lit0; + pDst[dst_ofs] = static_cast(r); + prev_prev_char = prev_char; + prev_char = r; + +#ifdef LZHAM_LZDEBUG + LZHAM_VERIFY(pDst[dst_ofs] == m_debug_lit); +#endif + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE +#define LZHAM_RESTORE_LOCAL_STATE + } + + cur_state = s_literal_next_state[cur_state]; + + dst_ofs++; + if ((!unbuffered) && (LZHAM_BUILTIN_EXPECT(dst_ofs > dict_size_mask, 0))) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + LZHAM_FLUSH_OUTPUT_BUFFER(dict_size); + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + dst_ofs = 0; + } + } + else + { + // Handle match. + uint match_len; + match_len = 1; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; + + // Determine if match is a rep_match, and if so what type. + uint is_rep; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_rep, m_is_rep_model[cur_state]); + if (LZHAM_BUILTIN_EXPECT(is_rep, 1)) + { + uint is_rep0; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_rep0, m_is_rep0_model[cur_state]); + if (LZHAM_BUILTIN_EXPECT(is_rep0, 1)) + { + uint is_rep0_len1; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_rep0_len1, m_is_rep0_single_byte_model[cur_state]); + if (LZHAM_BUILTIN_EXPECT(is_rep0_len1, 1)) + { + cur_state = (cur_state < CLZDecompBase::cNumLitStates) ? 
9 : 11; + } + else + { + LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, match_len, m_rep_len_table[cur_state >= CLZDecompBase::cNumLitStates]); + match_len += CLZDecompBase::cMinMatchLen; + + if (match_len == (CLZDecompBase::cMaxMatchLen + 1)) + { + // Decode "huge" match length. + match_len = 0; + do + { + uint b; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, b, 1); + if (!b) + break; + match_len++; + } while (match_len < 3); + uint k; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, k, s_huge_match_code_len[match_len]); + match_len = s_huge_match_base_len[match_len] + k; + } + + cur_state = (cur_state < CLZDecompBase::cNumLitStates) ? 8 : 11; + } + } + else + { + LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, match_len, m_rep_len_table[cur_state >= CLZDecompBase::cNumLitStates]); + match_len += CLZDecompBase::cMinMatchLen; + + if (match_len == (CLZDecompBase::cMaxMatchLen + 1)) + { + // Decode "huge" match length. + match_len = 0; + do + { + uint b; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, b, 1); + if (!b) + break; + match_len++; + } while (match_len < 3); + uint k; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, k, s_huge_match_code_len[match_len]); + match_len = s_huge_match_base_len[match_len] + k; + } + + uint is_rep1; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_rep1, m_is_rep1_model[cur_state]); + if (LZHAM_BUILTIN_EXPECT(is_rep1, 1)) + { + uint temp = match_hist1; + match_hist1 = match_hist0; + match_hist0 = temp; + } + else + { + uint is_rep2; LZHAM_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, is_rep2, m_is_rep2_model[cur_state]); + + if (LZHAM_BUILTIN_EXPECT(is_rep2, 1)) + { + // rep2 + uint temp = match_hist2; + match_hist2 = match_hist1; + match_hist1 = match_hist0; + match_hist0 = temp; + } + else + { + // rep3 + uint temp = match_hist3; + match_hist3 = match_hist2; + match_hist2 = match_hist1; + match_hist1 = match_hist0; + match_hist0 = temp; + } + } + + cur_state = (cur_state < CLZDecompBase::cNumLitStates) ? 8 : 11; + } + } + else + { + // Handle normal/full match. + uint sym; LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, sym, m_main_table); + sym -= CLZDecompBase::cLZXNumSpecialLengths; + + if (LZHAM_BUILTIN_EXPECT(static_cast(sym) < 0, 0)) + { + // Handle special symbols. + if (static_cast(sym) == (CLZDecompBase::cLZXSpecialCodeEndOfBlockCode - CLZDecompBase::cLZXNumSpecialLengths)) + break; + else + { + // Must be cLZXSpecialCodePartialStateReset. + match_hist0 = 1; + match_hist1 = 1; + match_hist2 = 1; + match_hist3 = 1; + cur_state = 0; + continue; + } + } + + // Low 3 bits of symbol = match length category, higher bits = distance category. + match_len = (sym & 7) + 2; + + uint match_slot; + match_slot = (sym >> 3) + CLZDecompBase::cLZXLowestUsableMatchSlot; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; m_match_slot = match_slot; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; match_slot = m_match_slot; + + if (LZHAM_BUILTIN_EXPECT(match_len == 9, 0)) + { + // Match is >= 9 bytes, decode the actual length. + uint e; LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, e, m_large_len_table[cur_state >= CLZDecompBase::cNumLitStates]); + match_len += e; + + if (match_len == (CLZDecompBase::cMaxMatchLen + 1)) + { + // Decode "huge" match length. 
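The "huge" match escape decoded in the two rep paths above (and again in the full-match path right after this) works the same way in all three places: a length symbol equal to cMaxMatchLen + 1 means the real length follows as a short unary category (0, 10, 110 or 111) plus 8, 10, 12 or 16 raw bits, using the s_huge_match_base_len / s_huge_match_code_len tables declared near the top of this file. A condensed sketch (illustrative only; read_bits and decode_huge_match_len are hypothetical stand-ins for LZHAM_SYMBOL_CODEC_DECODE_GET_BITS and the inline decode loops):

    // Sketch of the huge-match length escape used above.
    static unsigned decode_huge_match_len(unsigned (*read_bits)(unsigned num_bits),
                                          const unsigned *base_len,        // s_huge_match_base_len
                                          const unsigned char *extra_bits) // s_huge_match_code_len
    {
        unsigned cat = 0;
        while ((cat < 3) && read_bits(1)) // unary category selector: 0, 10, 110, 111
            cat++;
        return base_len[cat] + read_bits(extra_bits[cat]); // 8/10/12/16 extra bits
    }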
+ match_len = 0; + do + { + uint b; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, b, 1); + if (!b) + break; + match_len++; + } while (match_len < 3); + uint k; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, k, s_huge_match_code_len[match_len]); + match_len = s_huge_match_base_len[match_len] + k; + } + } + + uint num_extra_bits; + num_extra_bits = m_lzBase.m_lzx_position_extra_bits[match_slot]; + + uint extra_bits; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; m_match_slot = match_slot; m_num_extra_bits = num_extra_bits; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; match_slot = m_match_slot; num_extra_bits = m_num_extra_bits; + + if (LZHAM_BUILTIN_EXPECT(num_extra_bits < 3, 0)) + { + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, extra_bits, num_extra_bits); + } + else + { + extra_bits = 0; + if (LZHAM_BUILTIN_EXPECT(num_extra_bits > 4, 1)) + { + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, extra_bits, num_extra_bits - 4); + extra_bits <<= 4; + } + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; m_match_slot = match_slot; m_extra_bits = extra_bits; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; match_slot = m_match_slot; extra_bits = m_extra_bits; + + uint j; LZHAM_DECOMPRESS_DECODE_ADAPTIVE_SYMBOL(codec, j, m_dist_lsb_table); + extra_bits += j; + } + + match_hist3 = match_hist2; + match_hist2 = match_hist1; + match_hist1 = match_hist0; + match_hist0 = m_lzBase.m_lzx_position_base[match_slot] + extra_bits; + + cur_state = (cur_state < CLZDecompBase::cNumLitStates) ? CLZDecompBase::cNumLitStates : CLZDecompBase::cNumLitStates + 3; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; + } + + // We have the match's length and distance, now do the copy. + +#ifdef LZHAM_LZDEBUG + LZHAM_VERIFY(match_len == m_debug_match_len); + LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_debug_match_dist, 25); + uint d; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, d, 4); + m_debug_match_dist = (m_debug_match_dist << 4) | d; + LZHAM_VERIFY((uint)match_hist0 == m_debug_match_dist); +#endif + if ( (unbuffered) && LZHAM_BUILTIN_EXPECT((((size_t)match_hist0 > dst_ofs) || ((dst_ofs + match_len) > out_buf_size)), 0) ) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + *m_pIn_buf_size = static_cast(codec.decode_get_bytes_consumed()); + *m_pOut_buf_size = 0; + for ( ; ; ) { LZHAM_CR_RETURN(m_state, LZHAM_DECOMP_STATUS_FAILED_BAD_CODE); } + } + + uint src_ofs; + const uint8* pCopy_src; + src_ofs = (dst_ofs - match_hist0) & dict_size_mask; + pCopy_src = pDst + src_ofs; + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE m_match_len = match_len; m_src_ofs = src_ofs; m_pCopy_src = pCopy_src; +#define LZHAM_RESTORE_LOCAL_STATE match_len = m_match_len; src_ofs = m_src_ofs; pCopy_src = m_pCopy_src; + + if ( (!unbuffered) && LZHAM_BUILTIN_EXPECT( ((LZHAM_MAX(src_ofs, dst_ofs) + match_len) > dict_size_mask), 0) ) + { + // Match source or destination wraps around the end of the dictionary to the beginning, so handle the copy one byte at a time. 
+ do + { + uint8 c; + c = *pCopy_src++; + prev_prev_char = prev_char; + prev_char = c; + pDst[dst_ofs++] = c; + + if (LZHAM_BUILTIN_EXPECT(pCopy_src == pDst_end, 0)) + pCopy_src = pDst; + + if (LZHAM_BUILTIN_EXPECT(dst_ofs > dict_size_mask, 0)) + { + LZHAM_SYMBOL_CODEC_DECODE_END(codec); + LZHAM_FLUSH_OUTPUT_BUFFER(dict_size); + LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec); + dst_ofs = 0; + } + + match_len--; + } while (LZHAM_BUILTIN_EXPECT(match_len > 0, 1)); + } + else + { + uint8* pCopy_dst = pDst + dst_ofs; + if (LZHAM_BUILTIN_EXPECT(match_hist0 == 1, 0)) + { + // Handle byte runs. + uint8 c = *pCopy_src; + if (LZHAM_BUILTIN_EXPECT(match_len < 8, 1)) + { + for (int i = match_len; i > 0; i--) + *pCopy_dst++ = c; + if (LZHAM_BUILTIN_EXPECT(match_len == 1, 1)) + prev_prev_char = prev_char; + else + prev_prev_char = c; + } + else + { + memset(pCopy_dst, c, match_len); + prev_prev_char = c; + } + prev_char = c; + } + else if (LZHAM_BUILTIN_EXPECT(match_len == 1, 1)) + { + // Handle single byte matches. + prev_prev_char = prev_char; + prev_char = *pCopy_src; + *pCopy_dst = static_cast(prev_char); + } + else + { + // Handle matches of length 2 or higher. + uint bytes_to_copy = match_len - 2; + if (LZHAM_BUILTIN_EXPECT(((bytes_to_copy < 8) || ((int)bytes_to_copy > match_hist0)), 1)) + { + for (int i = bytes_to_copy; i > 0; i--) + *pCopy_dst++ = *pCopy_src++; + } + else + { + LZHAM_MEMCPY(pCopy_dst, pCopy_src, bytes_to_copy); + pCopy_dst += bytes_to_copy; + pCopy_src += bytes_to_copy; + } + // Handle final 2 bytes of match specially, because we always track the last 2 bytes output in + // local variables (needed for computing context) to avoid load hit stores on some CPU's. + prev_prev_char = *pCopy_src++; + *pCopy_dst++ = static_cast(prev_prev_char); + + prev_char = *pCopy_src++; + *pCopy_dst++ = static_cast(prev_char); + } + dst_ofs += match_len; + } + } // lit or match + +#undef LZHAM_SAVE_LOCAL_STATE +#undef LZHAM_RESTORE_LOCAL_STATE +#define LZHAM_SAVE_LOCAL_STATE +#define LZHAM_RESTORE_LOCAL_STATE + } // for ( ; ; ) + +#ifdef LZHAM_LZDEBUG + uint end_sync_marker; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, end_sync_marker, 12); + LZHAM_VERIFY(end_sync_marker == 366); +#endif + LZHAM_SYMBOL_CODEC_DECODE_ALIGN_TO_BYTE(codec); + } + else if (m_block_type == CLZDecompBase::cEOFBlock) + { + // Received EOF. + m_status = LZHAM_DECOMP_STATUS_SUCCESS; + } + else + { + // This block type is currently undefined. 
+            m_status = LZHAM_DECOMP_STATUS_FAILED_BAD_CODE;
+         }
+
+         m_block_index++;
+
+      } while (m_status == LZHAM_DECOMP_STATUS_NOT_FINISHED);
+
+      if ((!unbuffered) && (dst_ofs))
+      {
+         LZHAM_SYMBOL_CODEC_DECODE_END(codec);
+         LZHAM_FLUSH_OUTPUT_BUFFER(dst_ofs);
+         LZHAM_SYMBOL_CODEC_DECODE_BEGIN(codec);
+      }
+
+      if (m_status == LZHAM_DECOMP_STATUS_SUCCESS)
+      {
+         LZHAM_SYMBOL_CODEC_DECODE_ALIGN_TO_BYTE(codec);
+         LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, m_file_src_file_adler32, 16);
+         uint l; LZHAM_SYMBOL_CODEC_DECODE_GET_BITS(codec, l, 16);
+         m_file_src_file_adler32 = (m_file_src_file_adler32 << 16) | l;
+
+         if (m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_COMPUTE_ADLER32)
+         {
+            if (unbuffered)
+            {
+               m_decomp_adler32 = adler32(pDst, dst_ofs, cInitAdler32);
+            }
+
+            if (m_file_src_file_adler32 != m_decomp_adler32)
+            {
+               m_status = LZHAM_DECOMP_STATUS_FAILED_ADLER32;
+            }
+         }
+         else
+         {
+            m_decomp_adler32 = m_file_src_file_adler32;
+         }
+         if (m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_COMPUTE_CRC32)
+         {
+            if (unbuffered)
+            {
+               m_decomp_crc32 = crc32(cInitCRC32, pDst, dst_ofs);
+            }
+
+            //if (m_file_src_file_crc32 != m_decomp_crc32)
+            //{
+            //   printf("m_file_src_file_crc32 %zX\n", m_file_src_file_crc32);
+            //   m_status = LZHAM_DECOMP_STATUS_FAILED_CRC32;
+            //}
+         }
+         else
+         {
+            m_decomp_crc32 = m_file_src_file_crc32;
+         }
+      }
+
+      LZHAM_SYMBOL_CODEC_DECODE_END(codec);
+
+      *m_pIn_buf_size = static_cast<size_t>(codec.stop_decoding());
+      *m_pOut_buf_size = unbuffered ? dst_ofs : 0;
+      LZHAM_CR_RETURN(m_state, m_status);
+
+      for ( ; ; )
+      {
+         *m_pIn_buf_size = 0;
+         *m_pOut_buf_size = 0;
+         LZHAM_CR_RETURN(m_state, m_status);
+      }
+
+      LZHAM_CR_FINISH
+
+      return m_status;
+   }
+
+   static bool check_params(const lzham_decompress_params *pParams)
+   {
+      if ((!pParams) || (pParams->m_struct_size != sizeof(lzham_decompress_params)))
+         return false;
+
+      if ((pParams->m_dict_size_log2 < CLZDecompBase::cMinDictSizeLog2) || (pParams->m_dict_size_log2 > CLZDecompBase::cMaxDictSizeLog2))
+         return false;
+
+      if (pParams->m_num_seed_bytes)
+      {
+         if (((pParams->m_decompress_flags & LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED) != 0) || (!pParams->m_pSeed_bytes))
+            return false;
+         if (pParams->m_num_seed_bytes > (1U << pParams->m_dict_size_log2))
+            return false;
+      }
+      return true;
+   }
+
+   lzham_decompress_state_ptr LZHAM_CDECL lzham_lib_decompress_init(const lzham_decompress_params *pParams)
+   {
+      LZHAM_ASSUME(CLZDecompBase::cMinDictSizeLog2 == LZHAM_MIN_DICT_SIZE_LOG2);
+      LZHAM_ASSUME(CLZDecompBase::cMaxDictSizeLog2 == LZHAM_MAX_DICT_SIZE_LOG2_X64);
+
+      if (!check_params(pParams))
+         return NULL;
+
+      lzham_decompressor *pState = lzham_new<lzham_decompressor>();
+      if (!pState)
+         return NULL;
+
+      pState->m_params = *pParams;
+
+      if (pState->m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED)
+      {
+         pState->m_pRaw_decomp_buf = NULL;
+         pState->m_raw_decomp_buf_size = 0;
+         pState->m_pDecomp_buf = NULL;
+      }
+      else
+      {
+         uint32 decomp_buf_size = 1U << pState->m_params.m_dict_size_log2;
+         pState->m_pRaw_decomp_buf = static_cast<uint8*>(lzham_malloc(decomp_buf_size + 15));
+         if (!pState->m_pRaw_decomp_buf)
+         {
+            lzham_delete(pState);
+            return NULL;
+         }
+         pState->m_raw_decomp_buf_size = decomp_buf_size;
+         pState->m_pDecomp_buf = math::align_up_pointer(pState->m_pRaw_decomp_buf, 16);
+      }
+
+      pState->init();
+
+      return pState;
+   }
+
+   lzham_decompress_state_ptr LZHAM_CDECL lzham_lib_decompress_reinit(lzham_decompress_state_ptr p, const lzham_decompress_params *pParams)
+   {
+      if (!p)
+         return lzham_lib_decompress_init(pParams);
+
+      lzham_decompressor *pState = static_cast<lzham_decompressor*>(p);
+
+      if (!check_params(pParams))
+         return NULL;
+
+      if (pState->m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED)
+      {
+         lzham_free(pState->m_pRaw_decomp_buf);
+         pState->m_pRaw_decomp_buf = NULL;
+         pState->m_raw_decomp_buf_size = 0;
+         pState->m_pDecomp_buf = NULL;
+      }
+      else
+      {
+         uint32 new_dict_size = 1U << pState->m_params.m_dict_size_log2;
+         if ((!pState->m_pRaw_decomp_buf) || (pState->m_raw_decomp_buf_size < new_dict_size))
+         {
+            uint8 *pNew_dict = static_cast<uint8*>(lzham_realloc(pState->m_pRaw_decomp_buf, new_dict_size + 15));
+            if (!pNew_dict)
+               return NULL;
+            pState->m_pRaw_decomp_buf = pNew_dict;
+            pState->m_raw_decomp_buf_size = new_dict_size;
+            pState->m_pDecomp_buf = math::align_up_pointer(pState->m_pRaw_decomp_buf, 16);
+         }
+      }
+
+      pState->m_params = *pParams;
+
+      pState->init();
+
+      return pState;
+   }
+
+   lzham_decompress_checksums* LZHAM_CDECL lzham_lib_decompress_deinit(lzham_decompress_state_ptr p)
+   {
+      lzham_decompressor *pState = static_cast<lzham_decompressor*>(p);
+      if (!pState)
+         return NULL;
+
+      lzham_decompress_checksums* checksums = new lzham_decompress_checksums();
+
+      checksums->adler32 = pState->m_decomp_adler32;
+      checksums->crc32 = pState->m_decomp_crc32;
+
+      lzham_free(pState->m_pRaw_decomp_buf);
+      lzham_delete(pState);
+
+      return checksums;
+   }
+
+   lzham_decompress_status_t LZHAM_CDECL lzham_lib_decompress(
+      lzham_decompress_state_ptr p,
+      const lzham_uint8 *pIn_buf, size_t *pIn_buf_size,
+      lzham_uint8 *pOut_buf, size_t *pOut_buf_size,
+      lzham_bool no_more_input_bytes_flag)
+   {
+      lzham_decompressor *pState = static_cast<lzham_decompressor*>(p);
+
+      if ((!pState) || (!pState->m_params.m_dict_size_log2) || (!pIn_buf_size) || (!pOut_buf_size))
+      {
+         return LZHAM_DECOMP_STATUS_INVALID_PARAMETER;
+      }
+
+      if ((*pIn_buf_size) && (!pIn_buf))
+      {
+         return LZHAM_DECOMP_STATUS_INVALID_PARAMETER;
+      }
+
+      if ((*pOut_buf_size) && (!pOut_buf))
+      {
+         return LZHAM_DECOMP_STATUS_INVALID_PARAMETER;
+      }
+
+      pState->m_pIn_buf = pIn_buf;
+      pState->m_pIn_buf_size = pIn_buf_size;
+      pState->m_pOut_buf = pOut_buf;
+      pState->m_pOut_buf_size = pOut_buf_size;
+      pState->m_no_more_input_bytes_flag = (no_more_input_bytes_flag != 0);
+
+      if (pState->m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED)
+      {
+         if (!pState->m_pOrig_out_buf)
+         {
+            pState->m_pOrig_out_buf = pOut_buf;
+            pState->m_orig_out_buf_size = *pOut_buf_size;
+         }
+         else
+         {
+            if ((pState->m_pOrig_out_buf != pOut_buf) || (pState->m_orig_out_buf_size != *pOut_buf_size))
+            {
+               return LZHAM_DECOMP_STATUS_INVALID_PARAMETER;
+            }
+         }
+      }
+
+      lzham_decompress_status_t status;
+      if (pState->m_params.m_decompress_flags & LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED)
+         status = pState->decompress<true>();
+      else
+         status = pState->decompress<false>();
+
+      return status;
+   }
+
+   lzham_decompress_status_t LZHAM_CDECL lzham_lib_decompress_memory(const lzham_decompress_params *pParams, lzham_uint8* pDst_buf, size_t *pDst_len, const lzham_uint8* pSrc_buf, size_t src_len, lzham_uint32 *pAdler32, lzham_uint32 *pCrc32)
+   {
+      if (!pParams)
+         return LZHAM_DECOMP_STATUS_INVALID_PARAMETER;
+
+      lzham_decompress_params params(*pParams);
+      params.m_decompress_flags |= LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED;
+
+      lzham_decompress_state_ptr pState = lzham_lib_decompress_init(&params);
+      if (!pState)
+         return LZHAM_DECOMP_STATUS_FAILED_INITIALIZING;
+
+      lzham_decompress_status_t status = lzham_lib_decompress(pState, pSrc_buf, &src_len, pDst_buf, pDst_len, true);
+      lzham_decompress_checksums* checksums = lzham_lib_decompress_deinit(pState);
+
+      if (pAdler32)
+         *pAdler32 = checksums->adler32;
+      if (pCrc32)
+         *pCrc32 = checksums->crc32;
+
+
+      return status;
+   }
+
+   // ----------------- zlib-style API's
+
+   int LZHAM_CDECL lzham_lib_z_inflateInit(lzham_z_streamp pStream)
+   {
+      return lzham_lib_z_inflateInit2(pStream, LZHAM_Z_DEFAULT_WINDOW_BITS);
+   }
+
+   int LZHAM_CDECL lzham_lib_z_inflateInit2(lzham_z_streamp pStream, int window_bits)
+   {
+      if (!pStream)
+         return LZHAM_Z_STREAM_ERROR;
+
+#ifdef LZHAM_Z_API_FORCE_WINDOW_BITS
+      window_bits = LZHAM_Z_API_FORCE_WINDOW_BITS;
+#endif
+
+      int max_window_bits = LZHAM_64BIT_POINTERS ? LZHAM_MAX_DICT_SIZE_LOG2_X64 : LZHAM_MAX_DICT_SIZE_LOG2_X86;
+      if (labs(window_bits) > max_window_bits)
+         return LZHAM_Z_PARAM_ERROR;
+
+      if (labs(window_bits) < LZHAM_MIN_DICT_SIZE_LOG2)
+         window_bits = (window_bits < 0) ? -LZHAM_MIN_DICT_SIZE_LOG2 : LZHAM_MIN_DICT_SIZE_LOG2;
+
+      lzham_decompress_params params;
+      utils::zero_object(params);
+      params.m_struct_size = sizeof(lzham_decompress_params);
+      params.m_dict_size_log2 = labs(window_bits);
+
+      params.m_decompress_flags = LZHAM_DECOMP_FLAG_COMPUTE_ADLER32;
+      if (window_bits > 0)
+         params.m_decompress_flags |= LZHAM_DECOMP_FLAG_READ_ZLIB_STREAM;
+
+      lzham_decompress_state_ptr pState = lzham_lib_decompress_init(&params);
+      if (!pState)
+         return LZHAM_Z_MEM_ERROR;
+      pStream->state = static_cast<lzham_z_internal_state*>(pState);
+
+      pStream->data_type = 0;
+      pStream->adler32 = LZHAM_Z_ADLER32_INIT;
+      pStream->msg = NULL;
+      pStream->total_in = 0;
+      pStream->total_out = 0;
+      pStream->reserved = 0;
+
+      return LZHAM_Z_OK;
+   }
+
+   int LZHAM_CDECL lzham_lib_z_inflateReset(lzham_z_streamp pStream)
+   {
+      if ((!pStream) || (!pStream->state))
+         return LZHAM_Z_STREAM_ERROR;
+
+      lzham_decompress_state_ptr pState = static_cast<lzham_decompress_state_ptr>(pStream->state);
+      lzham_decompressor *pDecomp = static_cast<lzham_decompressor*>(pState);
+
+      lzham_decompress_params params(pDecomp->m_params);
+
+      if (!lzham_lib_decompress_reinit(pState, &params))
+         return LZHAM_Z_STREAM_ERROR;
+
+      return LZHAM_Z_OK;
+   }
+
+   int LZHAM_CDECL lzham_lib_z_inflate(lzham_z_streamp pStream, int flush)
+   {
+      if ((!pStream) || (!pStream->state))
+         return LZHAM_Z_STREAM_ERROR;
+
+      if ((flush == LZHAM_Z_PARTIAL_FLUSH) || (flush == LZHAM_Z_FULL_FLUSH))
+         flush = LZHAM_Z_SYNC_FLUSH;
+      if (flush)
+      {
+         if ((flush != LZHAM_Z_SYNC_FLUSH) && (flush != LZHAM_Z_FINISH))
+            return LZHAM_Z_STREAM_ERROR;
+      }
+
+      size_t orig_avail_in = pStream->avail_in;
+
+      lzham_decompress_state_ptr pState = static_cast<lzham_decompress_state_ptr>(pStream->state);
+      lzham_decompressor *pDecomp = static_cast<lzham_decompressor*>(pState);
+      if (pDecomp->m_z_last_status >= LZHAM_DECOMP_STATUS_FIRST_SUCCESS_OR_FAILURE_CODE)
+         return LZHAM_Z_DATA_ERROR;
+
+      if (pDecomp->m_z_has_flushed && (flush != LZHAM_Z_FINISH))
+         return LZHAM_Z_STREAM_ERROR;
+      pDecomp->m_z_has_flushed |= (flush == LZHAM_Z_FINISH);
+
+      lzham_decompress_status_t status;
+      for ( ; ; )
+      {
+         size_t in_bytes = pStream->avail_in;
+         size_t out_bytes = pStream->avail_out;
+         lzham_bool no_more_input_bytes_flag = (flush == LZHAM_Z_FINISH);
+         status = lzham_lib_decompress(pState, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, no_more_input_bytes_flag);
+
+         pDecomp->m_z_last_status = status;
+
+         pStream->next_in += (uint)in_bytes;
+         pStream->avail_in -= (uint)in_bytes;
+         pStream->total_in += (uint)in_bytes;
+         pStream->adler32 = pDecomp->m_decomp_adler32;
+         pStream->crc32 = pDecomp->m_decomp_crc32;
+
+         pStream->next_out += (uint)out_bytes;
+         pStream->avail_out -= (uint)out_bytes;
+         pStream->total_out += (uint)out_bytes;
+
+         if (status >= LZHAM_DECOMP_STATUS_FIRST_FAILURE_CODE)
+         {
+            if (status == LZHAM_DECOMP_STATUS_FAILED_NEED_SEED_BYTES)
+               return LZHAM_Z_NEED_DICT;
+            else
+               return LZHAM_Z_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
+         }
+
+         if ((status == LZHAM_DECOMP_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
+            return LZHAM_Z_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input, or by setting flush to LZHAM_Z_FINISH.
+         else if (flush == LZHAM_Z_FINISH)
+         {
+            // Caller has indicated that all remaining input was at next_in, and all remaining output will fit entirely in next_out.
+            // (The output buffer at next_out MUST be large enough to hold the remaining uncompressed data when flush==LZHAM_Z_FINISH).
+            if (status == LZHAM_DECOMP_STATUS_SUCCESS)
+               return LZHAM_Z_STREAM_END;
+            // If status is LZHAM_DECOMP_STATUS_HAS_MORE_OUTPUT, there must be at least 1 more byte on the way but the caller to lzham_decompress() supplied an empty output buffer.
+            // Something is wrong because the caller's output buffer should be large enough to hold the entire decompressed stream when flush==LZHAM_Z_FINISH.
+            else if (status == LZHAM_DECOMP_STATUS_HAS_MORE_OUTPUT)
+               return LZHAM_Z_BUF_ERROR;
+         }
+         else if ((status == LZHAM_DECOMP_STATUS_SUCCESS) || (!pStream->avail_in) || (!pStream->avail_out))
+            break;
+      }
+
+      return (status == LZHAM_DECOMP_STATUS_SUCCESS) ? LZHAM_Z_STREAM_END : LZHAM_Z_OK;
+   }
+
+   int LZHAM_CDECL lzham_lib_z_inflateEnd(lzham_z_streamp pStream)
+   {
+      if (!pStream)
+         return LZHAM_Z_STREAM_ERROR;
+
+      lzham_decompress_state_ptr pState = static_cast<lzham_decompress_state_ptr>(pStream->state);
+      if (pState)
+      {
+         lzham_decompress_checksums* checksums = lzham_lib_decompress_deinit(pState);
+
+         pStream->adler32 = checksums->adler32;
+         pStream->crc32 = checksums->crc32;
+         pStream->state = NULL;
+      }
+
+      return LZHAM_Z_OK;
+   }
+
+   int LZHAM_CDECL lzham_lib_z_uncompress(unsigned char *pDest, lzham_z_ulong *pDest_len, const unsigned char *pSource, lzham_z_ulong source_len)
+   {
+      lzham_z_stream stream;
+      int status;
+      memset(&stream, 0, sizeof(stream));
+
+      // In case lzham_z_ulong is 64-bits (argh I hate longs).
+      if ((source_len | *pDest_len) > 0xFFFFFFFFU)
+         return LZHAM_Z_PARAM_ERROR;
+
+      stream.next_in = pSource;
+      stream.avail_in = (uint)source_len;
+      stream.next_out = pDest;
+      stream.avail_out = (uint)*pDest_len;
+
+      status = lzham_lib_z_inflateInit(&stream);
+      if (status != LZHAM_Z_OK)
+         return status;
+
+      status = lzham_lib_z_inflate(&stream, LZHAM_Z_FINISH);
+      if (status != LZHAM_Z_STREAM_END)
+      {
+         lzham_lib_z_inflateEnd(&stream);
+         return ((status == LZHAM_Z_BUF_ERROR) && (!stream.avail_in)) ? LZHAM_Z_DATA_ERROR : status;
+      }
+      *pDest_len = stream.total_out;
+
+      return lzham_lib_z_inflateEnd(&stream);
+   }
+
+   const char * LZHAM_CDECL lzham_lib_z_error(int err)
+   {
+      static struct
+      {
+         int m_err;
+         const char *m_pDesc;
+      }
+      s_error_descs[] =
+      {
+         { LZHAM_Z_OK, "" },
+         { LZHAM_Z_STREAM_END, "stream end" },
+         { LZHAM_Z_NEED_DICT, "need dictionary" },
+         { LZHAM_Z_ERRNO, "file error" },
+         { LZHAM_Z_STREAM_ERROR, "stream error" },
+         { LZHAM_Z_DATA_ERROR, "data error" },
+         { LZHAM_Z_MEM_ERROR, "out of memory" },
+         { LZHAM_Z_BUF_ERROR, "buf error" },
+         { LZHAM_Z_VERSION_ERROR, "version error" },
+         { LZHAM_Z_PARAM_ERROR, "parameter error" }
+      };
+      for (uint i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i)
+         if (s_error_descs[i].m_err == err)
+            return s_error_descs[i].m_pDesc;
+      return NULL;
+   }
+
+   lzham_z_ulong lzham_lib_z_adler32(lzham_z_ulong adler, const unsigned char *ptr, size_t buf_len)
+   {
+      return adler32(ptr, buf_len, adler);
+   }
+
+   lzham_z_ulong LZHAM_CDECL lzham_lib_z_crc32(lzham_z_ulong crc, const lzham_uint8 *ptr, size_t buf_len)
+   {
+      return crc32(crc, ptr, buf_len);
+   }
+
+} // namespace lzham
diff --git a/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.cpp b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.cpp
new file mode 100644
index 00000000..2bd03266
--- /dev/null
+++ b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.cpp
@@ -0,0 +1,46 @@
+// File: lzham_lzdecompbase.cpp
+// See Copyright Notice and license at the end of include/lzham.h
+#include "../include/lzham_core.h"
+#include "lzham_lzdecompbase.h"
+
+namespace lzham
+{
+   void CLZDecompBase::init_position_slots(uint dict_size_log2)
+   {
+      m_dict_size_log2 = dict_size_log2;
+      m_dict_size = 1U << dict_size_log2;
+
+      int i, j;
+      for (i = 0, j = 0; i < cLZXMaxPositionSlots; i += 2)
+      {
+         m_lzx_position_extra_bits[i] = (uint8)j;
+         m_lzx_position_extra_bits[i + 1] = (uint8)j;
+
+         if ((i != 0) && (j < 25))
+            j++;
+      }
+
+      for (i = 0, j = 0; i < cLZXMaxPositionSlots; i++)
+      {
+         m_lzx_position_base[i] = j;
+         m_lzx_position_extra_mask[i] = (1 << m_lzx_position_extra_bits[i]) - 1;
+         j += (1 << m_lzx_position_extra_bits[i]);
+      }
+
+      m_num_lzx_slots = 0;
+
+      const uint largest_dist = m_dict_size - 1;
+      for (i = 0; i < cLZXMaxPositionSlots; i++)
+      {
+         if ( (largest_dist >= m_lzx_position_base[i]) &&
+              (largest_dist < (m_lzx_position_base[i] + (1 << m_lzx_position_extra_bits[i])) ) )
+         {
+            m_num_lzx_slots = i + 1;
+            break;
+         }
+      }
+
+      LZHAM_VERIFY(m_num_lzx_slots);
+   }
+
+} //namespace lzham
diff --git a/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.h b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.h
new file mode 100644
index 00000000..337f7d13
--- /dev/null
+++ b/r5dev/thirdparty/lzham/lzhamdecomp/lzham_lzdecompbase.h
@@ -0,0 +1,90 @@
+// File: lzham_lzdecompbase.h
+// See Copyright Notice and license at the end of include/lzham.h
+#pragma once
+
+//#define LZHAM_LZDEBUG
+
+#define LZHAM_IS_MATCH_MODEL_INDEX(prev_char, cur_state) ((prev_char) >> (8 - CLZDecompBase::cNumIsMatchContextBits)) + ((cur_state) << CLZDecompBase::cNumIsMatchContextBits)
+
+#define LZHAM_USE_ALL_ARITHMETIC_CODING 0
+
+#define LZHAM_RND_CONG(jcong) (69069U * jcong + 1234567U)
+
+namespace lzham
+{
+   struct CLZDecompBase
+   {
+      enum
+      {
+         cMinMatchLen = 2U,
+         cMaxMatchLen = 257U,
+
+         cMaxHugeMatchLen = 65536,
+
+         cMinDictSizeLog2 = 15,
+         cMaxDictSizeLog2 = 29,
+
+         cMatchHistSize = 4,
+         cMaxLen2MatchDist = 2047
+      };
+
+      enum
+      {
+         cLZXNumSecondaryLengths = 249,
+
+         cNumHugeMatchCodes = 1,
+         cMaxHugeMatchCodeBits = 16,
+
+         cLZXNumSpecialLengths = 2,
+
+         cLZXLowestUsableMatchSlot = 1,
+         cLZXMaxPositionSlots = 128
+      };
+
+      enum
+      {
+         cLZXSpecialCodeEndOfBlockCode = 0,
+         cLZXSpecialCodePartialStateReset = 1
+      };
+
+      enum
+      {
+         cLZHAMDebugSyncMarkerValue = 666,
+         cLZHAMDebugSyncMarkerBits = 12
+      };
+
+      enum
+      {
+         cBlockHeaderBits = 2,
+         cBlockCheckBits = 4,
+         cBlockFlushTypeBits = 2,
+
+         cSyncBlock = 0,
+         cCompBlock = 1,
+         cRawBlock = 2,
+         cEOFBlock = 3
+      };
+
+      enum
+      {
+         cNumStates = 12,
+         cNumLitStates = 7,
+
+         cNumLitPredBits = 6,       // must be even
+         cNumDeltaLitPredBits = 6,  // must be even
+
+         cNumIsMatchContextBits = 6
+      };
+
+      uint m_dict_size_log2;
+      uint m_dict_size;
+
+      uint m_num_lzx_slots;
+      uint m_lzx_position_base[cLZXMaxPositionSlots];
+      uint m_lzx_position_extra_mask[cLZXMaxPositionSlots];
+      uint8 m_lzx_position_extra_bits[cLZXMaxPositionSlots];
+
+      void init_position_slots(uint dict_size_log2);
+   };
+
+} // namespace lzham
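
The simplest entry point in the decompressor added above is the one-shot lzham_lib_decompress_memory, which wraps init/decompress/deinit and forces LZHAM_DECOMP_FLAG_OUTPUT_UNBUFFERED, so the destination buffer must already be sized for the fully decompressed data. The sketch below is a hypothetical caller-side example, not part of this patch: the header name, the dict_size_log2 value, and the buffer handling are assumptions. Note also that lzham_lib_decompress_deinit returns a new-allocated lzham_decompress_checksums, so callers that use the streaming API directly are responsible for deleting it.

// Hypothetical usage sketch (assumptions: header name "lzham.h", dict_size_log2 = 20).
#include <cstring>   // memset
#include "lzham.h"   // assumed public header exposing the declarations above

static bool decompress_blob(const lzham_uint8* pSrc, size_t src_len,
                            lzham_uint8* pDst, size_t dst_capacity)
{
   lzham_decompress_params params;
   memset(&params, 0, sizeof(params));
   params.m_struct_size = sizeof(lzham_decompress_params);
   params.m_dict_size_log2 = 20; // assumed; must lie within [cMinDictSizeLog2, cMaxDictSizeLog2]
   params.m_decompress_flags = LZHAM_DECOMP_FLAG_COMPUTE_ADLER32 | LZHAM_DECOMP_FLAG_COMPUTE_CRC32;

   size_t dst_len = dst_capacity; // in: capacity, out: decompressed size
   lzham_uint32 adler32 = 0, crc32 = 0;
   lzham_decompress_status_t status = lzham_lib_decompress_memory(
      &params, pDst, &dst_len, pSrc, src_len, &adler32, &crc32);

   return status == LZHAM_DECOMP_STATUS_SUCCESS;
}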
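The zlib-style wrappers follow the familiar inflateInit/inflate/inflateEnd shape. One detail worth noting from lzham_lib_z_inflateInit2: a positive window_bits enables LZHAM_DECOMP_FLAG_READ_ZLIB_STREAM (zlib-framed input) while a negative value selects a raw stream, and its magnitude becomes m_dict_size_log2 (clamped to at least LZHAM_MIN_DICT_SIZE_LOG2). When the uncompressed size is known up front, lzham_lib_z_uncompress performs the whole init/inflate/end sequence; the sketch below is illustrative only, and the header name and error handling are assumptions rather than part of this change.

// Hypothetical usage sketch of the zlib-style wrapper (assumed header name).
#include <cstdio>
#include "lzham.h" // assumed public header exposing the lzham_lib_z_* declarations above

static bool unzip_blob(unsigned char* pDst, lzham_z_ulong dst_capacity,
                       const unsigned char* pSrc, lzham_z_ulong src_len)
{
   lzham_z_ulong dst_len = dst_capacity; // in: capacity, out: bytes written
   int rc = lzham_lib_z_uncompress(pDst, &dst_len, pSrc, src_len);
   if (rc != LZHAM_Z_OK)
   {
      std::printf("lzham inflate failed: %s\n", lzham_lib_z_error(rc));
      return false;
   }
   return true;
}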