r5sdk/r5dev/mathlib/ssenoise.cpp
Kawe Mazidjatari f120354e96 Initial port to CMake
* All libraries have been isolated from each other, and build into separate artifacts.
* Project has been restructured to support isolating libraries.
* CCrashHandler now calls a callback on crash (set up from core/dllmain.cpp; it can be set up in any way for any project. The callback is invoked when the application crashes, which is useful, for example, for flushing buffers before closing handles to logging files).
* Tier0 'CoreMsgV' function now calls a callback sink, which could be set by the user (currently setup to the SDK's internal logger in core/dllmain.cpp).

TODO:
* Add a batch file to autogenerate all projects.
* Add support for dedicated server.
* Add support for client dll.

Bugs:
* Game crashes on the title screen after the UI script compiler has finished (root cause unknown).
* Curl error messages are getting logged twice for the dedicated server due to the removal of all "DEDICATED" preprocessor directives to support isolating projects. This has to be fixed properly!
2023-05-10 00:05:38 +02:00

231 lines
9.6 KiB
C++
Raw Blame History

//========= Copyright © 1996-2006, Valve Corporation, All rights reserved. ============//
//
// Purpose: Fast low quality noise suitable for real time use
//
//=====================================================================================//
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/ssemath.h"
#include "mathlib/noisedata.h"
// memdbgon must be the last include file in a .cpp file!!!
//#include "tier0/memdbgon.h"
// Bias added to every input coordinate before the resulting bit pattern is
// masked and read back as an integer (see NoiseSIMD / DNoiseSIMD).
// NOTE(review): assuming each fltx4 lane is an IEEE-754 single, a value in
// [2^15, 2^16) has a spacing of 2^-8, so for inputs in [0, 2^15) the low
// mantissa bits of (x + MAGIC_NUMBER) hold x as fixed point with 8 fraction
// bits. Inputs outside that range presumably do not decode the same way - confirm.
#define MAGIC_NUMBER (1<<15) // gives 8 bits of fraction
// MAGIC_NUMBER replicated into all four lanes.
static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
// Per-lane bit mask that keeps the low 16 bits of each 32-bit lane.
static ALIGN16 int32 idx_mask[4] = { 0xffff, 0xffff, 0xffff, 0xffff };
// idx_mask reinterpreted as an fltx4 so it can be passed to AndSIMD. Despite
// the name, the mask is 0xffff per lane (16 bits), not 0xff.
#define MASK255 (*((fltx4 *)(& idx_mask )))
// Hashes an integer lattice coordinate through the three permutation tables
// (perm_a, then perm_b, then perm_c) and returns the impulse_xcoords entry
// selected by the final hash. Each table index is wrapped with & 0xff.
// The original note on this function states that it returns 0..1.
static inline float GetLatticePointValue(int idx_x, int idx_y, int idx_z)
{
	const int nHashX = perm_a[idx_x & 0xff];
	const int nHashXY = perm_b[(idx_y + nHashX) & 0xff];
	const int nHashXYZ = perm_c[(idx_z + nHashXY) & 0xff];
	return impulse_xcoords[nHashXYZ];
}
//-----------------------------------------------------------------------------
// Purpose: Evaluates lattice value noise for four points at once.
// Input  : x, y, z - one coordinate per SIMD lane for each of the four points.
// Output : One value per lane: the trilinear blend of the eight lattice values
//          surrounding the point, remapped as 2 * (v - 0.5).
//-----------------------------------------------------------------------------
fltx4 NoiseSIMD(const fltx4& x, const fltx4& y, const fltx4& z)
{
	// use magic to convert to integer index: after the bias and the mask, the
	// bit pattern of each lane is read below as an integer whose low 8 bits are
	// the position inside the lattice cell and whose next 8 bits are the cell.
	fltx4 x_idx = AndSIMD(MASK255, AddSIMD(x, Four_MagicNumbers));
	fltx4 y_idx = AndSIMD(MASK255, AddSIMD(y, Four_MagicNumbers));
	fltx4 z_idx = AndSIMD(MASK255, AddSIMD(z, Four_MagicNumbers));

	// latticeXYZ holds, per lane, the lattice value at cell corner (+X, +Y, +Z).
	fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
	fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;

	// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
	// Converting the indexed noise values back to vectors will cause more (128 bytes)
	// The noise table could store vectors if we chunked it into 2x2x2 blocks.
	fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;

	// Scalar pass for lane i: split each masked index into its fraction (low 8
	// bits, scaled by 1/256) and its cell (bits above), then fetch the eight
	// corner values of that cell. The corner lookups wrap their indices with
	// & 0xff, so cell + 1 is safe. The macro is local to this function and is
	// undefined again right after use.
#define DOPASS(i) \
	do { \
		unsigned int xi = SubInt( x_idx, i ); \
		unsigned int yi = SubInt( y_idx, i ); \
		unsigned int zi = SubInt( z_idx, i ); \
		SubFloat( xfrac, i ) = (xi & 0xff)*(1.0f/256.0f); \
		SubFloat( yfrac, i ) = (yi & 0xff)*(1.0f/256.0f); \
		SubFloat( zfrac, i ) = (zi & 0xff)*(1.0f/256.0f); \
		xi>>=8; \
		yi>>=8; \
		zi>>=8; \
		\
		SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi ); \
		SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 ); \
		SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi ); \
		SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 ); \
		SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi ); \
		SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 ); \
		SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi ); \
		SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 ); \
	} while (0)

	DOPASS(0);
	DOPASS(1);
	DOPASS(2);
	DOPASS(3);
#undef DOPASS

	// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
	// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops

	// first, do x interpolation
	fltx4 l2d00 = AddSIMD(lattice000, MulSIMD(xfrac, SubSIMD(lattice100, lattice000)));
	fltx4 l2d01 = AddSIMD(lattice001, MulSIMD(xfrac, SubSIMD(lattice101, lattice001)));
	fltx4 l2d10 = AddSIMD(lattice010, MulSIMD(xfrac, SubSIMD(lattice110, lattice010)));
	fltx4 l2d11 = AddSIMD(lattice011, MulSIMD(xfrac, SubSIMD(lattice111, lattice011)));

	// now, do y interpolation
	fltx4 l1d0 = AddSIMD(l2d00, MulSIMD(yfrac, SubSIMD(l2d10, l2d00)));
	fltx4 l1d1 = AddSIMD(l2d01, MulSIMD(yfrac, SubSIMD(l2d11, l2d01)));

	// final z interpolation
	fltx4 rslt = AddSIMD(l1d0, MulSIMD(zfrac, SubSIMD(l1d1, l1d0)));

	// remap as 2 * (v - 0.5): with lattice values in 0..1 (as noted on
	// GetLatticePointValue) this maps 0..1 to -1..1
	return MulSIMD(Four_Twos, SubSIMD(rslt, Four_PointFives));
}
// Hashes an integer lattice coordinate through perm_a, perm_b and perm_c (each
// index wrapped with & 0xff), then copies the three consecutive floats found at
// s_randomGradients[hash * 3] into lane idx of x, y and z respectively.
static inline void GetVectorLatticePointValue(int idx, fltx4& x, fltx4& y, fltx4& z,
	int idx_x, int idx_y, int idx_z)
{
	const int nHashX = perm_a[idx_x & 0xff];
	const int nHashXY = perm_b[(idx_y + nHashX) & 0xff];
	const int nHashXYZ = perm_c[(idx_z + nHashXY) & 0xff];

	// Three floats are stored per table entry.
	float const* pGradient = &s_randomGradients[nHashXYZ * 3];

	SubFloat(x, idx) = pGradient[0];
	SubFloat(y, idx) = pGradient[1];
	SubFloat(z, idx) = pGradient[2];
}
FourVectors DNoiseSIMD(const fltx4& x, const fltx4& y, const fltx4& z)
{
// use magic to convert to integer index
fltx4 x_idx = AndSIMD(MASK255, AddSIMD(x, Four_MagicNumbers));
fltx4 y_idx = AndSIMD(MASK255, AddSIMD(y, Four_MagicNumbers));
fltx4 z_idx = AndSIMD(MASK255, AddSIMD(z, Four_MagicNumbers));
fltx4 xlattice000 = Four_Zeros, xlattice001 = Four_Zeros, xlattice010 = Four_Zeros, xlattice011 = Four_Zeros;
fltx4 xlattice100 = Four_Zeros, xlattice101 = Four_Zeros, xlattice110 = Four_Zeros, xlattice111 = Four_Zeros;
fltx4 ylattice000 = Four_Zeros, ylattice001 = Four_Zeros, ylattice010 = Four_Zeros, ylattice011 = Four_Zeros;
fltx4 ylattice100 = Four_Zeros, ylattice101 = Four_Zeros, ylattice110 = Four_Zeros, ylattice111 = Four_Zeros;
fltx4 zlattice000 = Four_Zeros, zlattice001 = Four_Zeros, zlattice010 = Four_Zeros, zlattice011 = Four_Zeros;
fltx4 zlattice100 = Four_Zeros, zlattice101 = Four_Zeros, zlattice110 = Four_Zeros, zlattice111 = Four_Zeros;
// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
// Converting the indexed noise values back to vectors will cause more (128 bytes)
// The noise table could store vectors if we chunked it into 2x2x2 blocks.
fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
#define DODPASS(i) \
{ unsigned int xi = SubInt( x_idx, i ); \
unsigned int yi = SubInt( y_idx, i ); \
unsigned int zi = SubInt( z_idx, i ); \
SubFloat( xfrac, i ) = (xi & 0xff)*(1.0f/256.0f); \
SubFloat( yfrac, i ) = (yi & 0xff)*(1.0f/256.0f); \
SubFloat( zfrac, i ) = (zi & 0xff)*(1.0f/256.0f); \
xi>>=8; \
yi>>=8; \
zi>>=8; \
\
GetVectorLatticePointValue( i, xlattice000, ylattice000, zlattice000, xi,yi,zi ); \
GetVectorLatticePointValue( i, xlattice001, ylattice001, zlattice001, xi,yi,zi+1 ); \
GetVectorLatticePointValue( i, xlattice010, ylattice010, zlattice010, xi,yi+1,zi ); \
GetVectorLatticePointValue( i, xlattice011, ylattice011, zlattice011, xi,yi+1,zi+1 ); \
GetVectorLatticePointValue( i, xlattice100, ylattice100, zlattice100, xi+1,yi,zi ); \
GetVectorLatticePointValue( i, xlattice101, ylattice101, zlattice101, xi+1,yi,zi+1 ); \
GetVectorLatticePointValue( i, xlattice110, ylattice110, zlattice110, xi+1,yi+1,zi ); \
GetVectorLatticePointValue( i, xlattice111, ylattice111, zlattice111, xi+1,yi+1,zi+1 ); \
}
DODPASS(0);
DODPASS(1);
DODPASS(2);
DODPASS(3);
// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops
// first, do x interpolation
fltx4 xl2d00 = AddSIMD(xlattice000, MulSIMD(xfrac, SubSIMD(xlattice100, xlattice000)));
fltx4 xl2d01 = AddSIMD(xlattice001, MulSIMD(xfrac, SubSIMD(xlattice101, xlattice001)));
fltx4 xl2d10 = AddSIMD(xlattice010, MulSIMD(xfrac, SubSIMD(xlattice110, xlattice010)));
fltx4 xl2d11 = AddSIMD(xlattice011, MulSIMD(xfrac, SubSIMD(xlattice111, xlattice011)));
// now, do y interpolation
fltx4 xl1d0 = AddSIMD(xl2d00, MulSIMD(yfrac, SubSIMD(xl2d10, xl2d00)));
fltx4 xl1d1 = AddSIMD(xl2d01, MulSIMD(yfrac, SubSIMD(xl2d11, xl2d01)));
// final z interpolation
FourVectors rslt;
rslt.x = AddSIMD(xl1d0, MulSIMD(zfrac, SubSIMD(xl1d1, xl1d0)));
fltx4 yl2d00 = AddSIMD(ylattice000, MulSIMD(xfrac, SubSIMD(ylattice100, ylattice000)));
fltx4 yl2d01 = AddSIMD(ylattice001, MulSIMD(xfrac, SubSIMD(ylattice101, ylattice001)));
fltx4 yl2d10 = AddSIMD(ylattice010, MulSIMD(xfrac, SubSIMD(ylattice110, ylattice010)));
fltx4 yl2d11 = AddSIMD(ylattice011, MulSIMD(xfrac, SubSIMD(ylattice111, ylattice011)));
// now, do y interpolation
fltx4 yl1d0 = AddSIMD(yl2d00, MulSIMD(yfrac, SubSIMD(yl2d10, yl2d00)));
fltx4 yl1d1 = AddSIMD(yl2d01, MulSIMD(yfrac, SubSIMD(yl2d11, yl2d01)));
// final z interpolation
rslt.y = AddSIMD(yl1d0, MulSIMD(zfrac, SubSIMD(yl1d1, yl1d0)));
fltx4 zl2d00 = AddSIMD(zlattice000, MulSIMD(xfrac, SubSIMD(zlattice100, zlattice000)));
fltx4 zl2d01 = AddSIMD(zlattice001, MulSIMD(xfrac, SubSIMD(zlattice101, zlattice001)));
fltx4 zl2d10 = AddSIMD(zlattice010, MulSIMD(xfrac, SubSIMD(zlattice110, zlattice010)));
fltx4 zl2d11 = AddSIMD(zlattice011, MulSIMD(xfrac, SubSIMD(zlattice111, zlattice011)));
// now, do y interpolation
fltx4 zl1d0 = AddSIMD(zl2d00, MulSIMD(yfrac, SubSIMD(zl2d10, zl2d00)));
fltx4 zl1d1 = AddSIMD(zl2d01, MulSIMD(yfrac, SubSIMD(zl2d11, zl2d01)));
// final z interpolation
rslt.z = AddSIMD(zl1d0, MulSIMD(zfrac, SubSIMD(zl1d1, zl1d0)));
return rslt;
}
// Convenience overload: forwards the x, y and z members of pos to the
// three-argument NoiseSIMD and returns its result unchanged.
fltx4 NoiseSIMD(FourVectors const& pos)
{
	const fltx4& fl4X = pos.x;
	const fltx4& fl4Y = pos.y;
	const fltx4& fl4Z = pos.z;
	return NoiseSIMD(fl4X, fl4Y, fl4Z);
}
// Convenience overload: forwards the x, y and z members of pos to the
// three-argument DNoiseSIMD and returns its result unchanged.
FourVectors DNoiseSIMD(FourVectors const& pos)
{
	const fltx4& fl4X = pos.x;
	const fltx4& fl4Y = pos.y;
	const fltx4& fl4Z = pos.z;
	return DNoiseSIMD(fl4X, fl4Y, fl4Z);
}
// Builds a curl-style vector from three DNoiseSIMD samples: one at pos, one at
// pos shifted by a fixed offset, and one shifted again by a second fixed offset
// (the offsets accumulate). The three samples are treated as the derivatives of
// the three components of a vector-valued field, and the returned value
// combines them as the curl of that field.
FourVectors CurlNoiseSIMD(FourVectors const& pos)
{
	// Sample for the first field component, taken at the input position.
	FourVectors fl4GradA = DNoiseSIMD(pos);

	// Sample for the second field component, taken at a shifted position.
	FourVectors fl4Sample = pos;
	fl4Sample.x = AddSIMD(fl4Sample.x, ReplicateX4(43.256f));
	fl4Sample.y = AddSIMD(fl4Sample.y, ReplicateX4(-67.89f));
	fl4Sample.z = AddSIMD(fl4Sample.z, ReplicateX4(1338.2f));
	FourVectors fl4GradB = DNoiseSIMD(fl4Sample);

	// Sample for the third field component; this shift is applied on top of
	// the previous one.
	fl4Sample.x = AddSIMD(fl4Sample.x, ReplicateX4(-129.856f));
	fl4Sample.y = AddSIMD(fl4Sample.y, ReplicateX4(-967.23f));
	fl4Sample.z = AddSIMD(fl4Sample.z, ReplicateX4(2338.98f));
	FourVectors fl4GradC = DNoiseSIMD(fl4Sample);

	// curl = ( dC/dy - dB/dz, dA/dz - dC/dx, dB/dx - dA/dy )
	FourVectors fl4Curl;
	fl4Curl.x = SubSIMD(fl4GradC.y, fl4GradB.z);
	fl4Curl.y = SubSIMD(fl4GradA.z, fl4GradC.x);
	fl4Curl.z = SubSIMD(fl4GradB.x, fl4GradA.y);
	return fl4Curl;
}