//========= Copyright � 1996-2006, Valve Corporation, All rights reserved. ============// // // Purpose: generates 4 random numbers in the range 0..1 quickly, using SIMD // //=====================================================================================// #include "core/stdafx.h" #include "tier0/dbg.h" #include "tier0/threadtools.h" #include "mathlib/mathlib.h" #include "mathlib/vector.h" #include "mathlib/ssemath.h" // memdbgon must be the last include file in a .cpp file!!! //#include "tier0/memdbgon.h" // see knuth volume 3 for insight. class SIMDRandStreamContext { fltx4 m_RandY[55]; fltx4* m_pRand_J, * m_pRand_K; public: void Seed(uint32 seed) { m_pRand_J = m_RandY + 23; m_pRand_K = m_RandY + 54; for (int i = 0; i < 55; i++) { for (int j = 0; j < 4; j++) { SubFloat(m_RandY[i], j) = (seed >> 16) / 65536.0f; seed = (seed + 1) * 3141592621u; } } } inline fltx4 RandSIMD(void) { // ret= rand[k]+rand[j] fltx4 retval = AddSIMD(*m_pRand_K, *m_pRand_J); // if ( ret>=1.0) ret-=1.0 bi32x4 overflow_mask = CmpGeSIMD(retval, Four_Ones); retval = SubSIMD(retval, AndSIMD(Four_Ones, overflow_mask)); *m_pRand_K = retval; // update pointers w/ wrap-around if (--m_pRand_J < m_RandY) m_pRand_J = m_RandY + 54; if (--m_pRand_K < m_RandY) m_pRand_K = m_RandY + 54; return retval; } }; #define MAX_SIMULTANEOUS_RANDOM_STREAMS 32 static SIMDRandStreamContext s_SIMDRandContexts[MAX_SIMULTANEOUS_RANDOM_STREAMS]; static volatile LONG s_nRandContextsInUse[MAX_SIMULTANEOUS_RANDOM_STREAMS]; void SeedRandSIMD(uint32 seed) { for (int i = 0; i < MAX_SIMULTANEOUS_RANDOM_STREAMS; i++) s_SIMDRandContexts[i].Seed(seed + i); } fltx4 RandSIMD(int nContextIndex) { return s_SIMDRandContexts[nContextIndex].RandSIMD(); } int GetSIMDRandContext(void) { for (;;) { for (int i = 0; i < NELEMS(s_SIMDRandContexts); i++) { if (!s_nRandContextsInUse[i]) // available? { // try to take it! if (ThreadInterlockedAssignIf(&(s_nRandContextsInUse[i]), 1, 0)) { ThreadMemoryBarrier(); return i; // done! } } } Assert(0); // why don't we have enough buffers? ThreadSleep(0); } } void ReleaseSIMDRandContext(int nContext) { ThreadMemoryBarrier(); s_nRandContextsInUse[nContext] = 0; } fltx4 RandSIMD(void) { return s_SIMDRandContexts[0].RandSIMD(); }